def get_remote_filesize(url, referer, config, notifier=None):
    if notifier is None:
        notifier = PixivHelper.dummy_notifier

    PixivHelper.print_and_log(None, 'Getting remote filesize...')

    # open with HEAD method, might be expensive
    req = PixivHelper.create_custom_request(url, config, referer, head=True)
    file_size = -1

    try:
        br = PixivBrowserFactory.getBrowser(config=config)
        res = br.open_novisit(req)
        content_length = res.info()['Content-Length']
        if content_length is not None:
            file_size = int(content_length)
        else:
            PixivHelper.print_and_log('info', "\tNo file size information!")
        res.close()
    except KeyError:
        PixivHelper.print_and_log('info', "\tNo file size information!")
    except mechanize.HTTPError as e:
        # fix Issue #503
        # handle http errors explicitly by code
        if int(e.code) in (404, 500):
            PixivHelper.print_and_log('info', "\tNo file size information!")
        else:
            raise

    PixivHelper.print_and_log(None, f"Remote filesize = {PixivHelper.size_in_str(file_size)} ({file_size} Bytes)")
    return file_size
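# Editor's sketch (not part of the original module): a hedged example of how
# get_remote_filesize() above can be used to skip re-downloading an up-to-date
# local file. The helper name is hypothetical; url/referer/config are assumed
# to be supplied by the caller.
def is_local_file_current(url, referer, config, filename):
    import os
    remote_size = get_remote_filesize(url, referer, config)
    return remote_size > 0 and os.path.isfile(filename) and os.path.getsize(filename) == remote_size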
def process_manga_series(caller,
                         config,
                         manga_series_id: int,
                         start_page: int = 1,
                         end_page: int = 0,
                         notifier=None,
                         job_option=None):
    if notifier is None:
        notifier = PixivHelper.dummy_notifier
    try:
        msg = Fore.YELLOW + Style.NORMAL + f'Processing Manga Series Id: {manga_series_id}' + Style.RESET_ALL
        PixivHelper.print_and_log(None, msg)
        notifier(type="MANGA_SERIES", message=msg)

        if start_page != 1:
            PixivHelper.print_and_log('info', 'Start Page: ' + str(start_page))
        if end_page != 0:
            PixivHelper.print_and_log('info', 'End Page: ' + str(end_page))

        flag = True
        current_page = start_page
        while flag:
            manga_series = PixivBrowserFactory.getBrowser().getMangaSeries(manga_series_id, current_page)
            for (image_id, order) in manga_series.pages_with_order:
                result = process_image(caller,
                                       config,
                                       artist=manga_series.artist,
                                       image_id=image_id,
                                       user_dir='',
                                       bookmark=False,
                                       search_tags='',
                                       title_prefix="",
                                       bookmark_count=-1,
                                       image_response_count=-1,
                                       notifier=notifier,
                                       job_option=job_option,
                                       useblacklist=True,
                                       manga_series_order=order,
                                       manga_series_parent=manga_series)
                PixivHelper.wait(result, config)
            current_page += 1
            if manga_series.is_last_page:
                PixivHelper.print_and_log('info', f'Last Page {manga_series.current_page}')
                flag = False
            if current_page > end_page and end_page != 0:
                PixivHelper.print_and_log('info', f'End Page reached {end_page}')
                flag = False
            if manga_series.pages_with_order is None or len(manga_series.pages_with_order) == 0:
                PixivHelper.print_and_log('info', 'No more works.')
                flag = False
    except Exception as ex:
        if isinstance(ex, KeyboardInterrupt):
            raise
        caller.ERROR_CODE = getattr(ex, 'errorCode', -1)
        exc_type, exc_value, exc_traceback = sys.exc_info()
        traceback.print_exception(exc_type, exc_value, exc_traceback)
        PixivHelper.print_and_log('error', f'Error at process_manga_series(): {manga_series_id}')
        PixivHelper.print_and_log('error', f'Exception: {sys.exc_info()}')
        raise
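# Editor's sketch: a minimal notifier compatible with the callback used above.
# Handlers invoke it with keyword arguments `type` and `message`; any other
# keywords a handler might pass are swallowed by **kwargs. The name is hypothetical.
def console_notifier(type=None, message=None, **kwargs):
    print(f"[{type}] {message}")

# Hypothetical call: process_manga_series(caller, config, 12345, notifier=console_notifier)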
def ParseMangaImagesNew(self, page, _br):
    urls = []
    # mangaSection = page.find("section", attrs={'class': 'manga'})
    # links = mangaSection.findAll('a')
    # pattern /member_illust.php?mode=manga_big&illust_id=46279245&page=0
    if _br is None:
        import PixivBrowserFactory
        _br = PixivBrowserFactory.getExistingBrowser()

    total = page.find("span", attrs={'class': 'total'})
    if total is not None:
        self.imageCount = int(total.string)

    for currPage in range(0, self.imageCount):
        expected_url = '/member_illust.php?mode=manga_big&illust_id=' + str(self.imageId) + '&page=' + str(currPage)
        try:
            href = _br.fixUrl(expected_url)
            print("Fetching big image page:", href)
            bigPage = _br.getPixivPage(url=href,
                                       referer="http://www.pixiv.net/member_illust.php?mode=manga&illust_id=" + str(self.imageId))
            bigImg = bigPage.find('img')
            imgUrl = bigImg["src"]
            # http://i2.pixiv.net/img-original/img/2013/12/27/01/51/37/40538869_p7.jpg
            print("Found: ", imgUrl)
            urls.append(imgUrl)
            bigImg.decompose()
            bigPage.decompose()
            del bigImg
            del bigPage
        except Exception as ex:
            print(ex)

    return urls
def ParseMangaImagesNew(self, page, _br):
    urls = []
    mangaSection = page.find("section", attrs={'class': 'manga'})
    links = mangaSection.findAll('a')
    # /member_illust.php?mode=manga_big&illust_id=46279245&page=0
    if _br is None:
        import PixivBrowserFactory
        _br = PixivBrowserFactory.getExistingBrowser()

    for link in links:
        try:
            href = _br.fixUrl(link["href"])
            print("Fetching big image page:", href)
            bigPage = _br.getPixivPage(url=href,
                                       referer="http://www.pixiv.net/member_illust.php?mode=manga&illust_id=" + str(self.imageId))
            bigImg = bigPage.find('img')
            imgUrl = bigImg["src"]
            print("Found: ", imgUrl)
            urls.append(imgUrl)
            bigImg.decompose()
            bigPage.decompose()
            del bigImg
            del bigPage
        except Exception as ex:
            print(ex)

    total = page.find("span", attrs={'class': 'total'})
    if total is not None:
        self.imageCount = int(total.string)
        if self.imageCount != len(urls):
            raise PixivException("Different images count: " + str(self.imageCount) + " != " + str(len(urls)))

    return urls
def ParseMangaImagesNew(self, page):
    urls = []
    mangaSection = page.find("section", attrs={'class': 'manga'})
    links = mangaSection.findAll('a')
    # /member_illust.php?mode=manga_big&illust_id=46279245&page=0
    import PixivBrowserFactory
    _br = PixivBrowserFactory.getExistingBrowser()

    for link in links:
        try:
            href = _br.fixUrl(link["href"])
            print("Fetching big image page:", href)
            bigPage = _br.getPixivPage(url=href,
                                       referer="http://www.pixiv.net/member_illust.php?mode=manga&illust_id=" + str(self.imageId))
            bigImg = bigPage.find('img')
            imgUrl = bigImg["src"]
            print("Found: ", imgUrl)
            urls.append(imgUrl)
            bigImg.decompose()
            bigPage.decompose()
            del bigImg
            del bigPage
        except Exception as ex:
            print(ex)

    total = page.find("span", attrs={'class': 'total'})
    if total is not None:
        self.imageCount = int(total.string)
        if self.imageCount != len(urls):
            raise PixivException("Different images count: " + str(self.imageCount) + " != " + str(len(urls)))

    return urls
def ParseMangaImagesNew(self, page, _br):
    urls = []
    mangaSection = page.find("section", attrs={'class': 'manga'})
    links = mangaSection.findAll('a')
    # pattern /member_illust.php?mode=manga_big&illust_id=46279245&page=0
    if _br is None:
        import PixivBrowserFactory
        _br = PixivBrowserFactory.getExistingBrowser()

    total = page.find("span", attrs={'class': 'total'})
    if total is not None:
        self.imageCount = int(total.string)

    for currPage in range(0, self.imageCount):
        expected_url = '/member_illust.php?mode=manga_big&illust_id=' + str(self.imageId) + '&page=' + str(currPage)
        try:
            href = _br.fixUrl(expected_url)
            print("Fetching big image page:", href)
            bigPage = _br.getPixivPage(url=href,
                                       referer="http://www.pixiv.net/member_illust.php?mode=manga&illust_id=" + str(self.imageId))
            bigImg = bigPage.find('img')
            imgUrl = bigImg["src"]
            # http://i2.pixiv.net/img-original/img/2013/12/27/01/51/37/40538869_p7.jpg
            print("Found: ", imgUrl)
            urls.append(imgUrl)
            bigImg.decompose()
            bigPage.decompose()
            del bigImg
            del bigPage
        except Exception as ex:
            print(ex)

    return urls
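# Editor's note: the revision above no longer walks the parsed <a> links; it
# rebuilds the manga_big URL for every page index instead. Illustration using
# the example id from the comment:
image_id = 46279245
expected_urls = ['/member_illust.php?mode=manga_big&illust_id=' + str(image_id) + '&page=' + str(p)
                 for p in range(0, 3)]
# ['/member_illust.php?mode=manga_big&illust_id=46279245&page=0', ..., '...&page=2']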
def perform_download(url, file_size, filename, overwrite, config, referer=None, notifier=None):
    if notifier is None:
        notifier = PixivHelper.dummy_notifier
    if referer is None:
        referer = config.referer

    # actual download
    PixivHelper.print_and_log(None, '\rStart downloading...', newline=False)
    # fetch filesize
    req = PixivHelper.create_custom_request(url, config, referer)
    br = PixivBrowserFactory.getBrowser(config=config)
    res = br.open_novisit(req)
    if file_size < 0:
        # final check before download for download progress bar.
        try:
            content_length = res.info()['Content-Length']
            if content_length is not None:
                file_size = int(content_length)
        except KeyError:
            file_size = -1
            PixivHelper.print_and_log('info', "\tNo file size information!")
    (downloadedSize, filename) = PixivHelper.download_image(url, filename, res, file_size, overwrite)
    res.close()
    gc.collect()
    return (downloadedSize, filename)
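# Editor's sketch: get_remote_filesize() and perform_download() combined into one
# hypothetical wrapper, assuming both helpers live in the same module as above.
def download_with_known_size(url, filename, config, referer=None, overwrite=False):
    file_size = get_remote_filesize(url, referer, config)
    return perform_download(url, file_size, filename, overwrite, config, referer)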
def check_version(br, config=None):
    if br is None:
        import PixivBrowserFactory
        br = PixivBrowserFactory.getBrowser(config=config)

    result = br.open_with_retry("https://raw.githubusercontent.com/Nandaka/PixivUtil2/master/PixivConstant.py", retry=3)
    page = result.read().decode('utf-8')
    result.close()

    latest_version_full = re.findall(r"PIXIVUTIL_VERSION = '(\d+)(.*)'", page)
    latest_version_int = int(latest_version_full[0][0])
    curr_version_int = int(re.findall(r"(\d+)", PixivConstant.PIXIVUTIL_VERSION)[0])
    is_beta = True if latest_version_full[0][1].find("beta") >= 0 else False
    if is_beta and not config.notifyBetaVersion:
        return

    url = "https://github.com/Nandaka/PixivUtil2/releases"
    if latest_version_int > curr_version_int:
        if is_beta:
            print_and_log("info", "New beta version available: {0}".format(latest_version_full[0]))
        else:
            print_and_log("info", "New version available: {0}".format(latest_version_full[0]))
        if config.openNewVersion:
            webbrowser.open_new(url)
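# Editor's demonstration of the version parsing used above, run against an
# illustrative PixivConstant.py payload (the version string is made up).
import re
sample_page = "PIXIVUTIL_VERSION = '20991231-beta1'"
match = re.findall(r"PIXIVUTIL_VERSION = '(\d+)(.*)'", sample_page)
# match == [('20991231', '-beta1')]
latest_version_int = int(match[0][0])    # 20991231
is_beta = match[0][1].find("beta") >= 0  # True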
def process_sketch_artists(caller, config, artist_id, start_page=1, end_page=0, title_prefix=None):
    config.loadConfig(path=caller.configfile)
    br = PixivBrowserFactory.getBrowser()

    if title_prefix is None:
        title_prefix = f"Pixiv Sketch - Processing Artist Id: {artist_id}"
    else:
        title_prefix = f"{title_prefix} Pixiv Sketch - Processing Artist Id: {artist_id}"
    caller.set_console_title(title_prefix)
    msg = Fore.YELLOW + Style.NORMAL + f'Processing Artist Id: {artist_id} for PixivSketch' + Style.RESET_ALL
    PixivHelper.print_and_log(None, msg)

    try:
        artist = br.sketch_get_posts_by_artist_id(artist_id, end_page)

        # check if have posts
        if len(artist.posts) == 0:
            PixivHelper.print_and_log('warn', f'No images for Artist Id: {artist_id}')
            return

        POST_PER_PAGE = 10
        start_idx = POST_PER_PAGE * (start_page - 1)
        end_idx = POST_PER_PAGE * (end_page)
        if end_page == 0 or end_idx > len(artist.posts):
            end_idx = len(artist.posts)
        msg = Fore.YELLOW + Style.NORMAL + f'Processing from post #{start_idx} to #{end_idx}' + Style.RESET_ALL
        PixivHelper.print_and_log(None, msg)
        post_to_process = artist.posts[start_idx:end_idx]

        current_post = 1
        for item in post_to_process:
            caller.set_console_title(f"{title_prefix} - Post {current_post} of {len(post_to_process)}")
            PixivHelper.print_and_log(None, f'Post #: {current_post}')
            PixivHelper.print_and_log('info', f'Post ID : {item.imageId}')
            tags_str = ', '.join(item.imageTags)
            PixivHelper.print_and_log('info', f'Tags : {tags_str}')
            download_post(caller, config, item)
            current_post = current_post + 1
    except PixivException as pex:
        PixivHelper.print_and_log("error", f"Failed to process PixivSketch for {artist_id}, maybe doesn't have PixivSketch? ==> {pex.message}")
    except Exception as ex:
        if isinstance(ex, KeyboardInterrupt):
            raise
        caller.ERROR_CODE = getattr(ex, 'errorCode', -1)
        exc_type, exc_value, exc_traceback = sys.exc_info()
        traceback.print_exception(exc_type, exc_value, exc_traceback)
        PixivHelper.print_and_log('error', f'Error at process_sketch_artists(): {artist_id}')
        PixivHelper.print_and_log('error', f'Exception: {sys.exc_info()}')
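# Editor's illustration of the slicing arithmetic above (POST_PER_PAGE = 10),
# with made-up page numbers and post count.
POST_PER_PAGE = 10
start_page, end_page, total_posts = 2, 3, 57
start_idx = POST_PER_PAGE * (start_page - 1)  # 10
end_idx = POST_PER_PAGE * end_page            # 30
if end_page == 0 or end_idx > total_posts:
    end_idx = total_posts
# posts[10:30] -> the 11th through 30th posts are processed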
def check_version():
    import PixivBrowserFactory
    br = PixivBrowserFactory.getBrowser()
    result = br.open_with_retry("https://raw.githubusercontent.com/Nandaka/PixivUtil2/master/PixivConstant.py", retry=3)
    page = result.read()
    latest_version_full = re.findall(r"PIXIVUTIL_VERSION = '(\d+)(.*)'", page)
    latest_version_int = int(latest_version_full[0][0])
    curr_version_int = int(re.findall(r"(\d+)", PixivConstant.PIXIVUTIL_VERSION)[0])
    is_beta = True if latest_version_full[0][1].find("beta") >= 0 else False
    if latest_version_int > curr_version_int:
        print_and_log("info", "New version available: {0}".format(latest_version_full[0]))
def process_fanbox_artist_by_id(caller, config, artist_id, end_page, title_prefix=""):
    config.loadConfig(path=caller.configfile)
    br = PixivBrowserFactory.getBrowser()

    caller.set_console_title(title_prefix)
    try:
        artist = br.fanboxGetArtistById(artist_id)
    except PixivException as pex:
        PixivHelper.print_and_log("error", "Error getting FANBOX artist by id: {0} ==> {1}".format(artist_id, pex.message))
        return

    current_page = 1
    next_url = None
    image_count = 1
    while True:
        PixivHelper.print_and_log("info", "Processing {0}, page {1}".format(artist, current_page))
        caller.set_console_title(f"{title_prefix} FANBOX Artist {artist}, page {current_page}")
        try:
            posts = br.fanboxGetPostsFromArtist(artist, next_url)
        except PixivException as pex:
            PixivHelper.print_and_log("error", "Error getting FANBOX posts of artist: {0} ==> {1}".format(artist, pex.message))
            break

        for post in posts:
            print("#{0}".format(image_count))
            post.printPost()

            # images
            if post.type in PixivModelFanbox.FanboxPost._supportedType:
                try:
                    process_fanbox_post(caller, config, post, artist)
                except KeyboardInterrupt:
                    choice = input("Keyboard Interrupt detected, continue to next post (Y/N)").rstrip("\r")
                    if choice.upper() == 'N':
                        PixivHelper.print_and_log("info", f"FANBOX artist: {artist}, processing aborted")
                        break
                    else:
                        continue
            image_count += 1
            PixivHelper.wait(config)

        if not artist.hasNextPage:
            PixivHelper.print_and_log("info", "No more post for {0}".format(artist))
            break
        current_page += 1
        if end_page > 0 and current_page > end_page:
            PixivHelper.print_and_log("info", "Reaching page limit for {0}, limit {1}".format(artist, end_page))
            break
        next_url = artist.nextUrl
        if next_url is None:
            PixivHelper.print_and_log("info", "No more next page for {0}".format(artist))
            break
def handle_members(caller, job, job_name, job_option):
    member_ids = list()
    if "member_ids" in job:
        print("Multi Member IDs")
        member_ids = job["member_ids"]
    elif "member_id" in job:
        member_id = job["member_id"]
        member_ids.append(member_id)
    else:
        print(f"No member_id or member_ids found in {job_name}!")
        return

    start_page = 1
    if "start_page" in job:
        start_page = int(job["start_page"])
    end_page = 0
    if "end_page" in job:
        end_page = int(job["end_page"])
    from_bookmark = False
    if "from_bookmark" in job:
        from_bookmark = bool(job["from_bookmark"])
    tags = None
    if "tags" in job and len(job["tags"]) > 0:
        tags = job["tags"]
    include_sketch = False
    if "include_sketch" in job and len(job["include_sketch"]) > 0:
        include_sketch = bool(job["include_sketch"])

    for member_id in member_ids:
        PixivArtistHandler.process_member(caller,
                                          job_option.config,
                                          member_id=member_id,
                                          user_dir=job_option.config.rootDirectory,
                                          page=start_page,
                                          end_page=end_page,
                                          bookmark=from_bookmark,
                                          tags=tags,
                                          title_prefix=f"{job_name} ")
        if include_sketch:
            # fetching artist token...
            (artist_model, _) = PixivBrowserFactory.getBrowser().getMemberPage(member_id)
            PixivSketchHandler.process_sketch_artists(caller,
                                                      job_option.config,
                                                      artist_model.artistToken,
                                                      start_page=start_page,
                                                      end_page=end_page,
                                                      title_prefix=f"{job_name} ")
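# Editor's sketch of a job entry that handle_members() above would accept, based
# only on the keys it reads; the surrounding batch-job file format is an assumption.
sample_job = {
    "member_ids": [123456, 789012],
    "start_page": 1,
    "end_page": 0,             # 0 means no page limit
    "from_bookmark": False,
    "tags": "landscape",
    "include_sketch": "True",  # note: any non-empty string passes the len()/bool() checks above
}
# handle_members(caller, sample_job, "job-1", job_option)  # hypothetical call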
def ParseToken(self, page, fromImage=False):
    if self.artistAvatar.endswith("no_profile.png"):
        if fromImage:
            temp = page.findAll(attrs={'class': 'works_display'})
            token = str(temp[0].find('img')['src'])
            return token.split('/')[-2]
        else:
            artistToken = None
            try:
                temp = page.find(attrs={'class': 'display_works linkStyleWorks'}).ul
                if temp is not None:
                    tokens = temp.findAll('img', attrs={'class': '_thumbnail'})
                    for token in tokens:
                        try:
                            tempImage = token['data-src']
                        except KeyError:
                            tempImage = token['src']
                        folders = tempImage.split('/')
                        # skip http://i2.pixiv.net/img-inf/img/2013/04/07/03/08/21/34846113_s.jpg
                        if folders[3] == 'img-inf':
                            continue
                        artistToken = folders[-2]
                        if artistToken != 'common':
                            return artistToken

                    # all thumb images are using img-inf
                    # take the first image and check the medium page
                    if artistToken is None or artistToken != 'common':
                        PixivHelper.GetLogger().info("Unable to parse Artist Token from image list, try to parse from the first image")
                        import PixivBrowserFactory
                        import PixivConstant
                        firstImageLink = temp.find('a', attrs={'class': 'work'})['href']
                        if firstImageLink.find("http") != 0:
                            firstImageLink = PixivConstant.PIXIV_URL + firstImageLink
                        PixivHelper.GetLogger().info("Using: " + firstImageLink + " for parsing artist token")
                        imagePage = PixivBrowserFactory.getBrowser().open(firstImageLink)
                        imageResult = BeautifulSoup(imagePage.read())
                        token = str(imageResult.find(attrs={'class': 'works_display'}).find('img')['src'])
                        return token.split('/')[-2]
                raise PixivException('Cannot parse artist token, possibly different image structure.',
                                     errorCode=PixivException.PARSE_TOKEN_DIFFERENT_IMAGE_STRUCTURE)
            except TypeError:
                raise PixivException('Cannot parse artist token, possibly no images.',
                                     errorCode=PixivException.PARSE_TOKEN_NO_IMAGES)
    else:
        temp = self.artistAvatar.split('/')
        return temp[-2]
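# Editor's demonstration of the folder check above, using the sample URL from the comment:
sample_thumb = 'http://i2.pixiv.net/img-inf/img/2013/04/07/03/08/21/34846113_s.jpg'
folders = sample_thumb.split('/')
# folders[3] == 'img-inf' -> thumbnails served from img-inf carry no artist token and are skipped
assert folders[3] == 'img-inf'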
def process_sketch_post(caller, config, post_id):
    config.loadConfig(path=caller.configfile)
    br = PixivBrowserFactory.getBrowser()

    msg = Fore.YELLOW + Style.NORMAL + f'Processing Post Id: {post_id}' + Style.RESET_ALL
    PixivHelper.print_and_log(None, msg)

    try:
        post = br.sketch_get_post_by_post_id(post_id)
        download_post(caller, config, post)
    except Exception as ex:
        if isinstance(ex, KeyboardInterrupt):
            raise
        caller.ERROR_CODE = getattr(ex, 'errorCode', -1)
        exc_type, exc_value, exc_traceback = sys.exc_info()
        traceback.print_exception(exc_type, exc_value, exc_traceback)
        PixivHelper.print_and_log('error', f'Error at process_sketch_post(): {post_id}')
        PixivHelper.print_and_log('error', f'Exception: {sys.exc_info()}')
def ParseBigImages(self, page, _br):
    self.imageCount = 1

    # Issue #224
    # work manga
    temp = page.find('a', attrs={'class': ' _work manga '})
    if temp is not None:
        if _br is None:
            import PixivBrowserFactory
            _br = PixivBrowserFactory.getExistingBrowser()
        expected_url = '/member_illust.php?mode=big&illust_id=' + str(self.imageId)
        try:
            href = _br.fixUrl(expected_url)
            print("Fetching big image page:", href)
            bigPage = _br.getPixivPage(url=href,
                                       referer="https://www.pixiv.net/member_illust.php?mode=medium&illust_id=" + str(self.imageId))
            bigImg = bigPage.find('img')
            imgUrl = bigImg["src"]
            # http://i2.pixiv.net/img-original/img/2013/12/27/01/51/37/40538869_p7.jpg
            print("Found: ", imgUrl)
            bigImg.decompose()
            bigPage.decompose()
            del bigImg
            del bigPage
            return imgUrl
        except Exception as ex:
            print(ex)

    # new layout for big 20141216
    temp = page.find('img', attrs={'class': 'original-image'})
    if temp is not None:
        return str(temp['data-src'])

    # new layout for big 20141212
    temp = page.find('img', attrs={'class': 'big'})
    if temp is not None:
        return str(temp['data-src'])

    # old layout
    temp = page.find('img')['src']
    return str(temp)
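# Editor's sketch of the 'original-image' fallback above, run against a minimal
# HTML snippet built from the sample URL in the comment (requires BeautifulSoup).
from bs4 import BeautifulSoup
snippet = '<img class="original-image" data-src="http://i2.pixiv.net/img-original/img/2013/12/27/01/51/37/40538869_p7.jpg">'
sample_page = BeautifulSoup(snippet, 'html.parser')
original_img = sample_page.find('img', attrs={'class': 'original-image'})
print(str(original_img['data-src']))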
def process_pixiv_by_fanbox_id(caller, config, artist_id, start_page=1, end_page=0, tags=None, title_prefix=""):
    # Implement #1005
    config.loadConfig(path=caller.configfile)
    br = PixivBrowserFactory.getBrowser()
    caller.set_console_title(title_prefix)
    artist = br.fanboxGetArtistById(artist_id)
    PixivArtistHandler.process_member(caller,
                                      config,
                                      artist.artistId,
                                      user_dir='',
                                      page=start_page,
                                      end_page=end_page,
                                      bookmark=False,
                                      tags=tags,
                                      title_prefix=title_prefix)
def process_fanbox_post(caller, config, post, artist):
    # caller function/method
    # TODO: ideally to be removed or passed as argument
    db = caller.__dbManager__
    br = PixivBrowserFactory.getBrowser()

    db.insertPost(artist.artistId, post.imageId, post.imageTitle, post.feeRequired, post.worksDate, post.type)

    post_files = []

    flag_processed = False
    if config.checkDBProcessHistory:
        result = db.selectPostByPostId(post.imageId)
        if result:
            updated_date = result[5]
            if updated_date is not None and post.updatedDateDatetime <= datetime_z.parse_datetime(updated_date):
                flag_processed = True

    try:
        if not post.is_restricted and not flag_processed:
            br.fanboxUpdatePost(post)

        if ((not post.is_restricted) or config.downloadCoverWhenRestricted) and (not flag_processed) and config.downloadCover:
            # cover image
            if post.coverImageUrl is not None:
                # fake the image_url for filename compatibility, add post id and pagenum
                fake_image_url = post.coverImageUrl.replace("{0}/cover/".format(post.imageId), "{0}_".format(post.imageId))
                filename = PixivHelper.make_filename(config.filenameFormatFanboxCover,
                                                     post,
                                                     artistInfo=artist,
                                                     tagsSeparator=config.tagsSeparator,
                                                     tagsLimit=config.tagsLimit,
                                                     fileUrl=fake_image_url,
                                                     bookmark=None,
                                                     searchTags='',
                                                     useTranslatedTag=config.useTranslatedTag,
                                                     tagTranslationLocale=config.tagTranslationLocale)
                filename = PixivHelper.sanitize_filename(filename, config.rootDirectory)
                post.linkToFile[post.coverImageUrl] = filename

                print("Downloading cover from {0}".format(post.coverImageUrl))
                print("Saved to {0}".format(filename))

                referer = "https://www.pixiv.net/fanbox/creator/{0}/post/{1}".format(artist.artistId, post.imageId)
                # don't pass the post id and page number to skip db check
                (result, filename) = PixivDownloadHandler.download_image(caller,
                                                                         post.coverImageUrl,
                                                                         filename,
                                                                         referer,
                                                                         config.overwrite,
                                                                         config.retry,
                                                                         config.backupOldFile,
                                                                         image=post)
                post_files.append((post.imageId, -1, filename))
                PixivHelper.get_logger().debug("Download %s result: %s", filename, result)
            else:
                PixivHelper.print_and_log("info", "No Cover Image for post: {0}.".format(post.imageId))

        if post.is_restricted:
            PixivHelper.print_and_log("info", "Skipping post: {0} due to restricted post.".format(post.imageId))
            return

        if flag_processed:
            PixivHelper.print_and_log("info", "Skipping post: {0} because it was downloaded before.".format(post.imageId))
            return

        if post.images is None or len(post.images) == 0:
            PixivHelper.print_and_log("info", "No Image available in post: {0}.".format(post.imageId))
        else:
            current_page = 0
            print("Image Count = {0}".format(len(post.images)))
            for image_url in post.images:
                # fake the image_url for filename compatibility, add post id and pagenum
                fake_image_url = image_url.replace("{0}/".format(post.imageId), "{0}_p{1}_".format(post.imageId, current_page))
                filename = PixivHelper.make_filename(config.filenameFormatFanboxContent,
                                                     post,
                                                     artistInfo=artist,
                                                     tagsSeparator=config.tagsSeparator,
                                                     tagsLimit=config.tagsLimit,
                                                     fileUrl=fake_image_url,
                                                     bookmark=None,
                                                     searchTags='',
                                                     useTranslatedTag=config.useTranslatedTag,
                                                     tagTranslationLocale=config.tagTranslationLocale)
                filename = PixivHelper.sanitize_filename(filename, config.rootDirectory)
                post.linkToFile[image_url] = filename

                referer = "https://www.pixiv.net/fanbox/creator/{0}/post/{1}".format(artist.artistId, post.imageId)

                print("Downloading image {0} from {1}".format(current_page, image_url))
                print("Saved to {0}".format(filename))

                # filesize detection and overwrite issue
                _oldvalue = config.alwaysCheckFileSize
                config.alwaysCheckFileSize = False
                # don't pass the post id and page number to skip db check
                (result, filename) = PixivDownloadHandler.download_image(caller,
                                                                         image_url,
                                                                         filename,
                                                                         referer,
                                                                         False,  # config.overwrite somehow unable to get remote filesize
                                                                         config.retry,
                                                                         config.backupOldFile,
                                                                         image=post)
                if result == PixivConstant.PIXIVUTIL_ABORTED:
                    raise KeyboardInterrupt()
                post_files.append((post.imageId, current_page, filename))
                PixivHelper.get_logger().debug("Download %s result: %s", filename, result)

                config.alwaysCheckFileSize = _oldvalue
                current_page = current_page + 1

        # Implement #447
        filename = PixivHelper.make_filename(config.filenameFormatFanboxInfo,
                                             post,
                                             artistInfo=artist,
                                             tagsSeparator=config.tagsSeparator,
                                             tagsLimit=config.tagsLimit,
                                             fileUrl="{0}".format(post.imageId),
                                             bookmark=None,
                                             searchTags='',
                                             useTranslatedTag=config.useTranslatedTag,
                                             tagTranslationLocale=config.tagTranslationLocale)
        filename = PixivHelper.sanitize_filename(filename, config.rootDirectory)
        if config.writeImageInfo:
            post.WriteInfo(filename + ".txt")
        if config.writeHtml:
            if post.type == "article" or (len(post.images) >= config.minImageCountForNonArticle and len(post.body_text) > config.minTextLengthForNonArticle):
                html_template = PixivConstant.HTML_TEMPLATE
                if os.path.isfile("template.html"):
                    reader = PixivHelper.open_text_file("template.html")
                    html_template = reader.read()
                    reader.close()
                post.WriteHtml(html_template, config.useAbsolutePathsInHtml, filename + ".html")

        if config.writeUrlInDescription:
            PixivHelper.write_url_in_description(post, config.urlBlacklistRegex, config.urlDumpFilename)
    finally:
        if len(post_files) > 0:
            db.insertPostImages(post_files)

        db.updatePostUpdateDate(post.imageId, post.updatedDate)
def process_fanbox_artist_by_id(caller, config, artist_id, end_page, title_prefix=""):
    config.loadConfig(path=caller.configfile)
    br = PixivBrowserFactory.getBrowser()

    caller.set_console_title(title_prefix)
    try:
        artist = br.fanboxGetArtistById(artist_id)
    except PixivException as pex:
        PixivHelper.print_and_log("error", f"Error getting FANBOX artist by id: {artist_id} ==> {pex.message}")
        if pex.errorCode != PixivException.USER_ID_SUSPENDED:
            return
        artist = br.fanboxGetArtistById(artist_id, for_suspended=True)
        formats = f"{config.filenameFormatFanboxCover}{config.filenameFormatFanboxContent}{config.filenameFormatFanboxInfo}"
        name_flag = "%artist%" in formats
        token_flag = "%member_token%" in formats
        if name_flag or token_flag:
            result = caller.__dbManager__.selectMemberByMemberId(artist.artistId)
            if result:
                artist.artistName = result[1]
                artist.artistToken = result[7]
                PixivHelper.print_and_log("info", f"Using saved artist name and token from db: {artist.artistName}, {artist.artistToken}")
            else:
                PixivHelper.print_and_log("warn", "Artist name or token found in FANBOX filename formats, but not in db.")
                if name_flag:
                    artist.artistName = input(f"Please input %artist% for {artist_id}: ").strip()
                if token_flag:
                    artist.artistToken = input(f"Please input %member_token% for {artist_id}: ").strip()

    current_page = 1
    next_url = None
    image_count = 1
    while True:
        PixivHelper.print_and_log("info", "Processing {0}, page {1}".format(artist, current_page))
        caller.set_console_title(f"{title_prefix} {artist}, page {current_page}")
        try:
            posts = br.fanboxGetPostsFromArtist(artist, next_url)
        except PixivException as pex:
            PixivHelper.print_and_log("error", "Error getting FANBOX posts of artist: {0} ==> {1}".format(artist, pex.message))
            break

        for post in posts:
            print("#{0}".format(image_count))
            post.printPost()

            # images
            if post.type in PixivModelFanbox.FanboxPost._supportedType:
                try:
                    process_fanbox_post(caller, config, post, artist)
                except KeyboardInterrupt:
                    choice = input("Keyboard Interrupt detected, continue to next post (Y/N)").rstrip("\r")
                    if choice.upper() == 'N':
                        PixivHelper.print_and_log("info", f"FANBOX artist: {artist}, processing aborted")
                        break
                    else:
                        continue
            image_count += 1
            PixivHelper.wait(config)

        if not artist.hasNextPage:
            PixivHelper.print_and_log("info", "No more post for {0}".format(artist))
            break
        current_page += 1
        if 0 < end_page < current_page:
            PixivHelper.print_and_log("info", "Reaching page limit for {0}, limit {1}".format(artist, end_page))
            break
        next_url = artist.nextUrl
        if next_url is None:
            PixivHelper.print_and_log("info", "No more next page for {0}".format(artist))
            break
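# Editor's illustration of the filename-format probing above; the combined format
# string is an example, only the %artist% and %member_token% tokens are taken from the code.
formats = "%member_token%/%artist% - example"
name_flag = "%artist%" in formats         # True -> artist name must be resolved from db or input
token_flag = "%member_token%" in formats  # True -> member token must be resolved as well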
def process_tags(caller, tags, page=1, end_page=0, wild_card=True, title_caption=False, start_date=None, end_date=None, use_tags_as_dir=False, member_id=None, bookmark_count=None, oldest_first=False, type_mode=None, notifier=None, job_option=None): # caller function/method # TODO: ideally to be removed or passed as argument config = caller.__config__ config.loadConfig(path=caller.configfile) if notifier is None: notifier = PixivHelper.dummy_notifier # override the config source if job_option is give for filename formats format_src = config if job_option is not None: format_src = job_option search_page = None _last_search_result = None i = page updated_limit_count = 0 try: search_tags = PixivHelper.decode_tags(tags) if use_tags_as_dir: PixivHelper.print_and_log(None, "Save to each directory using query tags.") format_src.rootDirectory += os.sep + PixivHelper.sanitize_filename(search_tags) tags = PixivHelper.encode_tags(tags) images = 1 last_image_id = -1 skipped_count = 0 use_bookmark_data = False if bookmark_count is not None and bookmark_count > 0: use_bookmark_data = True offset = 60 start_offset = (page - 1) * offset stop_offset = end_page * offset PixivHelper.print_and_log('info', f'Searching for: ({search_tags}) {tags}') flag = True while flag: (t, search_page) = PixivBrowserFactory.getBrowser().getSearchTagPage(tags, i, wild_card, title_caption, start_date, end_date, member_id, oldest_first, page, use_bookmark_data, bookmark_count, type_mode, r18mode=format_src.r18mode) if len(t.itemList) == 0: PixivHelper.print_and_log(None, 'No more images') flag = False elif _last_search_result is not None: set1 = set((x.imageId) for x in _last_search_result.itemList) difference = [x for x in t.itemList if (x.imageId) not in set1] if len(difference) == 0: PixivHelper.print_and_log(None, 'Getting duplicated result set, no more new images.') flag = False if flag: for item in t.itemList: last_image_id = item.imageId PixivHelper.print_and_log(None, f'Image #{images}') PixivHelper.print_and_log(None, f'Image Id: {item.imageId}') if bookmark_count is not None and bookmark_count > item.bookmarkCount: PixivHelper.print_and_log(None, f'Bookmark Count: {item.bookmarkCount}') PixivHelper.print_and_log('info', f'Skipping imageId= {item.imageId} because less than bookmark count limit ({bookmark_count} > {item.bookmarkCount}).') skipped_count = skipped_count + 1 continue result = 0 while True: try: if t.availableImages > 0: # PixivHelper.print_and_log(None, "Total Images: " + str(t.availableImages)) total_image = t.availableImages if(stop_offset > 0 and stop_offset < total_image): total_image = stop_offset total_image = total_image - start_offset # PixivHelper.print_and_log(None, "Total Images Offset: " + str(total_image)) else: total_image = ((i - 1) * 20) + len(t.itemList) title_prefix = "Tags:{0} Page:{1} Image {2}+{3} of {4}".format(tags, i, images, skipped_count, total_image) if member_id is not None: title_prefix = "MemberId: {0} Tags:{1} Page:{2} Image {3}+{4} of {5}".format(member_id, tags, i, images, skipped_count, total_image) result = PixivConstant.PIXIVUTIL_OK if not caller.DEBUG_SKIP_PROCESS_IMAGE: result = PixivImageHandler.process_image(caller, config, None, item.imageId, user_dir=format_src.rootDirectory, search_tags=search_tags, title_prefix=title_prefix, bookmark_count=item.bookmarkCount, image_response_count=item.imageResponse, notifier=notifier, job_option=job_option) PixivHelper.wait(result, config) break except KeyboardInterrupt: result = PixivConstant.PIXIVUTIL_KEYBOARD_INTERRUPT break 
except http.client.BadStatusLine: PixivHelper.print_and_log(None, "Stuff happened, trying again after 2 second...") time.sleep(2) images = images + 1 if result in (PixivConstant.PIXIVUTIL_SKIP_DUPLICATE, PixivConstant.PIXIVUTIL_SKIP_LOCAL_LARGER, PixivConstant.PIXIVUTIL_SKIP_DUPLICATE_NO_WAIT): updated_limit_count = updated_limit_count + 1 if config.checkUpdatedLimit != 0 and updated_limit_count >= config.checkUpdatedLimit: PixivHelper.print_and_log(None, f"Skipping tags: {tags}") PixivBrowserFactory.getBrowser().clear_history() return gc.collect() continue elif result == PixivConstant.PIXIVUTIL_KEYBOARD_INTERRUPT: choice = input("Keyboard Interrupt detected, continue to next image (Y/N)").rstrip("\r") if choice.upper() == 'N': PixivHelper.print_and_log("info", f"Tags: {tags}, processing aborted.") flag = False break else: continue PixivBrowserFactory.getBrowser().clear_history() i = i + 1 _last_search_result = t if end_page != 0 and end_page < i: PixivHelper.print_and_log('info', f"End Page reached: {end_page}") flag = False if t.isLastPage: PixivHelper.print_and_log('info', f"Last page: {i - 1}") flag = False if config.enableInfiniteLoop and i == 1001 and not oldest_first: if last_image_id > 0: # get the last date PixivHelper.print_and_log('info', f"Hit page 1000, trying to get workdate for last image id: {last_image_id}.") # referer = 'https://www.pixiv.net/en/artworks/{0}'.format(last_image_id) result = PixivBrowserFactory.getBrowser().getImagePage(last_image_id) _last_date = result[0].worksDateDateTime # _start_date = image.worksDateDateTime + datetime.timedelta(365) # hit the last page i = 1 end_date = _last_date.strftime("%Y-%m-%d") PixivHelper.print_and_log('info', f"Hit page 1000, looping back to page 1 with ecd: {end_date}.") flag = True last_image_id = -1 else: PixivHelper.print_and_log('info', "No more image in the list.") flag = False PixivHelper.print_and_log(None, 'done') if search_page is not None: del search_page except KeyboardInterrupt: raise except BaseException: PixivHelper.print_and_log('error', f'Error at process_tags() at page {i}: {sys.exc_info()}') try: if search_page is not None: dump_filename = f'Error page for search tags {tags} at page {i}.html' PixivHelper.dump_html(dump_filename, search_page) PixivHelper.print_and_log('error', f"Dumping html to: {dump_filename}") except BaseException: PixivHelper.print_and_log('error', f'Cannot dump page for search tags: {search_tags}') raise
# -*- coding: UTF-8 -*-
import PixivUtil2
import PixivBrowserFactory
import PixivConfig
import getpass
import mechanize

__config__ = PixivConfig.PixivConfig()
PixivUtil2.__config__ = __config__
__config__.loadConfig()
__br__ = PixivUtil2.__br__ = PixivBrowserFactory.getBrowser(config=__config__)


def prepare():
    # Log in
    username = __config__.username
    if username == '':
        username = input('Username ? ')
    password = __config__.password
    if password == '':
        password = getpass.getpass('Password ? ')

    result = False
    if len(__config__.cookie) > 0:
        result = __br__.loginUsingCookie(__config__.cookie)
    if not result:
        result = __br__.login(username, password)

    return result
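# Editor's sketch: hypothetical entry point for the helper script above.
if __name__ == '__main__':
    if prepare():
        print('Logged in.')
    else:
        print('Login failed.')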
def process_tags(caller, config, tags, page=1, end_page=0, wild_card=True, title_caption=False, start_date=None, end_date=None, use_tags_as_dir=False, member_id=None, bookmark_count=None, sort_order='date_d', type_mode=None, notifier=None): if notifier is None: notifier = PixivHelper.dummy_notifier search_page = None _last_search_result = None i = page updated_limit_count = 0 empty_page_retry = 0 try: search_tags = PixivHelper.decode_tags(tags) root_dir = config.rootDirectory if use_tags_as_dir: PixivHelper.print_and_log(None, "Save to each directory using query tags.") root_dir = config.rootDirectory + os.sep + PixivHelper.sanitize_filename(search_tags) tags = PixivHelper.encode_tags(tags) images = 1 last_image_id = -1 skipped_count = 0 use_bookmark_data = False if bookmark_count is not None and bookmark_count > 0: use_bookmark_data = True offset = PixivTags.POSTS_PER_PAGE start_offset = (page - 1) * offset stop_offset = end_page * offset PixivHelper.print_and_log('info', f'Searching for: ({search_tags}) {tags} with partial match = {wild_card} and title/caption = {title_caption}') flag = True while flag: (t, search_page) = PixivBrowserFactory.getBrowser().getSearchTagPage(tags, i, wild_card=wild_card, title_caption=title_caption, start_date=start_date, end_date=end_date, member_id=member_id, sort_order=sort_order, start_page=page, use_bookmark_data=use_bookmark_data, bookmark_count=bookmark_count, type_mode=type_mode, r18mode=config.r18mode) PixivHelper.print_and_log("info", f'Found {len(t.itemList)} images for page {i}.') if len(t.itemList) == 0: # Issue #1090 # check if the available images matching with current page * PixivTags.POSTS_PER_PAGE # and wait for {timeout} seconds and retry the page up to {config.retry} times. if _last_search_result is not None and _last_search_result.availableImages > (PixivTags.POSTS_PER_PAGE * i) and empty_page_retry < config.retry: PixivHelper.print_and_log("warn", f'Server did not return images, expected to have more (Total Post = {_last_search_result.availableImages}, current max posts = {PixivTags.POSTS_PER_PAGE * i}).') # wait at least 2 minutes before retry delay = config.timeout if delay < 120: delay = 120 PixivHelper.print_and_log(None, f"Waiting for {delay} seconds before retrying.") PixivHelper.print_delay(delay) empty_page_retry = empty_page_retry + 1 PixivBrowserFactory.getBrowser().addheaders = [('User-agent', f'{config.useragent}{int(time.time())}')] continue else: PixivHelper.print_and_log("warn", 'No more images.') flag = False elif _last_search_result is not None: set1 = set((x.imageId) for x in _last_search_result.itemList) difference = [x for x in t.itemList if (x.imageId) not in set1] if len(difference) == 0: PixivHelper.print_and_log("warn", 'Getting duplicated result set, no more new images.') flag = False if flag: # Issue #1090 reset retry flag on succesfull load empty_page_retry = 0 for item in t.itemList: last_image_id = item.imageId PixivHelper.print_and_log(None, f'Image #{images}') PixivHelper.print_and_log(None, f'Image Id: {item.imageId}') if bookmark_count is not None and bookmark_count > item.bookmarkCount: PixivHelper.print_and_log(None, f'Bookmark Count: {item.bookmarkCount}') PixivHelper.print_and_log('info', f'Skipping imageId= {item.imageId} because less than bookmark count limit ({bookmark_count} > {item.bookmarkCount}).') skipped_count = skipped_count + 1 continue result = 0 while True: try: if t.availableImages > 0: # PixivHelper.print_and_log(None, "Total Images: " + str(t.availableImages)) total_image = 
t.availableImages if(stop_offset > 0 and stop_offset < total_image): total_image = stop_offset total_image = total_image - start_offset # PixivHelper.print_and_log(None, "Total Images Offset: " + str(total_image)) else: total_image = ((i - 1) * 20) + len(t.itemList) title_prefix = "Tags:{0} Page:{1} Image {2}+{3} of {4}".format(tags, i, images, skipped_count, total_image) if member_id is not None: title_prefix = "MemberId: {0} Tags:{1} Page:{2} Image {3}+{4} of {5}".format(member_id, tags, i, images, skipped_count, total_image) result = PixivConstant.PIXIVUTIL_OK if not caller.DEBUG_SKIP_PROCESS_IMAGE: result = PixivImageHandler.process_image(caller, config, None, item.imageId, user_dir=root_dir, search_tags=search_tags, title_prefix=title_prefix, bookmark_count=item.bookmarkCount, image_response_count=item.imageResponse, notifier=notifier) PixivHelper.wait(result, config) break except KeyboardInterrupt: result = PixivConstant.PIXIVUTIL_KEYBOARD_INTERRUPT break except http.client.BadStatusLine: PixivHelper.print_and_log(None, "Stuff happened, trying again after 2 second...") PixivHelper.print_delay(2) images = images + 1 if result in (PixivConstant.PIXIVUTIL_SKIP_DUPLICATE, PixivConstant.PIXIVUTIL_SKIP_LOCAL_LARGER, PixivConstant.PIXIVUTIL_SKIP_DUPLICATE_NO_WAIT): updated_limit_count = updated_limit_count + 1 if config.checkUpdatedLimit != 0 and updated_limit_count >= config.checkUpdatedLimit: PixivHelper.print_and_log(None, f"Skipping tags: {tags}") PixivBrowserFactory.getBrowser().clear_history() return gc.collect() continue elif result == PixivConstant.PIXIVUTIL_KEYBOARD_INTERRUPT: choice = input("Keyboard Interrupt detected, continue to next image (Y/N)").rstrip("\r") if choice.upper() == 'N': PixivHelper.print_and_log("info", f"Tags: {tags}, processing aborted.") flag = False break else: continue PixivBrowserFactory.getBrowser().clear_history() i = i + 1 _last_search_result = t if end_page != 0 and end_page < i: PixivHelper.print_and_log('info', f"End Page reached: {end_page}") flag = False if t.isLastPage: PixivHelper.print_and_log('info', f"Last page: {i - 1}") flag = False if config.enableInfiniteLoop and i == 1001 and sort_order != 'date': if last_image_id > 0: # get the last date PixivHelper.print_and_log('info', f"Hit page 1000, trying to get workdate for last image id: {last_image_id}.") # referer = 'https://www.pixiv.net/en/artworks/{0}'.format(last_image_id) result = PixivBrowserFactory.getBrowser().getImagePage(last_image_id) _last_date = result[0].worksDateDateTime # _start_date = image.worksDateDateTime + datetime.timedelta(365) # hit the last page i = 1 end_date = _last_date.strftime("%Y-%m-%d") PixivHelper.print_and_log('info', f"Hit page 1000, looping back to page 1 with ecd: {end_date}.") flag = True last_image_id = -1 else: PixivHelper.print_and_log('info', "No more image in the list.") flag = False PixivHelper.print_and_log(None, 'done') if search_page is not None: del search_page except KeyboardInterrupt: raise except BaseException: PixivHelper.print_and_log('error', f'Error at process_tags() at page {i}: {sys.exc_info()}') try: if search_page is not None: dump_filename = f'Error page for search tags {tags} at page {i}.html' PixivHelper.dump_html(dump_filename, search_page) PixivHelper.print_and_log('error', f"Dumping html to: {dump_filename}") except BaseException: PixivHelper.print_and_log('error', f'Cannot dump page for search tags: {search_tags}') raise
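# Editor's illustration of the paging offsets computed by process_tags() above,
# with example values; the 60-posts-per-page constant follows the older revision,
# and PixivTags.POSTS_PER_PAGE is assumed to match it.
POSTS_PER_PAGE = 60
page, end_page = 2, 5
start_offset = (page - 1) * POSTS_PER_PAGE  # 60 -> results before the start page are excluded from the displayed total
stop_offset = end_page * POSTS_PER_PAGE     # 300 -> displayed total is capped at the end-page boundary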
def process_member(caller, config, member_id, user_dir='', page=1, end_page=0, bookmark=False, tags=None, title_prefix="", notifier=None, job_option=None): # caller function/method # TODO: ideally to be removed or passed as argument db = caller.__dbManager__ config.loadConfig(path=caller.configfile) np = caller.np np_is_valid = caller.np_is_valid if notifier is None: notifier = PixivHelper.dummy_notifier # override the config source if job_option is give for filename formats format_src = config if job_option is not None: format_src = job_option list_page = None msg = Fore.YELLOW + Style.BRIGHT + f'Processing Member Id: {member_id}' + Style.RESET_ALL PixivHelper.print_and_log('info', msg) notifier(type="MEMBER", message=msg) if page != 1: PixivHelper.print_and_log('info', 'Start Page: ' + str(page)) if end_page != 0: PixivHelper.print_and_log('info', 'End Page: ' + str(end_page)) if config.numberOfPage != 0: PixivHelper.print_and_log( 'info', 'Number of page setting will be ignored') elif np != 0: PixivHelper.print_and_log('info', 'End Page from command line: ' + str(np)) elif config.numberOfPage != 0: PixivHelper.print_and_log( 'info', 'End Page from config: ' + str(config.numberOfPage)) # calculate the offset for display properties offset = 48 # new offset for AJAX call offset_start = (page - 1) * offset offset_stop = end_page * offset try: no_of_images = 1 is_avatar_downloaded = False flag = True updated_limit_count = 0 image_id = -1 while flag: PixivHelper.print_and_log(None, 'Page ', page) caller.set_console_title( f"{title_prefix}MemberId: {member_id} Page: {page}") # Try to get the member page while True: try: (artist, list_page ) = PixivBrowserFactory.getBrowser().getMemberPage( member_id, page, bookmark, tags, r18mode=format_src.r18mode) break except PixivException as ex: caller.ERROR_CODE = ex.errorCode PixivHelper.print_and_log( 'info', f'Member ID ({member_id}): {ex}') if ex.errorCode == PixivException.NO_IMAGES: pass else: if list_page is None: list_page = ex.htmlPage if list_page is not None: PixivHelper.dump_html( f"Dump for {member_id} Error Code {ex.errorCode}.html", list_page) if ex.errorCode == PixivException.USER_ID_NOT_EXISTS or ex.errorCode == PixivException.USER_ID_SUSPENDED: db.setIsDeletedFlagForMemberId(int(member_id)) PixivHelper.print_and_log( 'info', f'Set IsDeleted for MemberId: {member_id} not exist.' 
) # db.deleteMemberByMemberId(member_id) # PixivHelper.printAndLog('info', 'Deleting MemberId: ' + str(member_id) + ' not exist.') if ex.errorCode == PixivException.OTHER_MEMBER_ERROR: PixivHelper.print_and_log(None, ex.message) caller.__errorList.append( dict(type="Member", id=str(member_id), message=ex.message, exception=ex)) return except AttributeError: # Possible layout changes, try to dump the file below raise except BaseException: exc_type, exc_value, exc_traceback = sys.exc_info() traceback.print_exception(exc_type, exc_value, exc_traceback) PixivHelper.print_and_log( 'error', f'Error at processing Artist Info: {sys.exc_info()}') PixivHelper.print_and_log(None, f'Member Name : {artist.artistName}') PixivHelper.print_and_log(None, f'Member Avatar: {artist.artistAvatar}') PixivHelper.print_and_log(None, f'Member Token : {artist.artistToken}') PixivHelper.print_and_log( None, f'Member Background : {artist.artistBackground}') print_offset_stop = offset_stop if offset_stop < artist.totalImages and offset_stop != 0 else artist.totalImages PixivHelper.print_and_log( None, f'Processing images from {offset_start + 1} to {print_offset_stop} of {artist.totalImages}' ) if not is_avatar_downloaded and config.downloadAvatar: if user_dir == '': target_dir = format_src.rootDirectory else: target_dir = user_dir avatar_filename = PixivHelper.create_avatar_filename( artist, target_dir, format_src) if not caller.DEBUG_SKIP_PROCESS_IMAGE: if artist.artistAvatar.find('no_profile') == -1: PixivDownloadHandler.download_image( caller, artist.artistAvatar, avatar_filename, "https://www.pixiv.net/", config.overwrite, config.retry, config.backupOldFile, notifier=notifier) # Issue #508 if artist.artistBackground is not None and artist.artistBackground.startswith( "http"): bg_name = PixivHelper.create_bg_filename_from_avatar_filename( avatar_filename) PixivDownloadHandler.download_image( caller, artist.artistBackground, bg_name, "https://www.pixiv.net/", config.overwrite, config.retry, config.backupOldFile, notifier=notifier) is_avatar_downloaded = True if config.autoAddMember: db.insertNewMember(int(member_id)) db.updateMemberName(member_id, artist.artistName) if not artist.haveImages: PixivHelper.print_and_log('info', f"No image found for: {member_id}") db.updateLastDownloadDate(member_id) flag = False continue result = PixivConstant.PIXIVUTIL_NOT_OK for image_id in artist.imageList: PixivHelper.print_and_log(None, f'#{no_of_images}') retry_count = 0 while True: try: if artist.totalImages > 0: # PixivHelper.safePrint("Total Images = " + str(artist.totalImages)) total_image_page_count = artist.totalImages if (offset_stop > 0 and offset_stop < total_image_page_count): total_image_page_count = offset_stop total_image_page_count = total_image_page_count - offset_start # PixivHelper.safePrint("Total Images Offset = " + str(total_image_page_count)) else: total_image_page_count = ( (page - 1) * 20) + len(artist.imageList) title_prefix_img = f"{title_prefix}MemberId: {member_id} Page: {page} Post {no_of_images}+{updated_limit_count} of {total_image_page_count}" if not caller.DEBUG_SKIP_PROCESS_IMAGE: result = PixivImageHandler.process_image( caller, config, artist, image_id, user_dir, bookmark, title_prefix=title_prefix_img, notifier=notifier, job_option=job_option) break except KeyboardInterrupt: result = PixivConstant.PIXIVUTIL_KEYBOARD_INTERRUPT break except BaseException: if retry_count > config.retry: PixivHelper.print_and_log( 'error', f"Giving up image_id: {image_id}") return retry_count = retry_count + 1 
PixivHelper.print_and_log( None, f"Stuff happened, trying again after 2 second ({retry_count})" ) exc_type, exc_value, exc_traceback = sys.exc_info() traceback.print_exception(exc_type, exc_value, exc_traceback) PixivHelper.print_and_log( "error", f"Error at process_member(): {sys.exc_info()} Member Id: {member_id}" ) time.sleep(2) if result in (PixivConstant.PIXIVUTIL_SKIP_DUPLICATE, PixivConstant.PIXIVUTIL_SKIP_LOCAL_LARGER, PixivConstant.PIXIVUTIL_SKIP_DUPLICATE_NO_WAIT): updated_limit_count = updated_limit_count + 1 if config.checkUpdatedLimit != 0 and updated_limit_count >= config.checkUpdatedLimit: PixivHelper.safePrint(f"Skipping member: {member_id}") db.updateLastDownloadDate(member_id) PixivBrowserFactory.getBrowser( config=config).clear_history() return gc.collect() continue if result == PixivConstant.PIXIVUTIL_KEYBOARD_INTERRUPT: choice = input( "Keyboard Interrupt detected, continue to next image (Y/N)" ).rstrip("\r") if choice.upper() == 'N': PixivHelper.print_and_log( "info", f"Member: {member_id}, processing aborted") flag = False break else: continue # return code from process image if result == PixivConstant.PIXIVUTIL_SKIP_OLDER: PixivHelper.print_and_log( "info", "Reached older images, skippin to next member.") db.updateLastDownloadDate(member_id) flag = False break no_of_images = no_of_images + 1 PixivHelper.wait(result, config) if artist.isLastPage: db.updateLastDownloadDate(member_id) PixivHelper.print_and_log(None, "Last Page") flag = False page = page + 1 # page limit checking if end_page > 0 and page > end_page: PixivHelper.print_and_log( None, f"Page limit reached (from endPage limit ={end_page})") db.updateLastDownloadDate(member_id) flag = False else: if np_is_valid: # Yavos: overwriting config-data if page > np and np > 0: PixivHelper.print_and_log( None, f"Page limit reached (from command line ={np})") flag = False elif page > config.numberOfPage and config.numberOfPage > 0: PixivHelper.print_and_log( None, f"Page limit reached (from config ={config.numberOfPage})" ) flag = False del artist del list_page PixivBrowserFactory.getBrowser(config=config).clear_history() gc.collect() log_message = "" if int(image_id) > 0: db.updateLastDownloadedImage(member_id, image_id) log_message = f'last image_id: {image_id}' else: log_message = 'no images were found.' PixivHelper.print_and_log( "info", f"Member_id: {member_id} completed: {log_message}") except KeyboardInterrupt: raise except Exception: exc_type, exc_value, exc_traceback = sys.exc_info() traceback.print_exception(exc_type, exc_value, exc_traceback) PixivHelper.print_and_log( 'error', f'Error at process_member(): {sys.exc_info()}') try: if list_page is not None: dump_filename = f'Error page for member {member_id} at page {page}.html' PixivHelper.dump_html(dump_filename, list_page) PixivHelper.print_and_log('error', f"Dumping html to: {dump_filename}") except BaseException: PixivHelper.print_and_log( 'error', f'Cannot dump page for member_id: {member_id}') raise
def process_image(caller, config, artist=None, image_id=None, user_dir='', bookmark=False, search_tags='', title_prefix="", bookmark_count=-1, image_response_count=-1, notifier=None, job_option=None): # caller function/method # TODO: ideally to be removed or passed as argument db = caller.__dbManager__ if notifier is None: notifier = PixivHelper.dummy_notifier # override the config source if job_option is give for filename formats format_src = config if job_option is not None: format_src = job_option parse_medium_page = None image = None result = None referer = f'https://www.pixiv.net/artworks/{image_id}' filename = f'no-filename-{image_id}.tmp' try: msg = Fore.YELLOW + Style.NORMAL + f'Processing Image Id: {image_id}' + Style.RESET_ALL PixivHelper.print_and_log(None, msg) notifier(type="IMAGE", message=msg) # check if already downloaded. images won't be downloaded twice - needed in process_image to catch any download r = db.selectImageByImageId(image_id, cols='save_name') exists = False in_db = False if r is not None: exists = db.cleanupFileExists(r[0]) in_db = True # skip if already recorded in db and alwaysCheckFileSize is disabled and overwrite is disabled. if in_db and not config.alwaysCheckFileSize and not config.overwrite: PixivHelper.print_and_log(None, f'Already downloaded in DB: {image_id}') gc.collect() return PixivConstant.PIXIVUTIL_SKIP_DUPLICATE_NO_WAIT # get the medium page try: (image, parse_medium_page) = PixivBrowserFactory.getBrowser().getImagePage(image_id=image_id, parent=artist, from_bookmark=bookmark, bookmark_count=bookmark_count) if len(title_prefix) > 0: caller.set_console_title(f"{title_prefix} ImageId: {image.imageId}") else: caller.set_console_title(f"MemberId: {image.artist.artistId} ImageId: {image.imageId}") except PixivException as ex: caller.ERROR_CODE = ex.errorCode caller.__errorList.append(dict(type="Image", id=str(image_id), message=ex.message, exception=ex)) if ex.errorCode == PixivException.UNKNOWN_IMAGE_ERROR: PixivHelper.print_and_log('error', ex.message) elif ex.errorCode == PixivException.SERVER_ERROR: PixivHelper.print_and_log('error', f'Giving up image_id (medium): {image_id}') elif ex.errorCode > 2000: PixivHelper.print_and_log('error', f'Image Error for {image_id}: {ex.message}') if parse_medium_page is not None: dump_filename = f'Error medium page for image {image_id}.html' PixivHelper.dump_html(dump_filename, parse_medium_page) PixivHelper.print_and_log('error', f'Dumping html to: {dump_filename}') else: PixivHelper.print_and_log('error', f'Image ID ({image_id}): {ex}') PixivHelper.print_and_log('error', f'Stack Trace: {sys.exc_info()}') return PixivConstant.PIXIVUTIL_NOT_OK except Exception as ex: PixivHelper.print_and_log('error', f'Image ID ({image_id}): {ex}') if parse_medium_page is not None: dump_filename = f'Error medium page for image {image_id}.html' PixivHelper.dump_html(dump_filename, parse_medium_page) PixivHelper.print_and_log('error', f'Dumping html to: {dump_filename}') PixivHelper.print_and_log('error', f'Stack Trace: {sys.exc_info()}') exc_type, exc_value, exc_traceback = sys.exc_info() traceback.print_exception(exc_type, exc_value, exc_traceback) return PixivConstant.PIXIVUTIL_NOT_OK download_image_flag = True # date validation and blacklist tag validation if config.dateDiff > 0: if image.worksDateDateTime != datetime.datetime.fromordinal(1).replace(tzinfo=datetime_z.utc): if image.worksDateDateTime < (datetime.datetime.today() - datetime.timedelta(config.dateDiff)).replace(tzinfo=datetime_z.utc): 
PixivHelper.print_and_log('info', f'Skipping image_id: {image_id} because contains older than: {config.dateDiff} day(s).') download_image_flag = False result = PixivConstant.PIXIVUTIL_SKIP_OLDER if config.useBlacklistMembers and download_image_flag: if str(image.originalArtist.artistId) in caller.__blacklistMembers: PixivHelper.print_and_log('info', f'Skipping image_id: {image_id} because contains blacklisted member id: {image.originalArtist.artistId}') download_image_flag = False result = PixivConstant.PIXIVUTIL_SKIP_BLACKLIST if config.useBlacklistTags and download_image_flag: for item in caller.__blacklistTags: if item in image.imageTags: PixivHelper.print_and_log('info', f'Skipping image_id: {image_id} because contains blacklisted tags: {item}') download_image_flag = False result = PixivConstant.PIXIVUTIL_SKIP_BLACKLIST break if config.useBlacklistTitles and download_image_flag: for item in caller.__blacklistTitles: if item in image.imageTitle: PixivHelper.print_and_log('info', f'Skipping image_id: {image_id} because contains blacklisted Title: {item}') download_image_flag = False result = PixivConstant.PIXIVUTIL_SKIP_BLACKLIST break if download_image_flag and not caller.DEBUG_SKIP_DOWNLOAD_IMAGE: if artist is None: PixivHelper.print_and_log(None, f'Member Name : {image.artist.artistName}') PixivHelper.print_and_log(None, f'Member Avatar: {image.artist.artistAvatar}') PixivHelper.print_and_log(None, f'Member Token : {image.artist.artistToken}') PixivHelper.print_and_log(None, f'Member Background : {image.artist.artistBackground}') PixivHelper.print_and_log(None, f"Title: {image.imageTitle}") tags_str = ', '.join(image.imageTags) PixivHelper.print_and_log(None, f"Tags : {tags_str}") PixivHelper.print_and_log(None, f"Date : {image.worksDateDateTime}") PixivHelper.print_and_log(None, f"Mode : {image.imageMode}") # get bookmark count if ("%bookmark_count%" in format_src.filenameFormat or "%image_response_count%" in format_src.filenameFormat) and image.bookmark_count == -1: PixivHelper.print_and_log(None, "Parsing bookmark page", end=' ') bookmark_url = f'https://www.pixiv.net/bookmark_detail.php?illust_id={image_id}' parse_bookmark_page = PixivBrowserFactory.getBrowser().getPixivPage(bookmark_url) image.ParseBookmarkDetails(parse_bookmark_page) parse_bookmark_page.decompose() del parse_bookmark_page PixivHelper.print_and_log(None, f"Bookmark Count : {image.bookmark_count}") caller.__br__.back() if config.useSuppressTags: for item in caller.__suppressTags: if item in image.imageTags: image.imageTags.remove(item) # get manga page if image.imageMode == 'manga': PixivHelper.print_and_log(None, f"Page Count : {image.imageCount}") if user_dir == '': # Yavos: use config-options target_dir = format_src.rootDirectory else: # Yavos: use filename from list target_dir = user_dir result = PixivConstant.PIXIVUTIL_OK manga_files = list() page = 0 # Issue #639 source_urls = image.imageUrls if config.downloadResized: source_urls = image.imageResizedUrls for img in source_urls: PixivHelper.print_and_log(None, f'Image URL : {img}') url = os.path.basename(img) split_url = url.split('.') if split_url[0].startswith(str(image_id)): filename_format = format_src.filenameFormat if image.imageMode == 'manga': filename_format = format_src.filenameMangaFormat filename = PixivHelper.make_filename(filename_format, image, tagsSeparator=config.tagsSeparator, tagsLimit=config.tagsLimit, fileUrl=url, bookmark=bookmark, searchTags=search_tags, useTranslatedTag=config.useTranslatedTag, 
tagTranslationLocale=config.tagTranslationLocale) filename = PixivHelper.sanitize_filename(filename, target_dir) if image.imageMode == 'manga' and config.createMangaDir: manga_page = caller.__re_manga_page.findall(filename) if len(manga_page) > 0: splitted_filename = filename.split(manga_page[0][0], 1) splitted_manga_page = manga_page[0][0].split("_p", 1) # filename = splitted_filename[0] + splitted_manga_page[0] + os.sep + "_p" + splitted_manga_page[1] + splitted_filename[1] filename = f"{splitted_filename[0]}{splitted_manga_page[0]}{os.sep}_p{splitted_manga_page[1]}{splitted_filename[1]}" PixivHelper.print_and_log('info', f'Filename : {filename}') result = PixivConstant.PIXIVUTIL_NOT_OK try: (result, filename) = PixivDownloadHandler.download_image(caller, img, filename, referer, config.overwrite, config.retry, config.backupOldFile, image, page, notifier) if result == PixivConstant.PIXIVUTIL_NOT_OK: PixivHelper.print_and_log('error', f'Image url not found/failed to download: {image.imageId}') elif result == PixivConstant.PIXIVUTIL_ABORTED: raise KeyboardInterrupt() manga_files.append((image_id, page, filename)) page = page + 1 except urllib.error.URLError: PixivHelper.print_and_log('error', f'Error when download_image(), giving up url: {img}') PixivHelper.print_and_log(None, '') if config.writeImageInfo or config.writeImageJSON: filename_info_format = format_src.filenameInfoFormat or format_src.filenameFormat # Issue #575 if image.imageMode == 'manga': filename_info_format = format_src.filenameMangaInfoFormat or format_src.filenameMangaFormat or filename_info_format info_filename = PixivHelper.make_filename(filename_info_format, image, tagsSeparator=config.tagsSeparator, tagsLimit=config.tagsLimit, fileUrl=url, appendExtension=False, bookmark=bookmark, searchTags=search_tags, useTranslatedTag=config.useTranslatedTag, tagTranslationLocale=config.tagTranslationLocale) info_filename = PixivHelper.sanitize_filename(info_filename, target_dir) # trim _pXXX info_filename = re.sub(r'_p?\d+$', '', info_filename) if config.writeImageInfo: image.WriteInfo(info_filename + ".txt") if config.writeImageJSON: image.WriteJSON(info_filename + ".json") if image.imageMode == 'ugoira_view': if config.writeUgoiraInfo: image.WriteUgoiraData(filename + ".js") # Handle #451 if config.createUgoira and (result in (PixivConstant.PIXIVUTIL_OK, PixivConstant.PIXIVUTIL_SKIP_DUPLICATE)): PixivDownloadHandler.handle_ugoira(image, filename, config, notifier) if config.writeUrlInDescription: PixivHelper.write_url_in_description(image, config.urlBlacklistRegex, config.urlDumpFilename) if in_db and not exists: result = PixivConstant.PIXIVUTIL_CHECK_DOWNLOAD # There was something in the database which had not been downloaded # Only save to db if all images is downloaded completely if result in (PixivConstant.PIXIVUTIL_OK, PixivConstant.PIXIVUTIL_SKIP_DUPLICATE, PixivConstant.PIXIVUTIL_SKIP_LOCAL_LARGER): try: db.insertImage(image.artist.artistId, image.imageId, image.imageMode) except BaseException: PixivHelper.print_and_log('error', f'Failed to insert image id:{image.imageId} to DB') db.updateImage(image.imageId, image.imageTitle, filename, image.imageMode) if len(manga_files) > 0: db.insertMangaImages(manga_files) # map back to PIXIVUTIL_OK (because of ugoira file check) result = 0 if image is not None: del image if parse_medium_page is not None: del parse_medium_page gc.collect() PixivHelper.print_and_log(None, '\n') return result except Exception as ex: if isinstance(ex, KeyboardInterrupt): raise caller.ERROR_CODE = 
getattr(ex, 'errorCode', -1) exc_type, exc_value, exc_traceback = sys.exc_info() traceback.print_exception(exc_type, exc_value, exc_traceback) PixivHelper.print_and_log('error', f'Error at process_image(): {image_id}') PixivHelper.print_and_log('error', f'Exception: {sys.exc_info()}') if parse_medium_page is not None: dump_filename = f'Error medium page for image {image_id}.html' PixivHelper.dump_html(dump_filename, parse_medium_page) PixivHelper.print_and_log('error', f'Dumping html to: {dump_filename}') raise