def download_image(caller,
                   url,
                   filename,
                   referer,
                   overwrite,
                   max_retry,
                   backup_old_file=False,
                   image=None,
                   page=None,
                   notifier=None):
    '''Download url into filename, honouring the skip/overwrite rules in config.

    Before downloading, the local file, an already converted ``.ugoira``
    sibling (for ``.zip`` targets) and the filename recorded in the database
    for the image are checked; any of them can short-circuit the download.
    After downloading, the size is compared with the remote size and, when
    ``config.verifyImage`` is set, image and archive files are opened to make
    sure they are readable.

    Args:
        caller: object providing ``__dbManager__``, ``__config__``,
            ``UTF8_FS``, ``ERROR_CODE``, ``start_iv`` and ``dfilename``.
        url: remote address of the file.
        filename: target path (text).
        referer: referer passed to the size probe and the download.
        overwrite: forwarded to ``PixivHelper.check_file_exists`` and
            ``perform_download``; when false (and
            ``config.alwaysCheckFileSize`` is off) an existing local file is
            skipped without contacting the server.
        max_retry: number of retries after a failed attempt.
        backup_old_file: forwarded to ``PixivHelper.check_file_exists``.
        image: optional image object; enables the timestamp check, the
            database lookup and setting the modification time.
        page: optional page number used for the database lookup.
        notifier: optional callback; defaults to
            ``PixivHelper.dummy_notifier``.

    Returns:
        A ``(result_code, filename)`` tuple.  ``filename`` is ``None`` for
        ``PIXIVUTIL_NOT_OK`` (HTTP 404/500/502) and ``PIXIVUTIL_ABORTED``
        (Ctrl-C).  If ``max_retry`` is negative the retry loop never runs and
        ``None`` is returned.

    Raises:
        The last exception of the final attempt once the retries are used
        up; ``caller.ERROR_CODE`` is set before every retry/raise.
    '''
    # caller function/method
    # TODO: ideally to be removed or passed as argument
    db = caller.__dbManager__
    config: PixivConfig = caller.__config__

    if notifier is None:
        notifier = PixivHelper.dummy_notifier

    temp_error_code = None
    retry_count = 0

    # Issue #548
    filename_save = filename

    # test once and set the result
    if caller.UTF8_FS is None:
        filename_test = os.path.dirname(filename_save) + os.sep + "あいうえお"
        try:
            PixivHelper.makeSubdirs(filename_test)
            with open(filename_test + '.test', "wb"):
                pass
            os.remove(filename_test + '.test')
            caller.UTF8_FS = True
        except UnicodeEncodeError:
            caller.UTF8_FS = False

    if not caller.UTF8_FS:
        # For file operations, force the usage of a utf-8 encode filename
        filename_save = filename.encode('utf-8')

    while retry_count <= max_retry:
        try:
            try:
                if not overwrite and not config.alwaysCheckFileSize:
                    PixivHelper.print_and_log(None, '\rChecking local filename...', newline=False)
                    if os.path.isfile(filename_save):
                        PixivHelper.print_and_log('info', f"\rLocal file exists: {filename}")
                        return (PixivConstant.PIXIVUTIL_SKIP_DUPLICATE, filename_save)

                # Issue #807
                if config.checkLastModified and os.path.isfile(filename_save) and image is not None:
                    local_timestamp = os.path.getmtime(filename_save)
                    remote_timestamp = time.mktime(image.worksDateDateTime.timetuple())
                    if local_timestamp == remote_timestamp:
                        PixivHelper.print_and_log(
                            'info',
                            f"\rLocal file timestamp match with remote: {filename} => {image.worksDateDateTime}")
                        return (PixivConstant.PIXIVUTIL_SKIP_DUPLICATE, filename_save)

                remote_file_size = get_remote_filesize(url, referer, config, notifier)

                # 837
                if config.skipUnknownSize and os.path.isfile(filename_save) and remote_file_size == -1:
                    PixivHelper.print_and_log(
                        'info',
                        f"\rSkipped because file exists and cannot get remote file size for: {filename}")
                    return (PixivConstant.PIXIVUTIL_SKIP_DUPLICATE, filename_save)

                # 576
                if remote_file_size > 0:
                    if config.minFileSize != 0 and remote_file_size <= config.minFileSize:
                        return (PixivConstant.PIXIVUTIL_SIZE_LIMIT_SMALLER, filename_save)
                    if config.maxFileSize != 0 and remote_file_size >= config.maxFileSize:
                        return (PixivConstant.PIXIVUTIL_SIZE_LIMIT_LARGER, filename_save)

                # check if existing ugoira file exists
                if filename.endswith(".zip"):
                    # non-converted zip (no animation.json)
                    if os.path.isfile(filename_save):
                        old_size = os.path.getsize(filename_save)
                        # update for #451, always return identical?
                        check_result = PixivHelper.check_file_exists(
                            overwrite, filename_save, remote_file_size, old_size, backup_old_file)
                        if config.createUgoira:
                            handle_ugoira(image, filename_save, config, notifier)
                        return (check_result, filename)
                    # converted to ugoira (has animation.json)
                    ugo_name = filename[:-4] + ".ugoira"
                    if os.path.isfile(ugo_name):
                        old_size = PixivHelper.get_ugoira_size(ugo_name)
                        check_result = PixivHelper.check_file_exists(
                            overwrite, ugo_name, remote_file_size, old_size, backup_old_file)
                        if check_result != PixivConstant.PIXIVUTIL_OK:
                            # try to convert existing file.
                            handle_ugoira(image, filename_save, config, notifier)
                            return (check_result, filename)
                elif os.path.isfile(filename_save):
                    # other image? files
                    old_size = os.path.getsize(filename_save)
                    check_result = PixivHelper.check_file_exists(
                        overwrite, filename, remote_file_size, old_size, backup_old_file)
                    if check_result != PixivConstant.PIXIVUTIL_OK:
                        return (check_result, filename)

                # check based on filename stored in DB using image id
                if image is not None:
                    db_filename = None
                    if page is not None:
                        row = db.selectImageByImageIdAndPage(image.imageId, page)
                        if row is not None:
                            db_filename = row[2]
                    else:
                        row = db.selectImageByImageId(image.imageId)
                        if row is not None:
                            db_filename = row[3]
                    if db_filename is not None and os.path.isfile(db_filename):
                        old_size = os.path.getsize(db_filename)
                        check_result = PixivHelper.check_file_exists(
                            overwrite, db_filename, remote_file_size, old_size, backup_old_file)
                        if check_result != PixivConstant.PIXIVUTIL_OK:
                            if db_filename.endswith(".zip"):
                                if config.createUgoira:
                                    handle_ugoira(image, db_filename, config, notifier)
                            if db_filename.endswith(".ugoira"):
                                handle_ugoira(image, db_filename, config, notifier)
                            return (check_result, db_filename)

                # actual download
                notifier(type="DOWNLOAD", message=f"Start downloading {url} to {filename_save}")
                (downloadedSize, filename_save) = perform_download(
                    url, remote_file_size, filename_save, overwrite, config, referer)

                # filename_save is bytes on a non UTF-8 file system; the
                # extension checks and the download list below need text,
                # otherwise they fail with TypeError.
                if isinstance(filename_save, bytes):
                    filename_text = filename_save.decode('utf-8', 'replace')
                else:
                    filename_text = filename_save

                # set last-modified and last-accessed timestamp
                if image is not None and config.setLastModified and filename_save is not None and os.path.isfile(
                        filename_save):
                    ts = time.mktime(image.worksDateDateTime.timetuple())
                    os.utime(filename_save, (ts, ts))

                # check the downloaded file size again
                if remote_file_size > 0 and downloadedSize != remote_file_size:
                    raise PixivException(f"Incomplete Downloaded for {url}", PixivException.DOWNLOAD_FAILED_OTHER)
                elif config.verifyImage and filename_text.endswith((".jpg", ".png", ".gif")):
                    try:
                        from PIL import Image, ImageFile
                        with open(filename_save, "rb") as fp:
                            # Fix Issue #269, refer to https://stackoverflow.com/a/42682508
                            ImageFile.LOAD_TRUNCATED_IMAGES = True
                            img = Image.open(fp)
                            img.load()
                        PixivHelper.print_and_log('info', ' Image verified.')
                    except BaseException:
                        # the file is already closed here, so it can be removed
                        PixivHelper.print_and_log('info', ' Image invalid, deleting...')
                        os.remove(filename_save)
                        raise
                elif config.verifyImage and filename_text.endswith((".ugoira", ".zip")):
                    try:
                        import zipfile
                        with open(filename_save, "rb") as fp:
                            zf = zipfile.ZipFile(fp)
                            check_result = None
                            try:
                                # testzip() returns the first bad member name, or None
                                check_result = zf.testzip()
                            except RuntimeError as e:
                                if 'encrypted' in str(e):
                                    PixivHelper.print_and_log('info', ' archive is encrypted, cannot verify.')
                                else:
                                    raise
                        if check_result is None:
                            PixivHelper.print_and_log('info', ' Image verified.')
                        else:
                            PixivHelper.print_and_log('info', f' Corrupted file in archive: {check_result}.')
                            raise PixivException(f"Incomplete Downloaded for {url}",
                                                 PixivException.DOWNLOAD_FAILED_OTHER)
                    except BaseException:
                        # the file is already closed here, so it can be removed
                        PixivHelper.print_and_log('info', ' Image invalid, deleting...')
                        os.remove(filename_save)
                        raise
                else:
                    PixivHelper.print_and_log('info', ' done.')

                # write to downloaded lists
                if caller.start_iv or config.createDownloadLists:
                    with codecs.open(caller.dfilename, 'a+', encoding='utf-8') as dfile:
                        dfile.write(filename_text + "\n")

                return (PixivConstant.PIXIVUTIL_OK, filename_save)

            except urllib.error.HTTPError as httpError:
                PixivHelper.print_and_log('error', f'[download_image()] HTTP Error: {httpError} at {url}')
                # these status codes are not retried
                if httpError.code in (404, 500, 502):
                    return (PixivConstant.PIXIVUTIL_NOT_OK, None)
                temp_error_code = PixivException.DOWNLOAD_FAILED_NETWORK
                raise
            except urllib.error.URLError as urlError:
                PixivHelper.print_and_log('error', f'[download_image()] URL Error: {urlError} at {url}')
                temp_error_code = PixivException.DOWNLOAD_FAILED_NETWORK
                raise
            except IOError as ioex:
                if ioex.errno == 28:
                    # errno 28 (ENOSPC): wait for the user, then try again
                    # without consuming a retry
                    PixivHelper.print_and_log('error', str(ioex))
                    input("Press Enter to retry.")
                    continue
                temp_error_code = PixivException.DOWNLOAD_FAILED_IO
                raise
            except KeyboardInterrupt:
                PixivHelper.print_and_log('info', 'Aborted by user request => Ctrl-C')
                return (PixivConstant.PIXIVUTIL_ABORTED, None)

        except BaseException:
            # everything re-raised by the handlers above ends up here
            if temp_error_code is None:
                temp_error_code = PixivException.DOWNLOAD_FAILED_OTHER
            caller.ERROR_CODE = temp_error_code
            exc_type, exc_value, exc_traceback = sys.exc_info()
            traceback.print_exception(exc_type, exc_value, exc_traceback)
            PixivHelper.print_and_log(
                'error',
                f'Error at download_image(): {sys.exc_info()} at {url} ({caller.ERROR_CODE})')

            if retry_count < max_retry:
                retry_count = retry_count + 1
                PixivHelper.print_and_log(None, f"\rRetrying [{retry_count}]...", newline=False)
                PixivHelper.print_delay(config.retryWait)
            else:
                raise
def process_novel(caller,
                  config,
                  novel_id,
                  notifier=None):
    '''Fetch one novel, write its content to disk and record it in the DB.

    The novel is skipped when the database already has an entry for it and
    none of ``config.overwrite``, ``config.checkLastModified`` or
    ``config.alwaysCheckFileSize`` is enabled.  For a file that already
    exists on disk, the timestamp check and the size check decide whether it
    is replaced; a file that does not exist yet is always written.
    '''
    if notifier is None:
        notifier = PixivHelper.dummy_notifier

    log = PixivHelper.print_and_log

    banner = Fore.YELLOW + Style.BRIGHT + f'Processing Novel details: {novel_id}' + Style.RESET_ALL
    log('info', banner)

    # check if already downloaded before and overwrite is not enabled
    known_row = caller.__dbManager__.selectNovelPostByPostId(novel_id)
    if known_row is not None and not (config.overwrite
                                      or config.checkLastModified
                                      or config.alwaysCheckFileSize):
        # column 2 holds the save_name
        log('warn', f"Novel already downloaded : {known_row[2]}")
        return

    novel = caller.__br__.getNovelPage(novel_id)
    log(None, f"Title : {novel.imageTitle}")
    log(None, f'Member Name : {novel.artist.artistName}')
    log(None, f'Member Avatar: {novel.artist.artistAvatar}')
    log(None, f'Member Token : {novel.artist.artistToken}')
    log(None, f'Member Background : {novel.artist.artistBackground}')
    joined_tags = ', '.join(novel.imageTags)
    log(None, f"Tags : {joined_tags}")
    log(None, f"Date : {novel.worksDateDateTime}")
    log(None, f"Mode : {novel.imageMode}")
    log(None, f"Bookmark Count : {novel.bookmark_count}")

    # fake the fileUrl
    fake_url = f"https://www.pixiv.net/ajax/novel/{novel_id}.html"
    raw_name = PixivHelper.make_filename(config.filenameFormatNovel,
                                         novel,
                                         tagsSeparator=config.tagsSeparator,
                                         tagsLimit=config.tagsLimit,
                                         fileUrl=fake_url,
                                         bookmark=False,
                                         searchTags="",
                                         useTranslatedTag=config.useTranslatedTag,
                                         tagTranslationLocale=config.tagTranslationLocale)
    filename = PixivHelper.sanitize_filename(raw_name, config.rootDirectory)
    log(None, f"Filename : {filename}")

    # checking logic
    if not os.path.exists(filename):
        # nothing on disk yet: just write it
        novel.write_content(filename)
    else:
        if config.checkLastModified:
            mtime_on_disk = os.path.getmtime(filename)
            mtime_remote = time.mktime(novel.worksDateDateTime.timetuple())
            if mtime_on_disk == mtime_remote:
                log('warn',
                    f"\rLocal file timestamp match with remote: {filename} => {novel.worksDateDateTime}")
                return

        if config.alwaysCheckFileSize:
            # write to a side file first so both sizes can be compared
            staging_name = filename + ".!tmp"
            novel.write_content(staging_name)
            fresh_size = os.path.getsize(staging_name)
            current_size = os.path.getsize(filename)
            verdict = PixivHelper.check_file_exists(config.overwrite,
                                                    filename,
                                                    fresh_size,
                                                    current_size,
                                                    config.backupOldFile)
            if verdict != PixivConstant.PIXIVUTIL_OK:
                os.remove(staging_name)
            else:
                os.rename(staging_name, filename)

    if config.setLastModified and filename is not None and os.path.isfile(filename):
        stamp = time.mktime(novel.worksDateDateTime.timetuple())
        os.utime(filename, (stamp, stamp))

    caller.__dbManager__.insertNovelPost(novel, filename)
    print()