Esempio n. 1
0
 def handleDebugMediumPage(self, response, imageId):
     """Dump and/or echo a medium page response for debugging.

     Nothing happens unless ``self._config.enableDump`` is set. When it is:

     * ``dumpMediumPage`` writes ``response`` through ``PixivHelper.dump_html``
       to a file named after ``imageId`` and logs the file name;
     * ``debugHttp`` prints the response via ``PixivHelper.safePrint``.

     :param response: page content to dump/print.
     :param imageId: image id used to build the dump file name.
     """
     settings = self._config
     if not settings.enableDump:
         # Both debug outputs are gated behind the master dump switch.
         return

     if settings.dumpMediumPage:
         target = "Medium Page for Image Id {0}.html".format(imageId)
         PixivHelper.dump_html(target, response)
         PixivHelper.print_and_log('info', 'Dumping html to: {0}'.format(target))

     if settings.debugHttp:
         PixivHelper.safePrint(u"reply: {0}".format(response))
Esempio n. 2
0
 def handleDebugTagSearchPage(self, response, url):
     """Dump and/or echo a tag search page response for debugging.

     Nothing happens unless ``self._config.enableDump`` is set. When it is:

     * ``dumpTagSearchPage`` writes ``response`` through
       ``PixivHelper.dump_html`` to a file named after ``url`` and logs the
       file name;
     * ``debugHttp`` prints the response, converted with
       ``PixivHelper.toUnicode``, via ``PixivHelper.safePrint``.

     :param response: page content to dump/print.
     :param url: search URL used to build the dump file name.
     """
     settings = self._config
     if not settings.enableDump:
         # Both debug outputs are gated behind the master dump switch.
         return

     if settings.dumpTagSearchPage:
         target = "TagSearch Page for {0}.html".format(url)
         PixivHelper.dump_html(target, response)
         PixivHelper.print_and_log('info', 'Dumping html to: {0}'.format(target))

     if settings.debugHttp:
         as_text = PixivHelper.toUnicode(response)
         PixivHelper.safePrint(u"reply: {0}".format(as_text))
Esempio n. 3
0
def process_new_illust_from_bookmark(caller,
                                     config,
                                     page_num=1,
                                     end_page_num=0,
                                     bookmark_count=-1):
    """Walk the "new illustrations from followed users" listing page by page.

    Each page is fetched with ``caller.__br__.getFollowedNewIllusts`` and every
    image id on it is handed to ``PixivImageHandler.process_image``.

    The walk stops when:

    * ``process_image`` returns ``PixivConstant.PIXIVUTIL_SKIP_OLDER``;
    * ``end_page_num`` is non-zero and the next page number exceeds it;
    * the fetched page reports ``isLastPage``.

    :param caller: object exposing the browser as ``__br__``.
    :param config: configuration; ``r18mode`` selects the "r18" listing mode.
    :param page_num: first page to fetch.
    :param end_page_num: last page to fetch, ``0`` meaning no limit.
    :param bookmark_count: forwarded unchanged to ``process_image``.
    :raises BaseException: anything raised while processing is logged and
        re-raised; ``KeyboardInterrupt`` is re-raised without logging.
    """
    br: PixivBrowser = caller.__br__
    # Never assigned in this version, so the dump in the error handler
    # below does not fire; kept so the handler stays valid.
    parsed_page = None
    try:
        print("Processing New Illust from bookmark")
        current_page = page_num
        image_number = 1
        keep_going = True
        while keep_going:
            print(f"Page #{current_page}")
            mode = "r18" if config.r18mode else "all"
            pb = br.getFollowedNewIllusts(mode, current_page=current_page)

            for image_id in pb.imageList:
                print(f"Image #{image_number}")
                result = PixivImageHandler.process_image(
                    caller,
                    config,
                    artist=None,
                    image_id=int(image_id),
                    bookmark_count=bookmark_count)
                image_number += 1

                if result == PixivConstant.PIXIVUTIL_SKIP_OLDER:
                    # Stop after this page; the end-of-page bookkeeping
                    # below still runs once.
                    keep_going = False
                    break

                PixivHelper.wait(result, config)
            current_page += 1

            past_requested_end = end_page_num != 0 and current_page > end_page_num
            if past_requested_end or pb.isLastPage:
                print("Limit or last page reached.")
                keep_going = False

        print("Done.")
    except KeyboardInterrupt:
        raise
    except BaseException:
        PixivHelper.print_and_log(
            'error', 'Error at process_new_illust_from_bookmark(): {0}'.format(
                sys.exc_info()))
        if parsed_page is not None:
            filename = "Dump for New Illust from bookmark.html"
            PixivHelper.dump_html(filename, parsed_page)
        raise
Esempio n. 4
0
    def login(self, username, password):
        """Log in with a username and password.

        Opens the login page to read the hidden ``post_key`` input, posts the
        credentials together with that key to the login API endpoint, and
        returns whatever ``self.processLoginResult`` produces for the response.

        :param username: value sent as ``pixiv_id``.
        :param password: value sent as ``password``.
        :returns: the result of ``self.processLoginResult``.
        :raises BaseException: any failure is printed, logged, the parsed
            login page is dumped to ``login_error.html``, and the exception
            is re-raised.
        """
        parsed = None
        try:
            PixivHelper.print_and_log('info', 'Logging in...')

            # Fetch the login form first: it carries the post key that the
            # login request has to echo back.
            login_page = self.open_with_retry("https://accounts.pixiv.net/login")
            parsed = BeautifulSoup(login_page, features="html5lib")
            post_key = parsed.find('input', attrs={'name': 'post_key'})
            login_page.close()

            form = {
                'pixiv_id': username,
                'password': password,
                'return_to': 'https://www.pixiv.net',
                'lang': 'en',
                'post_key': post_key['value'],
                'source': "accounts",
                'ref': '',
            }

            request = mechanize.Request(
                "https://accounts.pixiv.net/api/login?lang=en",
                form,
                method='POST')
            response = self.open_with_retry(request)

            outcome = self.processLoginResult(response, username, password)
            response.close()
            return outcome
        except BaseException:
            traceback.print_exc()
            PixivHelper.print_and_log(
                'error', 'Error at login(): {0}'.format(sys.exc_info()))
            PixivHelper.dump_html("login_error.html", str(parsed))
            raise
        finally:
            # Release the parse tree whether login succeeded or failed.
            if parsed is not None:
                parsed.decompose()
                del parsed
Esempio n. 5
0
    def getSearchTagPage(self, tags,
                         current_page,
                         wild_card=True,
                         title_caption=False,
                         start_date=None,
                         end_date=None,
                         member_id=None,
                         oldest_first=False,
                         start_page=1,
                         include_bookmark_data=False):
        """Fetch and parse one page of tag search results.

        When ``member_id`` is given, the member page is fetched through
        ``self.getMemberPage`` and parsed with ``PixivTags.parseMemberTags``.
        Otherwise the search URL is built with
        ``PixivHelper.generate_search_tag_url``, fetched, and parsed with
        ``PixivTags.parseTags``.

        When ``include_bookmark_data`` is true (tag search only), the ajax
        illust endpoint is queried (through the cache) for every item to fill
        in ``bookmarkCount`` and ``imageResponse``.

        :param tags: search tags, passed to the URL builder / parser.
        :param current_page: page number to fetch.
        :param wild_card: forwarded to the search URL builder.
        :param title_caption: forwarded to the search URL builder.
        :param start_date: forwarded to the search URL builder.
        :param end_date: forwarded to the search URL builder.
        :param member_id: restrict the search to this member when not ``None``.
        :param oldest_first: forwarded to the search URL builder.
        :param start_page: accepted for interface compatibility; not used here.
        :param include_bookmark_data: also retrieve per-image bookmark and
            response counts.
        :returns: ``(result, response_page)`` where ``result`` is the
            ``PixivTags`` instance.
        :raises urllib.error.HTTPError: when an image info request fails with
            a status other than 404.
        :raises BaseException: any failure while parsing the tag search is
            re-raised after the last response is dumped to disk.
        """
        if member_id is not None:
            # from member id search by tags
            (artist, response_page) = self.getMemberPage(
                member_id, current_page, False, tags)

            # convert to PixivTags
            result = PixivTags()
            result.parseMemberTags(artist, member_id, tags)
            return (result, response_page)

        # search by tags (member_id is None from here on)
        url = PixivHelper.generate_search_tag_url(tags, current_page,
                                                  title_caption,
                                                  wild_card,
                                                  oldest_first,
                                                  start_date,
                                                  end_date,
                                                  member_id,
                                                  self._config.r18mode)

        PixivHelper.print_and_log('info', 'Looping... for {0}'.format(url))
        response_page = self.getPixivPage(url, returnParsed=False)
        self.handleDebugTagSearchPage(response_page, url)

        result = None
        try:
            result = PixivTags()
            result.parseTags(response_page, tags, current_page)

            # parse additional information
            if include_bookmark_data:
                total = len(result.itemList)
                print("Retrieving bookmark information...", end=' ')
                for idx, image in enumerate(result.itemList, start=1):
                    print("\r", end=' ')
                    print("Retrieving bookmark information... [{0}] of [{1}]".format(
                        idx, total), end=' ')

                    img_url = "https://www.pixiv.net/ajax/illust/{0}".format(
                        image.imageId)
                    # NOTE(review): response_page is reused for the image
                    # JSON, so the value returned (and dumped on error) is
                    # the last image payload, not the search page. Kept
                    # as-is for compatibility; confirm whether callers rely
                    # on it.
                    response_page = self._get_from_cache(img_url)
                    if response_page is None:
                        try:
                            res = self.open_with_retry(img_url)
                            try:
                                response_page = res.read()
                            finally:
                                res.close()
                        except urllib.error.HTTPError as ex:
                            if ex.code != 404:
                                # Previously swallowed: None was cached and
                                # json.loads(None) failed with a TypeError
                                # that hid the real HTTP error.
                                raise
                            response_page = ex.read()
                        self._put_to_cache(img_url, response_page)

                    image_info_js = json.loads(response_page)
                    image.bookmarkCount = int(
                        image_info_js["body"]["bookmarkCount"])
                    image.imageResponse = int(
                        image_info_js["body"]["responseCount"])
            print("")
        except BaseException:
            PixivHelper.dump_html("Dump for SearchTags " + tags + ".html", response_page)
            raise

        return (result, response_page)
def process_tags(caller,
                 tags,
                 page=1,
                 end_page=0,
                 wild_card=True,
                 title_caption=False,
                 start_date=None,
                 end_date=None,
                 use_tags_as_dir=False,
                 member_id=None,
                 bookmark_count=None,
                 oldest_first=False,
                 type_mode=None,
                 notifier=None,
                 job_option=None):
    """Search by tags and process every image found, page by page.

    The configuration is reloaded from ``caller.configfile`` on entry. Search
    pages are fetched through ``PixivBrowserFactory.getBrowser()
    .getSearchTagPage`` and each item is handed to
    ``PixivImageHandler.process_image``.

    The page loop ends when a page has no items, when a page contains no image
    id that was not on the previous page, when ``end_page`` is passed, when the
    result reports ``isLastPage``, or when the user answers ``N`` after a
    keyboard interrupt. The function returns early once the number of
    duplicate/local-larger skips reaches ``config.checkUpdatedLimit``
    (if non-zero).

    :param caller: object exposing ``__config__``, ``configfile`` and
        ``DEBUG_SKIP_PROCESS_IMAGE``.
    :param tags: search tags; decoded for display and encoded for the query.
    :param page: first page to fetch.
    :param end_page: last page to fetch, ``0`` meaning no limit.
    :param wild_card: forwarded to the search.
    :param title_caption: forwarded to the search.
    :param start_date: forwarded to the search.
    :param end_date: forwarded to the search; overwritten when looping back
        after page 1000 (see ``config.enableInfiniteLoop`` below).
    :param use_tags_as_dir: append the sanitized tags to the root directory.
    :param member_id: forwarded to the search; also changes the title prefix.
    :param bookmark_count: skip items whose ``bookmarkCount`` is lower.
    :param oldest_first: forwarded to the search; disables the page-1000
        loop-back.
    :param type_mode: forwarded to the search.
    :param notifier: callable forwarded to ``process_image``; defaults to
        ``PixivHelper.dummy_notifier``.
    :param job_option: when given, replaces ``config`` as the source of
        ``rootDirectory`` and ``r18mode``.
    :raises BaseException: any failure is logged, the last search page is
        dumped when available, and the exception is re-raised;
        ``KeyboardInterrupt`` escaping the page loop is re-raised untouched.
    """
    # caller function/method
    # TODO: ideally to be removed or passed as argument
    config = caller.__config__
    config.loadConfig(path=caller.configfile)

    if notifier is None:
        notifier = PixivHelper.dummy_notifier

    # override the config source if job_option is given for filename formats
    format_src = config
    if job_option is not None:
        format_src = job_option

    search_page = None
    _last_search_result = None
    i = page
    updated_limit_count = 0

    try:
        search_tags = PixivHelper.decode_tags(tags)

        if use_tags_as_dir:
            PixivHelper.print_and_log(None, "Save to each directory using query tags.")
            # NOTE: this mutates format_src (config or job_option) in place.
            format_src.rootDirectory += os.sep + PixivHelper.sanitize_filename(search_tags)

        tags = PixivHelper.encode_tags(tags)

        images = 1
        last_image_id = -1
        skipped_count = 0
        use_bookmark_data = False
        if bookmark_count is not None and bookmark_count > 0:
            use_bookmark_data = True

        # 60 is used as the per-page item count for the progress math below
        # -- presumably the search page size; TODO confirm.
        offset = 60
        start_offset = (page - 1) * offset
        stop_offset = end_page * offset

        PixivHelper.print_and_log('info', f'Searching for: ({search_tags}) {tags}')
        flag = True
        while flag:
            (t, search_page) = PixivBrowserFactory.getBrowser().getSearchTagPage(tags,
                                                                                 i,
                                                                                 wild_card,
                                                                                 title_caption,
                                                                                 start_date,
                                                                                 end_date,
                                                                                 member_id,
                                                                                 oldest_first,
                                                                                 page,
                                                                                 use_bookmark_data,
                                                                                 bookmark_count,
                                                                                 type_mode,
                                                                                 r18mode=format_src.r18mode)
            if len(t.itemList) == 0:
                PixivHelper.print_and_log(None, 'No more images')
                flag = False
            elif _last_search_result is not None:
                # Stop when this page holds no image id that was not already
                # on the previous page.
                set1 = set((x.imageId) for x in _last_search_result.itemList)
                difference = [x for x in t.itemList if (x.imageId) not in set1]
                if len(difference) == 0:
                    PixivHelper.print_and_log(None, 'Getting duplicated result set, no more new images.')
                    flag = False

            if flag:
                for item in t.itemList:
                    last_image_id = item.imageId
                    PixivHelper.print_and_log(None, f'Image #{images}')
                    PixivHelper.print_and_log(None, f'Image Id: {item.imageId}')

                    if bookmark_count is not None and bookmark_count > item.bookmarkCount:
                        PixivHelper.print_and_log(None, f'Bookmark Count: {item.bookmarkCount}')
                        PixivHelper.print_and_log('info', f'Skipping imageId= {item.imageId} because less than bookmark count limit ({bookmark_count} > {item.bookmarkCount}).')
                        skipped_count = skipped_count + 1
                        continue

                    result = 0
                    # Retry loop: repeats only on http.client.BadStatusLine,
                    # after a 2 second pause; every other path breaks out.
                    while True:
                        try:
                            if t.availableImages > 0:
                                # PixivHelper.print_and_log(None, "Total Images: " + str(t.availableImages))
                                total_image = t.availableImages
                                if(stop_offset > 0 and stop_offset < total_image):
                                    total_image = stop_offset
                                total_image = total_image - start_offset
                                # PixivHelper.print_and_log(None, "Total Images Offset: " + str(total_image))
                            else:
                                # NOTE(review): uses 20 items per page here while
                                # the offset math above uses 60 -- confirm.
                                total_image = ((i - 1) * 20) + len(t.itemList)
                            title_prefix = "Tags:{0} Page:{1} Image {2}+{3} of {4}".format(tags, i, images, skipped_count, total_image)
                            if member_id is not None:
                                title_prefix = "MemberId: {0} Tags:{1} Page:{2} Image {3}+{4} of {5}".format(member_id,
                                                                                                             tags,
                                                                                                             i,
                                                                                                             images,
                                                                                                             skipped_count,
                                                                                                             total_image)
                            result = PixivConstant.PIXIVUTIL_OK
                            if not caller.DEBUG_SKIP_PROCESS_IMAGE:
                                result = PixivImageHandler.process_image(caller,
                                                                         config,
                                                                         None,
                                                                         item.imageId,
                                                                         user_dir=format_src.rootDirectory,
                                                                         search_tags=search_tags,
                                                                         title_prefix=title_prefix,
                                                                         bookmark_count=item.bookmarkCount,
                                                                         image_response_count=item.imageResponse,
                                                                         notifier=notifier,
                                                                         job_option=job_option)
                                PixivHelper.wait(result, config)
                            break
                        except KeyboardInterrupt:
                            # Converted into a result code so the user can be
                            # asked below whether to continue.
                            result = PixivConstant.PIXIVUTIL_KEYBOARD_INTERRUPT
                            break
                        except http.client.BadStatusLine:
                            PixivHelper.print_and_log(None, "Stuff happened, trying again after 2 second...")
                            time.sleep(2)

                    images = images + 1
                    if result in (PixivConstant.PIXIVUTIL_SKIP_DUPLICATE,
                                  PixivConstant.PIXIVUTIL_SKIP_LOCAL_LARGER,
                                  PixivConstant.PIXIVUTIL_SKIP_DUPLICATE_NO_WAIT):
                        # Count skipped images; give up on these tags once the
                        # configured limit is reached.
                        updated_limit_count = updated_limit_count + 1
                        if config.checkUpdatedLimit != 0 and updated_limit_count >= config.checkUpdatedLimit:
                            PixivHelper.print_and_log(None, f"Skipping tags: {tags}")
                            PixivBrowserFactory.getBrowser().clear_history()
                            return
                        gc.collect()
                        continue
                    elif result == PixivConstant.PIXIVUTIL_KEYBOARD_INTERRUPT:
                        choice = input("Keyboard Interrupt detected, continue to next image (Y/N)").rstrip("\r")
                        if choice.upper() == 'N':
                            PixivHelper.print_and_log("info", f"Tags: {tags}, processing aborted.")
                            flag = False
                            break
                        else:
                            continue

            PixivBrowserFactory.getBrowser().clear_history()

            i = i + 1
            _last_search_result = t

            if end_page != 0 and end_page < i:
                PixivHelper.print_and_log('info', f"End Page reached: {end_page}")
                flag = False
            if t.isLastPage:
                PixivHelper.print_and_log('info', f"Last page: {i - 1}")
                flag = False
            # After page 1000 (i has just become 1001), optionally restart from
            # page 1 using the work date of the last seen image as the new
            # end date. This can turn flag back on after the checks above.
            if config.enableInfiniteLoop and i == 1001 and not oldest_first:
                if last_image_id > 0:
                    # get the last date
                    PixivHelper.print_and_log('info', f"Hit page 1000, trying to get workdate for last image id: {last_image_id}.")
                    # referer = 'https://www.pixiv.net/en/artworks/{0}'.format(last_image_id)
                    result = PixivBrowserFactory.getBrowser().getImagePage(last_image_id)
                    _last_date = result[0].worksDateDateTime
                    # _start_date = image.worksDateDateTime + datetime.timedelta(365)
                    # hit the last page
                    i = 1
                    end_date = _last_date.strftime("%Y-%m-%d")
                    PixivHelper.print_and_log('info', f"Hit page 1000, looping back to page 1 with ecd: {end_date}.")
                    flag = True
                    last_image_id = -1
                else:
                    PixivHelper.print_and_log('info', "No more image in the list.")
                    flag = False

        PixivHelper.print_and_log(None, 'done')
        if search_page is not None:
            del search_page
    except KeyboardInterrupt:
        raise
    except BaseException:
        PixivHelper.print_and_log('error', f'Error at process_tags() at page {i}: {sys.exc_info()}')
        # Dumping is best-effort: a failure here is logged and must not
        # replace the original exception, which is re-raised below.
        try:
            if search_page is not None:
                dump_filename = f'Error page for search tags {tags} at page {i}.html'
                PixivHelper.dump_html(dump_filename, search_page)
                PixivHelper.print_and_log('error', f"Dumping html to: {dump_filename}")
        except BaseException:
            PixivHelper.print_and_log('error', f'Cannot dump page for search tags: {search_tags}')
        raise
Esempio n. 7
0
def process_from_group(caller,
                       config,
                       group_id,
                       limit=0,
                       process_external=True):
    """Download the images posted to a group.

    The group's JSON listing is fetched repeatedly, using the ``maxId`` of
    the previous response as the ``max_id`` of the next request. Images in
    ``imageList`` go through ``PixivImageHandler.process_image``; entries in
    ``externalImageList`` (only when ``process_external`` is true) are
    downloaded directly with ``PixivDownloadHandler.download_image``.

    The loop ends when a response contains neither kind of image or when
    ``limit`` items have been handled.

    :param caller: object exposing the browser as ``__br__``.
    :param config: configuration used for file naming and download options.
    :param group_id: id of the group to download from.
    :param limit: maximum number of items to handle, ``0`` meaning no limit.
    :param process_external: also download externally hosted images.
    :raises BaseException: any failure is logged, the last JSON response is
        dumped when available, and the exception is re-raised;
        ``KeyboardInterrupt`` is re-raised without logging.
    """
    br = caller.__br__
    json_response = None
    try:
        print("Download by Group Id")
        if limit != 0:
            print("Limit: {0}".format(limit))
        if process_external:
            print("Include External Image: {0}".format(process_external))

        max_id = 0
        image_count = 0
        flag = True
        while flag:
            url = "https://www.pixiv.net/group/images.php?format=json&max_id={0}&id={1}".format(
                max_id, group_id)
            PixivHelper.print_and_log('info',
                                      "Getting images from: {0}".format(url))
            response = br.open(url)
            try:
                json_response = response.read()
            finally:
                # Close the response even if reading it fails.
                response.close()
            group_data = PixivGroup(json_response)
            max_id = group_data.maxId

            if group_data.imageList:
                for image in group_data.imageList:
                    # image_count is zero-based, so ">=" stops after exactly
                    # `limit` items (">" let one extra item through).
                    if limit != 0 and image_count >= limit:
                        flag = False
                        break
                    print("Image #{0}".format(image_count))
                    print("ImageId: {0}".format(image))
                    result = PixivImageHandler.process_image(caller,
                                                             config,
                                                             image_id=image)
                    image_count = image_count + 1
                    PixivHelper.wait(result, config)

            if process_external and group_data.externalImageList:
                for image_data in group_data.externalImageList:
                    if limit != 0 and image_count >= limit:
                        flag = False
                        break
                    print("Image #{0}".format(image_count))
                    print("Member Id   : {0}".format(
                        image_data.artist.artistId))
                    PixivHelper.safePrint("Member Name  : " +
                                          image_data.artist.artistName)
                    print("Member Token : {0}".format(
                        image_data.artist.artistToken))
                    print("Image Url   : {0}".format(image_data.imageUrls[0]))

                    filename = PixivHelper.make_filename(
                        config.filenameFormat,
                        imageInfo=image_data,
                        tagsSeparator=config.tagsSeparator,
                        tagsLimit=config.tagsLimit,
                        fileUrl=image_data.imageUrls[0],
                        useTranslatedTag=config.useTranslatedTag,
                        tagTranslationLocale=config.tagTranslationLocale)
                    filename = PixivHelper.sanitize_filename(
                        filename, config.rootDirectory)
                    PixivHelper.safePrint("Filename  : " + filename)
                    (result, filename) = PixivDownloadHandler.download_image(
                        caller,
                        image_data.imageUrls[0],
                        filename,
                        url,
                        config.overwrite,
                        config.retry,
                        backup_old_file=config.backupOldFile)
                    PixivHelper.get_logger().debug("Download %s result: %s",
                                                   filename, result)
                    if config.setLastModified and filename is not None and os.path.isfile(
                            filename):
                        # Stamp the file with the work's own date.
                        ts = time.mktime(
                            image_data.worksDateDateTime.timetuple())
                        os.utime(filename, (ts, ts))

                    image_count = image_count + 1

            # Nothing of either kind in this response: the listing is exhausted.
            if not group_data.imageList and not group_data.externalImageList:
                flag = False
            print("")

    except KeyboardInterrupt:
        # A user abort is not an error; match the sibling process_* functions.
        raise
    except BaseException:
        PixivHelper.print_and_log(
            'error',
            'Error at process_from_group(): {0}'.format(sys.exc_info()))
        if json_response is not None:
            filename = f"Dump for Download by Group {group_id}.json"
            PixivHelper.dump_html(filename, json_response)
        raise
Esempio n. 8
0
def process_new_illust_from_bookmark(caller,
                                     config,
                                     page_num=1,
                                     end_page_num=0):
    """Walk the "new illustrations from bookmarked users" pages.

    Each page is opened through ``caller.__br__``, parsed into a
    ``PixivNewIllustBookmark`` and every image id on it is handed to
    ``PixivImageHandler.process_image``.

    The walk stops when:

    * a page has no images;
    * ``process_image`` returns ``PixivConstant.PIXIVUTIL_SKIP_OLDER``;
    * ``end_page_num`` is non-zero and the next page number exceeds it;
    * the next page number exceeds 5000;
    * the page reports ``isLastPage``.

    :param caller: object exposing the browser as ``__br__``.
    :param config: configuration; ``r18mode`` selects the R-18 listing URL.
    :param page_num: first page to fetch.
    :param end_page_num: last page to fetch, ``0`` meaning no limit.
    :raises BaseException: any failure is logged, the page being processed is
        dumped when one was parsed, and the exception is re-raised;
        ``KeyboardInterrupt`` is re-raised without logging.
    """
    br = caller.__br__
    parsed_page = None
    try:
        print("Processing New Illust from bookmark")
        i = page_num
        image_count = 1
        flag = True
        while flag:
            print("Page #" + str(i))
            url = 'https://www.pixiv.net/bookmark_new_illust.php?p=' + str(i)
            if config.r18mode:
                url = 'https://www.pixiv.net/bookmark_new_illust_r18.php?p=' + str(
                    i)

            PixivHelper.print_and_log('info', "Source URL: " + url)
            page = br.open(url)
            try:
                parsed_page = BeautifulSoup(page.read().decode("utf-8"),
                                            features="html5lib")
                pb = PixivNewIllustBookmark(parsed_page)
                if not pb.haveImages:
                    print("No images!")
                    break

                for image_id in pb.imageList:
                    print("Image #" + str(image_count))
                    result = PixivImageHandler.process_image(
                        caller, config, artist=None, image_id=int(image_id))
                    image_count = image_count + 1

                    if result == PixivConstant.PIXIVUTIL_SKIP_OLDER:
                        flag = False
                        break

                    PixivHelper.wait(result, config)
            finally:
                # Close the response on every exit path: normal, break, error.
                page.close()
            i = i + 1

            parsed_page.decompose()
            # Reset instead of `del`: the error handler below reads this
            # name, and an unbound local would raise UnboundLocalError there
            # and hide the real failure of a later page.
            parsed_page = None

            # Non premium is only limited to 100 page
            # Premium user might be limited to 5000, refer to issue #112
            if (end_page_num != 0
                    and i > end_page_num) or i > 5000 or pb.isLastPage:
                print("Limit or last page reached.")
                flag = False

        # Left over only when the loop ended through the "No images!" break.
        if parsed_page is not None:
            parsed_page.decompose()
            parsed_page = None

        print("Done.")
    except KeyboardInterrupt:
        raise
    except BaseException:
        PixivHelper.print_and_log(
            'error', 'Error at process_new_illust_from_bookmark(): {0}'.format(
                sys.exc_info()))
        if parsed_page is not None:
            filename = "Dump for New Illust from bookmark.html"
            PixivHelper.dump_html(filename, parsed_page)
        raise
Esempio n. 9
0
def process_image(caller,
                  config,
                  artist=None,
                  image_id=None,
                  user_dir='',
                  bookmark=False,
                  search_tags='',
                  title_prefix="",
                  bookmark_count=-1,
                  image_response_count=-1,
                  notifier=None,
                  job_option=None):
    """Fetch one Pixiv work, download each of its files and record it in the DB.

    Args:
        caller: Object exposing the shared state read here (``__dbManager__``,
            ``__br__``, ``__errorList``, the blacklist/suppress collections,
            ``__re_manga_page``, ``set_console_title``, ``ERROR_CODE`` and
            ``DEBUG_SKIP_DOWNLOAD_IMAGE``).
        config: Settings object (overwrite/retry flags, blacklist switches,
            tag formatting options, info-file switches, ...).
        artist: Forwarded as ``parent`` to ``getImagePage``; when ``None`` the
            member details of the fetched image are printed before download.
        image_id: Id of the work to process.
        user_dir: Target directory; when empty, ``format_src.rootDirectory``
            is used instead.
        bookmark: Forwarded as ``from_bookmark`` to ``getImagePage`` and as
            ``bookmark`` to ``make_filename``.
        search_tags: Forwarded as ``searchTags`` to ``make_filename``.
        title_prefix: When non-empty, used as the prefix of the console title.
        bookmark_count: Forwarded to ``getImagePage``.
        image_response_count: Not read anywhere in this function.
        notifier: Callback invoked as ``notifier(type=..., message=...)``;
            defaults to ``PixivHelper.dummy_notifier``.
        job_option: When given, replaces ``config`` as the source of the
            filename formats and root directory.

    Returns:
        A ``PixivConstant.PIXIVUTIL_*`` status code. After a successful DB
        save the literal ``0`` is returned. The value stays ``None`` when the
        download section is skipped via ``caller.DEBUG_SKIP_DOWNLOAD_IMAGE``
        and no other branch assigns a result.

    Raises:
        KeyboardInterrupt: When a download reports ``PIXIVUTIL_ABORTED``.
        Exception: Any error outside the medium-page fetch is logged (with an
            HTML dump when the parsed page is available) and re-raised.
    """
    # caller function/method
    # TODO: ideally to be removed or passed as argument
    db = caller.__dbManager__

    if notifier is None:
        notifier = PixivHelper.dummy_notifier

    # override the config source if job_option is given for filename formats
    format_src = config
    if job_option is not None:
        format_src = job_option

    parse_medium_page = None
    image = None
    result = None
    referer = f'https://www.pixiv.net/artworks/{image_id}'
    # placeholder name, replaced once a real filename has been built below
    filename = f'no-filename-{image_id}.tmp'

    try:
        msg = Fore.YELLOW + Style.NORMAL + f'Processing Image Id: {image_id}' + Style.RESET_ALL
        PixivHelper.print_and_log(None, msg)
        notifier(type="IMAGE", message=msg)

        # check if already downloaded. images won't be downloaded twice - needed in process_image to catch any download
        r = db.selectImageByImageId(image_id, cols='save_name')
        exists = False
        in_db = False
        if r is not None:
            exists = db.cleanupFileExists(r[0])
            in_db = True

        # skip if already recorded in db and alwaysCheckFileSize is disabled and overwrite is disabled.
        if in_db and not config.alwaysCheckFileSize and not config.overwrite:
            PixivHelper.print_and_log(None, f'Already downloaded in DB: {image_id}')
            gc.collect()
            return PixivConstant.PIXIVUTIL_SKIP_DUPLICATE_NO_WAIT

        # get the medium page
        try:
            (image, parse_medium_page) = PixivBrowserFactory.getBrowser().getImagePage(image_id=image_id,
                                                                                       parent=artist,
                                                                                       from_bookmark=bookmark,
                                                                                       bookmark_count=bookmark_count)
            if len(title_prefix) > 0:
                caller.set_console_title(f"{title_prefix} ImageId: {image.imageId}")
            else:
                caller.set_console_title(f"MemberId: {image.artist.artistId} ImageId: {image.imageId}")

        except PixivException as ex:
            # Known Pixiv-side failure: record it on the caller, log it and
            # report NOT_OK instead of propagating.
            caller.ERROR_CODE = ex.errorCode
            caller.__errorList.append(dict(type="Image", id=str(image_id), message=ex.message, exception=ex))
            if ex.errorCode == PixivException.UNKNOWN_IMAGE_ERROR:
                PixivHelper.print_and_log('error', ex.message)
            elif ex.errorCode == PixivException.SERVER_ERROR:
                PixivHelper.print_and_log('error', f'Giving up image_id (medium): {image_id}')
            elif ex.errorCode > 2000:
                PixivHelper.print_and_log('error', f'Image Error for {image_id}: {ex.message}')
            if parse_medium_page is not None:
                dump_filename = f'Error medium page for image {image_id}.html'
                PixivHelper.dump_html(dump_filename, parse_medium_page)
                PixivHelper.print_and_log('error', f'Dumping html to: {dump_filename}')
            else:
                PixivHelper.print_and_log('error', f'Image ID ({image_id}): {ex}')
            PixivHelper.print_and_log('error', f'Stack Trace: {sys.exc_info()}')
            return PixivConstant.PIXIVUTIL_NOT_OK
        except Exception as ex:
            # Any other failure while fetching the page is also swallowed
            # here and reported as NOT_OK after logging the traceback.
            PixivHelper.print_and_log('error', f'Image ID ({image_id}): {ex}')
            if parse_medium_page is not None:
                dump_filename = f'Error medium page for image {image_id}.html'
                PixivHelper.dump_html(dump_filename, parse_medium_page)
                PixivHelper.print_and_log('error', f'Dumping html to: {dump_filename}')
            PixivHelper.print_and_log('error', f'Stack Trace: {sys.exc_info()}')
            exc_type, exc_value, exc_traceback = sys.exc_info()
            traceback.print_exception(exc_type, exc_value, exc_traceback)
            return PixivConstant.PIXIVUTIL_NOT_OK

        # Each filter below may clear this flag and set `result`; later
        # filters are skipped once the flag is cleared.
        download_image_flag = True

        # date validation and blacklist tag validation
        if config.dateDiff > 0:
            # A works date equal to ordinal day 1 is not compared against the cut-off.
            if image.worksDateDateTime != datetime.datetime.fromordinal(1).replace(tzinfo=datetime_z.utc):
                if image.worksDateDateTime < (datetime.datetime.today() - datetime.timedelta(config.dateDiff)).replace(tzinfo=datetime_z.utc):
                    PixivHelper.print_and_log('info', f'Skipping image_id: {image_id} because contains older than: {config.dateDiff} day(s).')
                    download_image_flag = False
                    result = PixivConstant.PIXIVUTIL_SKIP_OLDER

        if config.useBlacklistMembers and download_image_flag:
            if str(image.originalArtist.artistId) in caller.__blacklistMembers:
                PixivHelper.print_and_log('info', f'Skipping image_id: {image_id} because contains blacklisted member id: {image.originalArtist.artistId}')
                download_image_flag = False
                result = PixivConstant.PIXIVUTIL_SKIP_BLACKLIST

        if config.useBlacklistTags and download_image_flag:
            for item in caller.__blacklistTags:
                if item in image.imageTags:
                    PixivHelper.print_and_log('info', f'Skipping image_id: {image_id} because contains blacklisted tags: {item}')
                    download_image_flag = False
                    result = PixivConstant.PIXIVUTIL_SKIP_BLACKLIST
                    break

        if config.useBlacklistTitles and download_image_flag:
            for item in caller.__blacklistTitles:
                if item in image.imageTitle:
                    PixivHelper.print_and_log('info', f'Skipping image_id: {image_id} because contains blacklisted Title: {item}')
                    download_image_flag = False
                    result = PixivConstant.PIXIVUTIL_SKIP_BLACKLIST
                    break

        if download_image_flag and not caller.DEBUG_SKIP_DOWNLOAD_IMAGE:
            if artist is None:
                PixivHelper.print_and_log(None, f'Member Name  : {image.artist.artistName}')
                PixivHelper.print_and_log(None, f'Member Avatar: {image.artist.artistAvatar}')
                PixivHelper.print_and_log(None, f'Member Token : {image.artist.artistToken}')
                PixivHelper.print_and_log(None, f'Member Background : {image.artist.artistBackground}')
            PixivHelper.print_and_log(None, f"Title: {image.imageTitle}")
            tags_str = ', '.join(image.imageTags)
            PixivHelper.print_and_log(None, f"Tags : {tags_str}")
            PixivHelper.print_and_log(None, f"Date : {image.worksDateDateTime}")
            PixivHelper.print_and_log(None, f"Mode : {image.imageMode}")

            # get bookmark count
            # Only fetched when the filename format needs it and the image does not carry it yet (-1).
            if ("%bookmark_count%" in format_src.filenameFormat or "%image_response_count%" in format_src.filenameFormat) and image.bookmark_count == -1:
                PixivHelper.print_and_log(None, "Parsing bookmark page", end=' ')
                bookmark_url = f'https://www.pixiv.net/bookmark_detail.php?illust_id={image_id}'
                parse_bookmark_page = PixivBrowserFactory.getBrowser().getPixivPage(bookmark_url)
                image.ParseBookmarkDetails(parse_bookmark_page)
                parse_bookmark_page.decompose()
                del parse_bookmark_page
                PixivHelper.print_and_log(None, f"Bookmark Count : {image.bookmark_count}")
                caller.__br__.back()

            # Drop suppressed tags from the image's tag list (mutates image.imageTags in place).
            if config.useSuppressTags:
                for item in caller.__suppressTags:
                    if item in image.imageTags:
                        image.imageTags.remove(item)

            # get manga page
            if image.imageMode == 'manga':
                PixivHelper.print_and_log(None, f"Page Count : {image.imageCount}")

            if user_dir == '':  # Yavos: use config-options
                target_dir = format_src.rootDirectory
            else:  # Yavos: use filename from list
                target_dir = user_dir

            result = PixivConstant.PIXIVUTIL_OK
            # (image_id, page, filename) tuples collected for db.insertMangaImages
            manga_files = list()
            page = 0

            # Issue #639
            source_urls = image.imageUrls
            if config.downloadResized:
                source_urls = image.imageResizedUrls

            for img in source_urls:
                PixivHelper.print_and_log(None, f'Image URL : {img}')
                url = os.path.basename(img)
                split_url = url.split('.')
                # Only URLs whose file name starts with the image id are downloaded.
                if split_url[0].startswith(str(image_id)):

                    filename_format = format_src.filenameFormat
                    if image.imageMode == 'manga':
                        filename_format = format_src.filenameMangaFormat

                    filename = PixivHelper.make_filename(filename_format,
                                                         image,
                                                         tagsSeparator=config.tagsSeparator,
                                                         tagsLimit=config.tagsLimit,
                                                         fileUrl=url,
                                                         bookmark=bookmark,
                                                         searchTags=search_tags,
                                                         useTranslatedTag=config.useTranslatedTag,
                                                         tagTranslationLocale=config.tagTranslationLocale)
                    filename = PixivHelper.sanitize_filename(filename, target_dir)

                    if image.imageMode == 'manga' and config.createMangaDir:
                        # Insert a path separator before the "_p<N>" part so the
                        # pages end up inside a per-work directory.
                        manga_page = caller.__re_manga_page.findall(filename)
                        if len(manga_page) > 0:
                            splitted_filename = filename.split(manga_page[0][0], 1)
                            splitted_manga_page = manga_page[0][0].split("_p", 1)
                            # filename = splitted_filename[0] + splitted_manga_page[0] + os.sep + "_p" + splitted_manga_page[1] + splitted_filename[1]
                            filename = f"{splitted_filename[0]}{splitted_manga_page[0]}{os.sep}_p{splitted_manga_page[1]}{splitted_filename[1]}"

                    PixivHelper.print_and_log('info', f'Filename  : {filename}')

                    # Pessimistic default: stays NOT_OK if the download raises URLError.
                    result = PixivConstant.PIXIVUTIL_NOT_OK
                    try:
                        (result, filename) = PixivDownloadHandler.download_image(caller,
                                                                                 img,
                                                                                 filename,
                                                                                 referer,
                                                                                 config.overwrite,
                                                                                 config.retry,
                                                                                 config.backupOldFile,
                                                                                 image,
                                                                                 page,
                                                                                 notifier)

                        if result == PixivConstant.PIXIVUTIL_NOT_OK:
                            PixivHelper.print_and_log('error', f'Image url not found/failed to download: {image.imageId}')
                        elif result == PixivConstant.PIXIVUTIL_ABORTED:
                            raise KeyboardInterrupt()

                        manga_files.append((image_id, page, filename))
                        page = page + 1

                    except urllib.error.URLError:
                        PixivHelper.print_and_log('error', f'Error when download_image(), giving up url: {img}')
                    PixivHelper.print_and_log(None, '')

            if config.writeImageInfo or config.writeImageJSON:
                filename_info_format = format_src.filenameInfoFormat or format_src.filenameFormat
                # Issue #575
                if image.imageMode == 'manga':
                    filename_info_format = format_src.filenameMangaInfoFormat or format_src.filenameMangaFormat or filename_info_format
                # NOTE(review): `url` is only bound inside the download loop
                # above; if source_urls were empty this reference would fail.
                # Confirm that cannot happen.
                info_filename = PixivHelper.make_filename(filename_info_format,
                                                          image,
                                                          tagsSeparator=config.tagsSeparator,
                                                          tagsLimit=config.tagsLimit,
                                                          fileUrl=url,
                                                          appendExtension=False,
                                                          bookmark=bookmark,
                                                          searchTags=search_tags,
                                                          useTranslatedTag=config.useTranslatedTag,
                                                          tagTranslationLocale=config.tagTranslationLocale)
                info_filename = PixivHelper.sanitize_filename(info_filename, target_dir)
                # trim _pXXX
                info_filename = re.sub(r'_p?\d+$', '', info_filename)
                if config.writeImageInfo:
                    image.WriteInfo(info_filename + ".txt")
                if config.writeImageJSON:
                    image.WriteJSON(info_filename + ".json")

            if image.imageMode == 'ugoira_view':
                # `filename` here is the name from the last download attempt
                # (or the placeholder if none ran).
                if config.writeUgoiraInfo:
                    image.WriteUgoiraData(filename + ".js")
                # Handle #451
                if config.createUgoira and (result in (PixivConstant.PIXIVUTIL_OK, PixivConstant.PIXIVUTIL_SKIP_DUPLICATE)):
                    PixivDownloadHandler.handle_ugoira(image, filename, config, notifier)

            if config.writeUrlInDescription:
                PixivHelper.write_url_in_description(image, config.urlBlacklistRegex, config.urlDumpFilename)

        # This overrides whatever result the download section produced.
        if in_db and not exists:
            result = PixivConstant.PIXIVUTIL_CHECK_DOWNLOAD  # There was something in the database which had not been downloaded

        # Only save to db if all images is downloaded completely
        # NOTE(review): `result` reflects only the last download attempt in
        # the loop above — confirm that matches the intent of this comment.
        if result in (PixivConstant.PIXIVUTIL_OK,
                      PixivConstant.PIXIVUTIL_SKIP_DUPLICATE,
                      PixivConstant.PIXIVUTIL_SKIP_LOCAL_LARGER):
            try:
                db.insertImage(image.artist.artistId, image.imageId, image.imageMode)
            except BaseException:
                # Insert failures are logged only; the update below still runs.
                PixivHelper.print_and_log('error', f'Failed to insert image id:{image.imageId} to DB')

            db.updateImage(image.imageId, image.imageTitle, filename, image.imageMode)

            if len(manga_files) > 0:
                db.insertMangaImages(manga_files)

            # map back to PIXIVUTIL_OK (because of ugoira file check)
            result = 0

        # Drop the local references before forcing a collection.
        if image is not None:
            del image
        if parse_medium_page is not None:
            del parse_medium_page
        gc.collect()
        PixivHelper.print_and_log(None, '\n')

        return result
    except Exception as ex:
        # NOTE(review): KeyboardInterrupt does not derive from Exception, so
        # this isinstance check never matches inside this handler.
        if isinstance(ex, KeyboardInterrupt):
            raise
        # Exceptions without an errorCode attribute are recorded as -1.
        caller.ERROR_CODE = getattr(ex, 'errorCode', -1)
        exc_type, exc_value, exc_traceback = sys.exc_info()
        traceback.print_exception(exc_type, exc_value, exc_traceback)
        PixivHelper.print_and_log('error', f'Error at process_image(): {image_id}')
        PixivHelper.print_and_log('error', f'Exception: {sys.exc_info()}')

        if parse_medium_page is not None:
            dump_filename = f'Error medium page for image {image_id}.html'
            PixivHelper.dump_html(dump_filename, parse_medium_page)
            PixivHelper.print_and_log('error', f'Dumping html to: {dump_filename}')

        raise
Esempio n. 10
0
def process_member(caller,
                   config,
                   member_id,
                   user_dir='',
                   page=1,
                   end_page=0,
                   bookmark=False,
                   tags=None,
                   title_prefix="",
                   notifier=None,
                   job_option=None):
    """Walk a member's listing pages and process every image found on them.

    For each page the member listing is fetched, the avatar/background are
    downloaded once (when ``config.downloadAvatar`` is set), and every image
    id on the page is handed to ``PixivImageHandler.process_image``.

    Args:
        caller: Object exposing the shared state read here (``__dbManager__``,
            ``configfile``, ``np``, ``np_is_valid``, ``__errorList``,
            ``set_console_title``, ``ERROR_CODE`` and
            ``DEBUG_SKIP_PROCESS_IMAGE``).
        config: Settings object; it is reloaded from ``caller.configfile`` at
            the start of every call.
        member_id: Id of the member to process.
        user_dir: Target directory; when empty, ``format_src.rootDirectory``
            is used instead.
        page: First listing page to process.
        end_page: Last listing page to process; ``0`` means no explicit limit
            (the command line / config page limits still apply).
        bookmark: Forwarded to ``getMemberPage`` and ``process_image``.
        tags: Forwarded to ``getMemberPage``.
        title_prefix: Prefix for the console title.
        notifier: Callback invoked as ``notifier(type=..., message=...)``;
            defaults to ``PixivHelper.dummy_notifier``.
        job_option: When given, replaces ``config`` as the source of
            ``rootDirectory``, ``r18mode`` and the avatar filename settings.

    Returns:
        None. The function also returns early when the member page raises a
        ``PixivException``, when an image keeps failing after the configured
        retries, or when ``config.checkUpdatedLimit`` is reached.

    Raises:
        KeyboardInterrupt: Propagated unchanged.
        Exception: Any other error is logged (with an HTML dump of the last
            listing page when available) and re-raised.
    """
    # caller function/method
    # TODO: ideally to be removed or passed as argument
    db = caller.__dbManager__
    config.loadConfig(path=caller.configfile)
    np = caller.np
    np_is_valid = caller.np_is_valid

    if notifier is None:
        notifier = PixivHelper.dummy_notifier

    # override the config source if job_option is given for filename formats
    format_src = config
    if job_option is not None:
        format_src = job_option

    list_page = None

    msg = Fore.YELLOW + Style.BRIGHT + f'Processing Member Id: {member_id}' + Style.RESET_ALL
    PixivHelper.print_and_log('info', msg)
    notifier(type="MEMBER", message=msg)
    if page != 1:
        PixivHelper.print_and_log('info', 'Start Page: ' + str(page))
    if end_page != 0:
        PixivHelper.print_and_log('info', 'End Page: ' + str(end_page))
        if config.numberOfPage != 0:
            PixivHelper.print_and_log(
                'info', 'Number of page setting will be ignored')
    elif np != 0:
        PixivHelper.print_and_log('info',
                                  'End Page from command line: ' + str(np))
    elif config.numberOfPage != 0:
        PixivHelper.print_and_log(
            'info', 'End Page from config: ' + str(config.numberOfPage))

    # calculate the offset for display properties
    offset = 48  # new offset for AJAX call
    offset_start = (page - 1) * offset
    offset_stop = end_page * offset

    try:
        no_of_images = 1
        is_avatar_downloaded = False
        flag = True
        updated_limit_count = 0
        image_id = -1

        while flag:
            PixivHelper.print_and_log(None, 'Page ', page)
            caller.set_console_title(
                f"{title_prefix}MemberId: {member_id} Page: {page}")
            # Try to get the member page
            while True:
                try:
                    (artist, list_page
                     ) = PixivBrowserFactory.getBrowser().getMemberPage(
                         member_id,
                         page,
                         bookmark,
                         tags,
                         r18mode=format_src.r18mode)
                    break
                except PixivException as ex:
                    caller.ERROR_CODE = ex.errorCode
                    PixivHelper.print_and_log(
                        'info', f'Member ID ({member_id}): {ex}')
                    if ex.errorCode == PixivException.NO_IMAGES:
                        pass
                    else:
                        if list_page is None:
                            list_page = ex.htmlPage
                        if list_page is not None:
                            PixivHelper.dump_html(
                                f"Dump for {member_id} Error Code {ex.errorCode}.html",
                                list_page)
                        if ex.errorCode == PixivException.USER_ID_NOT_EXISTS or ex.errorCode == PixivException.USER_ID_SUSPENDED:
                            db.setIsDeletedFlagForMemberId(int(member_id))
                            PixivHelper.print_and_log(
                                'info',
                                f'Set IsDeleted for MemberId: {member_id} not exist.'
                            )
                            # db.deleteMemberByMemberId(member_id)
                            # PixivHelper.printAndLog('info', 'Deleting MemberId: ' + str(member_id) + ' not exist.')
                        if ex.errorCode == PixivException.OTHER_MEMBER_ERROR:
                            PixivHelper.print_and_log(None, ex.message)
                            caller.__errorList.append(
                                dict(type="Member",
                                     id=str(member_id),
                                     message=ex.message,
                                     exception=ex))
                    # Any PixivException ends processing of this member.
                    return
                except AttributeError:
                    # Possible layout changes, try to dump the file below
                    raise
                except Exception:
                    # Other errors are logged and the fetch is retried.
                    # Catching Exception (not BaseException) lets
                    # KeyboardInterrupt/SystemExit escape this retry loop
                    # instead of being swallowed and retried forever.
                    exc_type, exc_value, exc_traceback = sys.exc_info()
                    traceback.print_exception(exc_type, exc_value,
                                              exc_traceback)
                    PixivHelper.print_and_log(
                        'error',
                        f'Error at processing Artist Info: {sys.exc_info()}')

            PixivHelper.print_and_log(None,
                                      f'Member Name  : {artist.artistName}')
            PixivHelper.print_and_log(None,
                                      f'Member Avatar: {artist.artistAvatar}')
            PixivHelper.print_and_log(None,
                                      f'Member Token : {artist.artistToken}')
            PixivHelper.print_and_log(
                None, f'Member Background : {artist.artistBackground}')
            print_offset_stop = offset_stop if offset_stop < artist.totalImages and offset_stop != 0 else artist.totalImages
            PixivHelper.print_and_log(
                None,
                f'Processing images from {offset_start + 1} to {print_offset_stop} of {artist.totalImages}'
            )

            if not is_avatar_downloaded and config.downloadAvatar:
                if user_dir == '':
                    target_dir = format_src.rootDirectory
                else:
                    target_dir = user_dir

                avatar_filename = PixivHelper.create_avatar_filename(
                    artist, target_dir, format_src)
                if not caller.DEBUG_SKIP_PROCESS_IMAGE:
                    if artist.artistAvatar.find('no_profile') == -1:
                        PixivDownloadHandler.download_image(
                            caller,
                            artist.artistAvatar,
                            avatar_filename,
                            "https://www.pixiv.net/",
                            config.overwrite,
                            config.retry,
                            config.backupOldFile,
                            notifier=notifier)
                    # Issue #508
                    if artist.artistBackground is not None and artist.artistBackground.startswith(
                            "http"):
                        bg_name = PixivHelper.create_bg_filename_from_avatar_filename(
                            avatar_filename)
                        PixivDownloadHandler.download_image(
                            caller,
                            artist.artistBackground,
                            bg_name,
                            "https://www.pixiv.net/",
                            config.overwrite,
                            config.retry,
                            config.backupOldFile,
                            notifier=notifier)
                    # Mark the avatar step as done regardless of whether the
                    # member has a background image; otherwise members without
                    # one would repeat the avatar download on every page.
                    is_avatar_downloaded = True

            if config.autoAddMember:
                db.insertNewMember(int(member_id))

            db.updateMemberName(member_id, artist.artistName)

            if not artist.haveImages:
                PixivHelper.print_and_log('info',
                                          f"No image found for: {member_id}")
                db.updateLastDownloadDate(member_id)
                flag = False
                continue

            result = PixivConstant.PIXIVUTIL_NOT_OK
            for image_id in artist.imageList:
                PixivHelper.print_and_log(None, f'#{no_of_images}')

                retry_count = 0
                while True:
                    try:
                        # Work out the total shown in the console title.
                        if artist.totalImages > 0:
                            # PixivHelper.safePrint("Total Images = " + str(artist.totalImages))
                            total_image_page_count = artist.totalImages
                            if (offset_stop > 0
                                    and offset_stop < total_image_page_count):
                                total_image_page_count = offset_stop
                            total_image_page_count = total_image_page_count - offset_start
                            # PixivHelper.safePrint("Total Images Offset = " + str(total_image_page_count))
                        else:
                            total_image_page_count = (
                                (page - 1) * 20) + len(artist.imageList)
                        title_prefix_img = f"{title_prefix}MemberId: {member_id} Page: {page} Post {no_of_images}+{updated_limit_count} of {total_image_page_count}"
                        if not caller.DEBUG_SKIP_PROCESS_IMAGE:
                            result = PixivImageHandler.process_image(
                                caller,
                                config,
                                artist,
                                image_id,
                                user_dir,
                                bookmark,
                                title_prefix=title_prefix_img,
                                notifier=notifier,
                                job_option=job_option)

                        break
                    except KeyboardInterrupt:
                        # Turned into a status code so the user can be asked
                        # below whether to continue with the next image.
                        result = PixivConstant.PIXIVUTIL_KEYBOARD_INTERRUPT
                        break
                    except BaseException:
                        if retry_count > config.retry:
                            PixivHelper.print_and_log(
                                'error', f"Giving up image_id: {image_id}")
                            return
                        retry_count = retry_count + 1
                        PixivHelper.print_and_log(
                            None,
                            f"Stuff happened, trying again after 2 second ({retry_count})"
                        )
                        exc_type, exc_value, exc_traceback = sys.exc_info()
                        traceback.print_exception(exc_type, exc_value,
                                                  exc_traceback)
                        PixivHelper.print_and_log(
                            "error",
                            f"Error at process_member(): {sys.exc_info()} Member Id: {member_id}"
                        )
                        time.sleep(2)

                if result in (PixivConstant.PIXIVUTIL_SKIP_DUPLICATE,
                              PixivConstant.PIXIVUTIL_SKIP_LOCAL_LARGER,
                              PixivConstant.PIXIVUTIL_SKIP_DUPLICATE_NO_WAIT):
                    # Count already-present images; stop the member once the
                    # configured limit of consecutive skips is reached.
                    updated_limit_count = updated_limit_count + 1
                    if config.checkUpdatedLimit != 0 and updated_limit_count >= config.checkUpdatedLimit:
                        PixivHelper.safePrint(f"Skipping member: {member_id}")
                        db.updateLastDownloadDate(member_id)
                        PixivBrowserFactory.getBrowser(
                            config=config).clear_history()
                        return
                    gc.collect()
                    continue
                if result == PixivConstant.PIXIVUTIL_KEYBOARD_INTERRUPT:
                    choice = input(
                        "Keyboard Interrupt detected, continue to next image (Y/N)"
                    ).rstrip("\r")
                    if choice.upper() == 'N':
                        PixivHelper.print_and_log(
                            "info", f"Member: {member_id}, processing aborted")
                        flag = False
                        break
                    else:
                        continue
                # return code from process image
                if result == PixivConstant.PIXIVUTIL_SKIP_OLDER:
                    PixivHelper.print_and_log(
                        "info",
                        "Reached older images, skippin to next member.")
                    db.updateLastDownloadDate(member_id)
                    flag = False
                    break

                no_of_images = no_of_images + 1
                PixivHelper.wait(result, config)

            if artist.isLastPage:
                db.updateLastDownloadDate(member_id)
                PixivHelper.print_and_log(None, "Last Page")
                flag = False

            page = page + 1

            # page limit checking
            if end_page > 0 and page > end_page:
                PixivHelper.print_and_log(
                    None,
                    f"Page limit reached (from endPage limit ={end_page})")
                db.updateLastDownloadDate(member_id)
                flag = False
            else:
                if np_is_valid:  # Yavos: overwriting config-data
                    if page > np and np > 0:
                        PixivHelper.print_and_log(
                            None,
                            f"Page limit reached (from command line ={np})")
                        flag = False
                elif page > config.numberOfPage and config.numberOfPage > 0:
                    PixivHelper.print_and_log(
                        None,
                        f"Page limit reached (from config ={config.numberOfPage})"
                    )
                    flag = False

            del artist
            # Rebind instead of `del`: the error handlers above and below read
            # `list_page`, and an unbound name would turn a handled failure on
            # the next page into an UnboundLocalError.
            list_page = None
            PixivBrowserFactory.getBrowser(config=config).clear_history()
            gc.collect()

        log_message = ""
        if int(image_id) > 0:
            db.updateLastDownloadedImage(member_id, image_id)
            log_message = f'last image_id: {image_id}'
        else:
            log_message = 'no images were found.'

        PixivHelper.print_and_log(
            "info", f"Member_id: {member_id} completed: {log_message}")
    except KeyboardInterrupt:
        raise
    except Exception:
        exc_type, exc_value, exc_traceback = sys.exc_info()
        traceback.print_exception(exc_type, exc_value, exc_traceback)
        PixivHelper.print_and_log(
            'error', f'Error at process_member(): {sys.exc_info()}')
        try:
            if list_page is not None:
                dump_filename = f'Error page for member {member_id} at page {page}.html'
                PixivHelper.dump_html(dump_filename, list_page)
                PixivHelper.print_and_log('error',
                                          f"Dumping html to: {dump_filename}")
        except BaseException:
            # Dumping is best effort; the original error is re-raised below.
            PixivHelper.print_and_log(
                'error', f'Cannot dump page for member_id: {member_id}')
        raise
Esempio n. 11
0
def process_tags(caller,
                 config,
                 tags,
                 page=1,
                 end_page=0,
                 wild_card=True,
                 title_caption=False,
                 start_date=None,
                 end_date=None,
                 use_tags_as_dir=False,
                 member_id=None,
                 bookmark_count=None,
                 sort_order='date_d',
                 type_mode=None,
                 notifier=None):
    """Run a tag search page by page and hand every result to the image handler.

    Each search page is fetched through
    ``PixivBrowserFactory.getBrowser().getSearchTagPage`` and every item on it
    is passed to ``PixivImageHandler.process_image``. The loop stops when an
    empty or duplicated page is returned, when ``end_page`` or the last page
    is reached, when the user aborts after a keyboard interrupt, or (early
    return) when ``config.checkUpdatedLimit`` skipped images have been seen.

    Args:
        caller: Object passed through to ``process_image``; its
            ``DEBUG_SKIP_PROCESS_IMAGE`` attribute disables the actual
            image processing when truthy.
        config: Configuration object. Attributes read here: ``rootDirectory``,
            ``r18mode``, ``retry``, ``timeout``, ``useragent``,
            ``checkUpdatedLimit`` and ``enableInfiniteLoop``.
        tags: Search query. It is run through ``PixivHelper.decode_tags`` (for
            logging and the optional directory name) and
            ``PixivHelper.encode_tags`` (for the request).
        page: First page number to fetch.
        end_page: Last page number to fetch; ``0`` disables the limit.
        wild_card: Forwarded to ``getSearchTagPage``.
        title_caption: Forwarded to ``getSearchTagPage``.
        start_date: Forwarded to ``getSearchTagPage``.
        end_date: Forwarded to ``getSearchTagPage``; overwritten by the
            page-1000 wrap-around logic near the end of the loop.
        use_tags_as_dir: When true, images are saved below
            ``config.rootDirectory`` in a directory named after the sanitized
            query instead of directly in ``config.rootDirectory``.
        member_id: Forwarded to ``getSearchTagPage``; when not ``None`` it is
            also included in the title prefix.
        bookmark_count: Minimum bookmark count. Items whose ``bookmarkCount``
            is lower are skipped. ``None`` disables the filter.
        sort_order: Forwarded to ``getSearchTagPage``; the page-1000
            wrap-around is only attempted when this is not ``'date'``.
        type_mode: Forwarded to ``getSearchTagPage``.
        notifier: Forwarded to ``process_image``; defaults to
            ``PixivHelper.dummy_notifier``.

    Returns:
        None.

    Raises:
        KeyboardInterrupt: Re-raised untouched when it escapes the per-image
            handling.
        BaseException: Any other exception is logged, the last fetched search
            page is dumped to an HTML file when available, and the exception
            is re-raised.
    """
    if notifier is None:
        notifier = PixivHelper.dummy_notifier

    # State that must exist before the try block because the error handler
    # below reads search_page and i.
    search_page = None
    _last_search_result = None  # previous page result, used for duplicate/empty-page checks
    i = page  # current page number
    updated_limit_count = 0  # number of images skipped as duplicate / local-larger
    empty_page_retry = 0  # consecutive retries of an unexpectedly empty page

    try:
        search_tags = PixivHelper.decode_tags(tags)

        root_dir = config.rootDirectory
        if use_tags_as_dir:
            PixivHelper.print_and_log(None, "Save to each directory using query tags.")
            root_dir = config.rootDirectory + os.sep + PixivHelper.sanitize_filename(search_tags)

        tags = PixivHelper.encode_tags(tags)

        images = 1  # 1-based counter of images handed to the processing step
        last_image_id = -1
        skipped_count = 0  # images skipped by the bookmark_count filter
        use_bookmark_data = False
        if bookmark_count is not None and bookmark_count > 0:
            use_bookmark_data = True

        # Offsets (in posts) of the requested page window; used only to
        # compute the total shown in the title prefix.
        offset = PixivTags.POSTS_PER_PAGE
        start_offset = (page - 1) * offset
        stop_offset = end_page * offset

        PixivHelper.print_and_log('info', f'Searching for: ({search_tags}) {tags} with partial match = {wild_card} and title/caption = {title_caption}')
        flag = True  # loop-continue flag; several checks below clear it
        while flag:
            (t, search_page) = PixivBrowserFactory.getBrowser().getSearchTagPage(tags,
                                                                                 i,
                                                                                 wild_card=wild_card,
                                                                                 title_caption=title_caption,
                                                                                 start_date=start_date,
                                                                                 end_date=end_date,
                                                                                 member_id=member_id,
                                                                                 sort_order=sort_order,
                                                                                 start_page=page,
                                                                                 use_bookmark_data=use_bookmark_data,
                                                                                 bookmark_count=bookmark_count,
                                                                                 type_mode=type_mode,
                                                                                 r18mode=config.r18mode)

            PixivHelper.print_and_log("info", f'Found {len(t.itemList)} images for page {i}.')
            if len(t.itemList) == 0:
                # Issue #1090
                # check if the available images matching with current page * PixivTags.POSTS_PER_PAGE
                # and wait for {timeout} seconds and retry the page up to {config.retry} times.
                if _last_search_result is not None and _last_search_result.availableImages > (PixivTags.POSTS_PER_PAGE * i) and empty_page_retry < config.retry:
                    PixivHelper.print_and_log("warn", f'Server did not return images, expected to have more (Total Post = {_last_search_result.availableImages}, current max posts = {PixivTags.POSTS_PER_PAGE * i}).')
                    # wait at least 2 minutes before retry
                    delay = config.timeout
                    if delay < 120:
                        delay = 120
                    PixivHelper.print_and_log(None, f"Waiting for {delay} seconds before retrying.")
                    PixivHelper.print_delay(delay)
                    empty_page_retry = empty_page_retry + 1
                    # Replace the browser headers with a user agent that has the
                    # current timestamp appended before retrying.
                    PixivBrowserFactory.getBrowser().addheaders = [('User-agent', f'{config.useragent}{int(time.time())}')]
                    # Retry the same page: i is intentionally not incremented.
                    continue
                else:
                    PixivHelper.print_and_log("warn", 'No more images.')
                    flag = False
            elif _last_search_result is not None:
                # Stop when this page contains no image id that was not
                # already present on the previous page.
                set1 = set((x.imageId) for x in _last_search_result.itemList)
                difference = [x for x in t.itemList if (x.imageId) not in set1]
                if len(difference) == 0:
                    PixivHelper.print_and_log("warn", 'Getting duplicated result set, no more new images.')
                    flag = False

            if flag:
                # Issue #1090: reset the retry counter on a successful load
                empty_page_retry = 0

                for item in t.itemList:
                    last_image_id = item.imageId
                    PixivHelper.print_and_log(None, f'Image #{images}')
                    PixivHelper.print_and_log(None, f'Image Id: {item.imageId}')

                    # Bookmark filter: skipped items are counted separately
                    # and do not advance the images counter.
                    if bookmark_count is not None and bookmark_count > item.bookmarkCount:
                        PixivHelper.print_and_log(None, f'Bookmark Count: {item.bookmarkCount}')
                        PixivHelper.print_and_log('info', f'Skipping imageId= {item.imageId} because less than bookmark count limit ({bookmark_count} > {item.bookmarkCount}).')
                        skipped_count = skipped_count + 1
                        continue

                    result = 0
                    # Retry loop: repeats only on http.client.BadStatusLine;
                    # every other outcome leaves via break or an exception.
                    while True:
                        try:
                            if t.availableImages > 0:
                                # PixivHelper.print_and_log(None, "Total Images: " + str(t.availableImages))
                                # Clamp the reported total to the requested page window.
                                total_image = t.availableImages
                                if(stop_offset > 0 and stop_offset < total_image):
                                    total_image = stop_offset
                                total_image = total_image - start_offset
                                # PixivHelper.print_and_log(None, "Total Images Offset: " + str(total_image))
                            else:
                                # NOTE(review): 20 is hard-coded here while the paging
                                # math above uses PixivTags.POSTS_PER_PAGE - confirm
                                # whether the two are meant to agree.
                                total_image = ((i - 1) * 20) + len(t.itemList)
                            title_prefix = "Tags:{0} Page:{1} Image {2}+{3} of {4}".format(tags, i, images, skipped_count, total_image)
                            if member_id is not None:
                                title_prefix = "MemberId: {0} Tags:{1} Page:{2} Image {3}+{4} of {5}".format(member_id,
                                                                                                             tags,
                                                                                                             i,
                                                                                                             images,
                                                                                                             skipped_count,
                                                                                                             total_image)
                            # Default result when image processing is disabled for debugging.
                            result = PixivConstant.PIXIVUTIL_OK
                            if not caller.DEBUG_SKIP_PROCESS_IMAGE:
                                result = PixivImageHandler.process_image(caller,
                                                                         config,
                                                                         None,
                                                                         item.imageId,
                                                                         user_dir=root_dir,
                                                                         search_tags=search_tags,
                                                                         title_prefix=title_prefix,
                                                                         bookmark_count=item.bookmarkCount,
                                                                         image_response_count=item.imageResponse,
                                                                         notifier=notifier)
                                PixivHelper.wait(result, config)
                            break
                        except KeyboardInterrupt:
                            # Converted into a result code so the user can be
                            # asked below whether to continue.
                            result = PixivConstant.PIXIVUTIL_KEYBOARD_INTERRUPT
                            break
                        except http.client.BadStatusLine:
                            PixivHelper.print_and_log(None, "Stuff happened, trying again after 2 second...")
                            PixivHelper.print_delay(2)

                    images = images + 1
                    if result in (PixivConstant.PIXIVUTIL_SKIP_DUPLICATE,
                                  PixivConstant.PIXIVUTIL_SKIP_LOCAL_LARGER,
                                  PixivConstant.PIXIVUTIL_SKIP_DUPLICATE_NO_WAIT):
                        updated_limit_count = updated_limit_count + 1
                        # checkUpdatedLimit == 0 disables this early exit.
                        if config.checkUpdatedLimit != 0 and updated_limit_count >= config.checkUpdatedLimit:
                            PixivHelper.print_and_log(None, f"Skipping tags: {tags}")
                            PixivBrowserFactory.getBrowser().clear_history()
                            return
                        gc.collect()
                        continue
                    elif result == PixivConstant.PIXIVUTIL_KEYBOARD_INTERRUPT:
                        choice = input("Keyboard Interrupt detected, continue to next image (Y/N)").rstrip("\r")
                        if choice.upper() == 'N':
                            PixivHelper.print_and_log("info", f"Tags: {tags}, processing aborted.")
                            flag = False
                            break
                        else:
                            continue

            PixivBrowserFactory.getBrowser().clear_history()

            # Advance to the next page and remember this result for the
            # empty-page and duplicate-page checks of the next iteration.
            i = i + 1
            _last_search_result = t

            if end_page != 0 and end_page < i:
                PixivHelper.print_and_log('info', f"End Page reached: {end_page}")
                flag = False
            if t.isLastPage:
                PixivHelper.print_and_log('info', f"Last page: {i - 1}")
                flag = False
            # After page 1000 has been processed, restart from page 1 with
            # end_date set to the work date of the last image seen. This runs
            # after the checks above and can set flag back to True.
            if config.enableInfiniteLoop and i == 1001 and sort_order != 'date':
                if last_image_id > 0:
                    # get the last date
                    PixivHelper.print_and_log('info', f"Hit page 1000, trying to get workdate for last image id: {last_image_id}.")
                    # referer = 'https://www.pixiv.net/en/artworks/{0}'.format(last_image_id)
                    result = PixivBrowserFactory.getBrowser().getImagePage(last_image_id)
                    _last_date = result[0].worksDateDateTime
                    # _start_date = image.worksDateDateTime + datetime.timedelta(365)
                    # hit the last page
                    i = 1
                    end_date = _last_date.strftime("%Y-%m-%d")
                    PixivHelper.print_and_log('info', f"Hit page 1000, looping back to page 1 with ecd: {end_date}.")
                    flag = True
                    last_image_id = -1
                else:
                    PixivHelper.print_and_log('info', "No more image in the list.")
                    flag = False

        PixivHelper.print_and_log(None, 'done')
        if search_page is not None:
            del search_page
    except KeyboardInterrupt:
        raise
    except BaseException:
        PixivHelper.print_and_log('error', f'Error at process_tags() at page {i}: {sys.exc_info()}')
        # Best-effort dump of the last fetched search page; a failure while
        # dumping is logged and the original exception is still re-raised.
        try:
            if search_page is not None:
                dump_filename = f'Error page for search tags {tags} at page {i}.html'
                PixivHelper.dump_html(dump_filename, search_page)
                PixivHelper.print_and_log('error', f"Dumping html to: {dump_filename}")
        except BaseException:
            PixivHelper.print_and_log('error', f'Cannot dump page for search tags: {search_tags}')
        raise
Esempio n. 12
0
def process_new_illust_from_bookmark(caller,
                                     config,
                                     page_num=1,
                                     end_page_num=0,
                                     bookmark_count=-1):
    """Process the new illustrations of followed artists, page by page.

    Pages are fetched with ``caller.__br__.getFollowedNewIllusts`` starting at
    ``page_num``; every image id on a page is handed to
    ``PixivImageHandler.process_image``.

    Args:
        caller: Object exposing the browser as ``__br__``; also passed through
            to ``process_image``.
        config: Configuration object; ``r18mode`` selects the ``"r18"``
            listing instead of ``"all"``.
        page_num: First page to fetch.
        end_page_num: Last page to fetch; ``0`` disables the limit.
        bookmark_count: Forwarded to ``process_image``.

    Raises:
        KeyboardInterrupt: Re-raised untouched.
        BaseException: Any other exception is logged and re-raised.
    """
    browser: PixivBrowser = caller.__br__
    # Nothing in this function assigns a page to this name, so the dump in
    # the error handler below is currently inert.
    parsed_page = None
    try:
        print("Processing New Illust from bookmark")
        current_page = page_num
        image_number = 1
        keep_going = True
        while keep_going:
            print(f"Page #{current_page}")
            mode = "r18" if config.r18mode else "all"
            listing = browser.getFollowedNewIllusts(mode, current_page=current_page)

            for raw_image_id in listing.imageList:
                print(f"Image #{image_number}")
                status = PixivImageHandler.process_image(
                    caller,
                    config,
                    artist=None,
                    image_id=int(raw_image_id),
                    bookmark_count=bookmark_count)
                image_number += 1

                # An "older" result ends the whole run; the page bookkeeping
                # below still executes once before the loop exits.
                if status == PixivConstant.PIXIVUTIL_SKIP_OLDER:
                    keep_going = False
                    break

                PixivHelper.wait(status, config)

            current_page += 1

            past_end_page = end_page_num != 0 and current_page > end_page_num
            if past_end_page or listing.isLastPage:
                print("Limit or last page reached.")
                keep_going = False

        print("Done.")
    except KeyboardInterrupt:
        raise
    except BaseException:
        PixivHelper.print_and_log(
            'error',
            f'Error at process_new_illust_from_bookmark(): {sys.exc_info()}')
        if parsed_page is not None:
            filename = "Dump for New Illust from bookmark.html"
            PixivHelper.dump_html(filename, parsed_page)
        raise