def process_tags(caller,
                 tags,
                 page=1,
                 end_page=0,
                 wild_card=True,
                 title_caption=False,
                 start_date=None,
                 end_date=None,
                 use_tags_as_dir=False,
                 member_id=None,
                 bookmark_count=None,
                 oldest_first=False,
                 type_mode=None,
                 notifier=None,
                 job_option=None):
    # caller function/method
    # TODO: ideally to be removed or passed as argument
    config = caller.__config__
    config.loadConfig(path=caller.configfile)

    if notifier is None:
        notifier = PixivHelper.dummy_notifier

    # override the config source if job_option is give for filename formats
    format_src = config
    if job_option is not None:
        format_src = job_option

    search_page = None
    _last_search_result = None
    i = page
    updated_limit_count = 0

    try:
        search_tags = PixivHelper.decode_tags(tags)

        if use_tags_as_dir:
            PixivHelper.print_and_log(None, "Save to each directory using query tags.")
            format_src.rootDirectory += os.sep + PixivHelper.sanitize_filename(search_tags)

        tags = PixivHelper.encode_tags(tags)

        images = 1
        last_image_id = -1
        skipped_count = 0
        use_bookmark_data = False
        if bookmark_count is not None and bookmark_count > 0:
            use_bookmark_data = True

        offset = 60
        start_offset = (page - 1) * offset
        stop_offset = end_page * offset

        PixivHelper.print_and_log('info', f'Searching for: ({search_tags}) {tags}')
        flag = True
        while flag:
            (t, search_page) = PixivBrowserFactory.getBrowser().getSearchTagPage(tags,
                                                                                 i,
                                                                                 wild_card,
                                                                                 title_caption,
                                                                                 start_date,
                                                                                 end_date,
                                                                                 member_id,
                                                                                 oldest_first,
                                                                                 page,
                                                                                 use_bookmark_data,
                                                                                 bookmark_count,
                                                                                 type_mode,
                                                                                 r18mode=format_src.r18mode)
            if len(t.itemList) == 0:
                PixivHelper.print_and_log(None, 'No more images')
                flag = False
            elif _last_search_result is not None:
                set1 = set((x.imageId) for x in _last_search_result.itemList)
                difference = [x for x in t.itemList if (x.imageId) not in set1]
                if len(difference) == 0:
                    PixivHelper.print_and_log(None, 'Getting duplicated result set, no more new images.')
                    flag = False

            if flag:
                for item in t.itemList:
                    last_image_id = item.imageId
                    PixivHelper.print_and_log(None, f'Image #{images}')
                    PixivHelper.print_and_log(None, f'Image Id: {item.imageId}')

                    if bookmark_count is not None and bookmark_count > item.bookmarkCount:
                        PixivHelper.print_and_log(None, f'Bookmark Count: {item.bookmarkCount}')
                        PixivHelper.print_and_log('info', f'Skipping imageId= {item.imageId} because less than bookmark count limit ({bookmark_count} > {item.bookmarkCount}).')
                        skipped_count = skipped_count + 1
                        continue

                    result = 0
                    while True:
                        try:
                            if t.availableImages > 0:
                                # PixivHelper.print_and_log(None, "Total Images: " + str(t.availableImages))
                                total_image = t.availableImages
                                if(stop_offset > 0 and stop_offset < total_image):
                                    total_image = stop_offset
                                total_image = total_image - start_offset
                                # PixivHelper.print_and_log(None, "Total Images Offset: " + str(total_image))
                            else:
                                total_image = ((i - 1) * 20) + len(t.itemList)
                            title_prefix = "Tags:{0} Page:{1} Image {2}+{3} of {4}".format(tags, i, images, skipped_count, total_image)
                            if member_id is not None:
                                title_prefix = "MemberId: {0} Tags:{1} Page:{2} Image {3}+{4} of {5}".format(member_id,
                                                                                                             tags,
                                                                                                             i,
                                                                                                             images,
                                                                                                             skipped_count,
                                                                                                             total_image)
                            result = PixivConstant.PIXIVUTIL_OK
                            if not caller.DEBUG_SKIP_PROCESS_IMAGE:
                                result = PixivImageHandler.process_image(caller,
                                                                         config,
                                                                         None,
                                                                         item.imageId,
                                                                         user_dir=format_src.rootDirectory,
                                                                         search_tags=search_tags,
                                                                         title_prefix=title_prefix,
                                                                         bookmark_count=item.bookmarkCount,
                                                                         image_response_count=item.imageResponse,
                                                                         notifier=notifier,
                                                                         job_option=job_option)
                                PixivHelper.wait(result, config)
                            break
                        except KeyboardInterrupt:
                            result = PixivConstant.PIXIVUTIL_KEYBOARD_INTERRUPT
                            break
                        except http.client.BadStatusLine:
                            PixivHelper.print_and_log(None, "Stuff happened, trying again after 2 second...")
                            time.sleep(2)

                    images = images + 1
                    if result in (PixivConstant.PIXIVUTIL_SKIP_DUPLICATE,
                                  PixivConstant.PIXIVUTIL_SKIP_LOCAL_LARGER,
                                  PixivConstant.PIXIVUTIL_SKIP_DUPLICATE_NO_WAIT):
                        updated_limit_count = updated_limit_count + 1
                        if config.checkUpdatedLimit != 0 and updated_limit_count >= config.checkUpdatedLimit:
                            PixivHelper.print_and_log(None, f"Skipping tags: {tags}")
                            PixivBrowserFactory.getBrowser().clear_history()
                            return
                        gc.collect()
                        continue
                    elif result == PixivConstant.PIXIVUTIL_KEYBOARD_INTERRUPT:
                        choice = input("Keyboard Interrupt detected, continue to next image (Y/N)").rstrip("\r")
                        if choice.upper() == 'N':
                            PixivHelper.print_and_log("info", f"Tags: {tags}, processing aborted.")
                            flag = False
                            break
                        else:
                            continue

            PixivBrowserFactory.getBrowser().clear_history()

            i = i + 1
            _last_search_result = t

            if end_page != 0 and end_page < i:
                PixivHelper.print_and_log('info', f"End Page reached: {end_page}")
                flag = False
            if t.isLastPage:
                PixivHelper.print_and_log('info', f"Last page: {i - 1}")
                flag = False
            if config.enableInfiniteLoop and i == 1001 and not oldest_first:
                if last_image_id > 0:
                    # get the last date
                    PixivHelper.print_and_log('info', f"Hit page 1000, trying to get workdate for last image id: {last_image_id}.")
                    # referer = 'https://www.pixiv.net/en/artworks/{0}'.format(last_image_id)
                    result = PixivBrowserFactory.getBrowser().getImagePage(last_image_id)
                    _last_date = result[0].worksDateDateTime
                    # _start_date = image.worksDateDateTime + datetime.timedelta(365)
                    # hit the last page
                    i = 1
                    end_date = _last_date.strftime("%Y-%m-%d")
                    PixivHelper.print_and_log('info', f"Hit page 1000, looping back to page 1 with ecd: {end_date}.")
                    flag = True
                    last_image_id = -1
                else:
                    PixivHelper.print_and_log('info', "No more image in the list.")
                    flag = False

        PixivHelper.print_and_log(None, 'done')
        if search_page is not None:
            del search_page
    except KeyboardInterrupt:
        raise
    except BaseException:
        PixivHelper.print_and_log('error', f'Error at process_tags() at page {i}: {sys.exc_info()}')
        try:
            if search_page is not None:
                dump_filename = f'Error page for search tags {tags} at page {i}.html'
                PixivHelper.dump_html(dump_filename, search_page)
                PixivHelper.print_and_log('error', f"Dumping html to: {dump_filename}")
        except BaseException:
            PixivHelper.print_and_log('error', f'Cannot dump page for search tags: {search_tags}')
        raise
Example #2
0
def process_tags(caller,
                 config,
                 tags,
                 page=1,
                 end_page=0,
                 wild_card=True,
                 title_caption=False,
                 start_date=None,
                 end_date=None,
                 use_tags_as_dir=False,
                 member_id=None,
                 bookmark_count=None,
                 sort_order='date_d',
                 type_mode=None,
                 notifier=None):
    if notifier is None:
        notifier = PixivHelper.dummy_notifier

    search_page = None
    _last_search_result = None
    i = page
    updated_limit_count = 0
    empty_page_retry = 0

    try:
        search_tags = PixivHelper.decode_tags(tags)

        root_dir = config.rootDirectory
        if use_tags_as_dir:
            PixivHelper.print_and_log(None, "Save to each directory using query tags.")
            root_dir = config.rootDirectory + os.sep + PixivHelper.sanitize_filename(search_tags)

        tags = PixivHelper.encode_tags(tags)

        images = 1
        last_image_id = -1
        skipped_count = 0
        use_bookmark_data = False
        if bookmark_count is not None and bookmark_count > 0:
            use_bookmark_data = True

        offset = PixivTags.POSTS_PER_PAGE
        start_offset = (page - 1) * offset
        stop_offset = end_page * offset

        PixivHelper.print_and_log('info', f'Searching for: ({search_tags}) {tags} with partial match = {wild_card} and title/caption = {title_caption}')
        flag = True
        while flag:
            (t, search_page) = PixivBrowserFactory.getBrowser().getSearchTagPage(tags,
                                                                                 i,
                                                                                 wild_card=wild_card,
                                                                                 title_caption=title_caption,
                                                                                 start_date=start_date,
                                                                                 end_date=end_date,
                                                                                 member_id=member_id,
                                                                                 sort_order=sort_order,
                                                                                 start_page=page,
                                                                                 use_bookmark_data=use_bookmark_data,
                                                                                 bookmark_count=bookmark_count,
                                                                                 type_mode=type_mode,
                                                                                 r18mode=config.r18mode)

            PixivHelper.print_and_log("info", f'Found {len(t.itemList)} images for page {i}.')
            if len(t.itemList) == 0:
                # Issue #1090
                # check if the available images matching with current page * PixivTags.POSTS_PER_PAGE
                # and wait for {timeout} seconds and retry the page up to {config.retry} times.
                if _last_search_result is not None and _last_search_result.availableImages > (PixivTags.POSTS_PER_PAGE * i) and empty_page_retry < config.retry:
                    PixivHelper.print_and_log("warn", f'Server did not return images, expected to have more (Total Post = {_last_search_result.availableImages}, current max posts = {PixivTags.POSTS_PER_PAGE * i}).')
                    # wait at least 2 minutes before retry
                    delay = config.timeout
                    if delay < 120:
                        delay = 120
                    PixivHelper.print_and_log(None, f"Waiting for {delay} seconds before retrying.")
                    PixivHelper.print_delay(delay)
                    empty_page_retry = empty_page_retry + 1
                    PixivBrowserFactory.getBrowser().addheaders = [('User-agent', f'{config.useragent}{int(time.time())}')]
                    continue
                else:
                    PixivHelper.print_and_log("warn", 'No more images.')
                    flag = False
            elif _last_search_result is not None:
                set1 = set((x.imageId) for x in _last_search_result.itemList)
                difference = [x for x in t.itemList if (x.imageId) not in set1]
                if len(difference) == 0:
                    PixivHelper.print_and_log("warn", 'Getting duplicated result set, no more new images.')
                    flag = False

            if flag:
                # Issue #1090 reset retry flag on succesfull load
                empty_page_retry = 0

                for item in t.itemList:
                    last_image_id = item.imageId
                    PixivHelper.print_and_log(None, f'Image #{images}')
                    PixivHelper.print_and_log(None, f'Image Id: {item.imageId}')

                    if bookmark_count is not None and bookmark_count > item.bookmarkCount:
                        PixivHelper.print_and_log(None, f'Bookmark Count: {item.bookmarkCount}')
                        PixivHelper.print_and_log('info', f'Skipping imageId= {item.imageId} because less than bookmark count limit ({bookmark_count} > {item.bookmarkCount}).')
                        skipped_count = skipped_count + 1
                        continue

                    result = 0
                    while True:
                        try:
                            if t.availableImages > 0:
                                # PixivHelper.print_and_log(None, "Total Images: " + str(t.availableImages))
                                total_image = t.availableImages
                                if(stop_offset > 0 and stop_offset < total_image):
                                    total_image = stop_offset
                                total_image = total_image - start_offset
                                # PixivHelper.print_and_log(None, "Total Images Offset: " + str(total_image))
                            else:
                                total_image = ((i - 1) * 20) + len(t.itemList)
                            title_prefix = "Tags:{0} Page:{1} Image {2}+{3} of {4}".format(tags, i, images, skipped_count, total_image)
                            if member_id is not None:
                                title_prefix = "MemberId: {0} Tags:{1} Page:{2} Image {3}+{4} of {5}".format(member_id,
                                                                                                             tags,
                                                                                                             i,
                                                                                                             images,
                                                                                                             skipped_count,
                                                                                                             total_image)
                            result = PixivConstant.PIXIVUTIL_OK
                            if not caller.DEBUG_SKIP_PROCESS_IMAGE:
                                result = PixivImageHandler.process_image(caller,
                                                                         config,
                                                                         None,
                                                                         item.imageId,
                                                                         user_dir=root_dir,
                                                                         search_tags=search_tags,
                                                                         title_prefix=title_prefix,
                                                                         bookmark_count=item.bookmarkCount,
                                                                         image_response_count=item.imageResponse,
                                                                         notifier=notifier)
                                PixivHelper.wait(result, config)
                            break
                        except KeyboardInterrupt:
                            result = PixivConstant.PIXIVUTIL_KEYBOARD_INTERRUPT
                            break
                        except http.client.BadStatusLine:
                            PixivHelper.print_and_log(None, "Stuff happened, trying again after 2 second...")
                            PixivHelper.print_delay(2)

                    images = images + 1
                    if result in (PixivConstant.PIXIVUTIL_SKIP_DUPLICATE,
                                  PixivConstant.PIXIVUTIL_SKIP_LOCAL_LARGER,
                                  PixivConstant.PIXIVUTIL_SKIP_DUPLICATE_NO_WAIT):
                        updated_limit_count = updated_limit_count + 1
                        if config.checkUpdatedLimit != 0 and updated_limit_count >= config.checkUpdatedLimit:
                            PixivHelper.print_and_log(None, f"Skipping tags: {tags}")
                            PixivBrowserFactory.getBrowser().clear_history()
                            return
                        gc.collect()
                        continue
                    elif result == PixivConstant.PIXIVUTIL_KEYBOARD_INTERRUPT:
                        choice = input("Keyboard Interrupt detected, continue to next image (Y/N)").rstrip("\r")
                        if choice.upper() == 'N':
                            PixivHelper.print_and_log("info", f"Tags: {tags}, processing aborted.")
                            flag = False
                            break
                        else:
                            continue

            PixivBrowserFactory.getBrowser().clear_history()

            i = i + 1
            _last_search_result = t

            if end_page != 0 and end_page < i:
                PixivHelper.print_and_log('info', f"End Page reached: {end_page}")
                flag = False
            if t.isLastPage:
                PixivHelper.print_and_log('info', f"Last page: {i - 1}")
                flag = False
            if config.enableInfiniteLoop and i == 1001 and sort_order != 'date':
                if last_image_id > 0:
                    # get the last date
                    PixivHelper.print_and_log('info', f"Hit page 1000, trying to get workdate for last image id: {last_image_id}.")
                    # referer = 'https://www.pixiv.net/en/artworks/{0}'.format(last_image_id)
                    result = PixivBrowserFactory.getBrowser().getImagePage(last_image_id)
                    _last_date = result[0].worksDateDateTime
                    # _start_date = image.worksDateDateTime + datetime.timedelta(365)
                    # hit the last page
                    i = 1
                    end_date = _last_date.strftime("%Y-%m-%d")
                    PixivHelper.print_and_log('info', f"Hit page 1000, looping back to page 1 with ecd: {end_date}.")
                    flag = True
                    last_image_id = -1
                else:
                    PixivHelper.print_and_log('info', "No more image in the list.")
                    flag = False

        PixivHelper.print_and_log(None, 'done')
        if search_page is not None:
            del search_page
    except KeyboardInterrupt:
        raise
    except BaseException:
        PixivHelper.print_and_log('error', f'Error at process_tags() at page {i}: {sys.exc_info()}')
        try:
            if search_page is not None:
                dump_filename = f'Error page for search tags {tags} at page {i}.html'
                PixivHelper.dump_html(dump_filename, search_page)
                PixivHelper.print_and_log('error', f"Dumping html to: {dump_filename}")
        except BaseException:
            PixivHelper.print_and_log('error', f'Cannot dump page for search tags: {search_tags}')
        raise