Esempio n. 1
0
    def getSearchTagPage(self, tags,
                         current_page,
                         wild_card=True,
                         title_caption=False,
                         start_date=None,
                         end_date=None,
                         member_id=None,
                         oldest_first=False,
                         start_page=1):
        """Fetch and parse one page of a tag search.

        When ``member_id`` is given, the page is obtained through
        ``self.getMemberPage`` and converted with
        ``PixivModelWhiteCube.PixivTags.parseMemberTags``.  Otherwise a search
        URL is built with ``PixivHelper.generateSearchTagUrl``, fetched through
        ``self.getPixivPage`` and parsed with ``PixivModel.PixivTags.parseTags``.

        Args:
            tags: search tags; concatenated into the dump file name, so it
                must be a string.
            current_page: page number passed to ``getMemberPage`` or
                ``generateSearchTagUrl``.
            wild_card, title_caption, start_date, end_date, oldest_first:
                forwarded unchanged to ``PixivHelper.generateSearchTagUrl``;
                ignored when ``member_id`` is given.
            member_id: when not None, restricts the search to that member.
            start_page: accepted for interface compatibility; not used here.

        Returns:
            A ``(result, response)`` tuple: the populated tags object and the
            response it was built from.

        Raises:
            Whatever ``parseTags`` raises; the fetched page is dumped through
            ``PixivHelper.dumpHtml`` before the exception is re-raised.
        """
        if member_id is not None:
            # from member id search by tags
            (artist, response) = self.getMemberPage(member_id, current_page, False, tags)

            # convert to PixivTags
            result = PixivModelWhiteCube.PixivTags()
            result.parseMemberTags(artist, member_id, tags)
            return (result, response)

        # search by tags -- member_id is always None from here on, so the
        # member-specific parse branch the original kept here was unreachable.
        url = PixivHelper.generateSearchTagUrl(tags, current_page,
                                               title_caption,
                                               wild_card,
                                               oldest_first,
                                               start_date,
                                               end_date,
                                               member_id,
                                               self._config.r18mode)

        PixivHelper.print_and_log('info', 'Looping... for ' + url)
        response = self.getPixivPage(url, returnParsed=False).read()
        self.handleDebugTagSearchPage(response, url)

        parse_search_page = BeautifulSoup(response)
        result = PixivModel.PixivTags()
        try:
            result.parseTags(parse_search_page, tags)
        except BaseException:
            # keep the offending page on disk for diagnosis, then propagate
            PixivHelper.dumpHtml("Dump for SearchTags " + tags + ".html", response)
            raise
        finally:
            # release the parse tree on the failure path as well
            parse_search_page.decompose()
            del parse_search_page

        return (result, response)
Esempio n. 2
0
    def getSearchTagPage(self, tags,
                         current_page,
                         wild_card=True,
                         title_caption=False,
                         start_date=None,
                         end_date=None,
                         member_id=None,
                         oldest_first=False,
                         start_page=1):
        """Return one parsed page of tag search results.

        Two paths exist:

        * ``member_id`` given: the page comes from ``self.getMemberPage`` and
          is converted by ``PixivModelWhiteCube.PixivTags.parseMemberTags``.
        * ``member_id`` is None: the URL from
          ``PixivHelper.generateSearchTagUrl`` is fetched via
          ``self.getPixivPage`` and parsed by ``PixivModel.PixivTags.parseTags``.

        Args:
            tags: search tags (string; also used in the dump file name).
            current_page: page number to request.
            wild_card, title_caption, start_date, end_date, oldest_first:
                passed through to ``PixivHelper.generateSearchTagUrl``; only
                used on the plain tag-search path.
            member_id: optional member to restrict the search to.
            start_page: unused by this method; kept so callers need not change.

        Returns:
            Tuple ``(result, response)`` of the tags object and the response
            it was parsed from.

        Raises:
            Any exception from ``parseTags``, re-raised after the page has
            been written out with ``PixivHelper.dumpHtml``.
        """
        if member_id is not None:
            # from member id search by tags
            (artist, response) = self.getMemberPage(member_id, current_page, False, tags)

            # convert to PixivTags
            member_tags = PixivModelWhiteCube.PixivTags()
            member_tags.parseMemberTags(artist, member_id, tags)
            return (member_tags, response)

        # Plain tag search.  member_id is None on this path, which is why the
        # former nested "member_id is not None" branch could never execute.
        url = PixivHelper.generateSearchTagUrl(tags, current_page,
                                               title_caption,
                                               wild_card,
                                               oldest_first,
                                               start_date,
                                               end_date,
                                               member_id,
                                               self._config.r18mode)

        PixivHelper.print_and_log('info', 'Looping... for ' + url)
        response = self.getPixivPage(url, returnParsed=False).read()
        self.handleDebugTagSearchPage(response, url)

        parse_search_page = BeautifulSoup(response)
        result = PixivModel.PixivTags()
        try:
            result.parseTags(parse_search_page, tags)
        except BaseException:
            # dump the raw page so the parse failure can be investigated
            PixivHelper.dumpHtml("Dump for SearchTags " + tags + ".html", response)
            raise
        finally:
            # always free the parse tree, even if parsing failed
            parse_search_page.decompose()
            del parse_search_page

        return (result, response)
Esempio n. 3
0
    def getSearchTagPage(self,
                         tags,
                         current_page,
                         wild_card=True,
                         title_caption=False,
                         start_date=None,
                         end_date=None,
                         member_id=None,
                         oldest_first=False,
                         start_page=1):
        """Fetch and parse one page of a tag search.

        With ``member_id`` set, the page is retrieved through
        ``self.getMemberPage`` and converted by
        ``PixivModelWhiteCube.PixivTags.parseMemberTags``.  Without it, the URL
        produced by ``PixivHelper.generateSearchTagUrl`` is fetched with
        ``self.getPixivPage`` and parsed by ``PixivModel.PixivTags.parseTags``.

        Args:
            tags: search tags (string; also used to name the dump file).
            current_page: page number to request.
            wild_card, title_caption, start_date, end_date, oldest_first:
                forwarded to ``PixivHelper.generateSearchTagUrl``; unused on
                the member path.
            member_id: optional member id to search within.
            start_page: not used by this method; retained for callers.

        Returns:
            ``(result, response)``: the tags object and the response it was
            parsed from.

        Raises:
            Any exception raised by ``parseTags``; the page is dumped with
            ``PixivHelper.dumpHtml`` first.
        """
        if member_id is not None:
            # NOTE: the commented-out whitecube RPC search that used to sit
            # here was dropped; it referenced a variable ``i`` that this
            # method does not define.
            # from member id search by tags
            (artist, response) = self.getMemberPage(member_id, current_page,
                                                    False, tags)

            # convert to PixivTags
            result = PixivModelWhiteCube.PixivTags()
            result.parseMemberTags(artist, member_id, tags)
            return (result, response)

        # search by tags; member_id is None here, so no member-specific
        # parsing is possible on this path
        url = PixivHelper.generateSearchTagUrl(tags, current_page,
                                               title_caption, wild_card,
                                               oldest_first, start_date,
                                               end_date, member_id,
                                               self._config.r18mode)

        PixivHelper.print_and_log('info', 'Looping... for ' + url)
        response = self.getPixivPage(url, returnParsed=False).read()
        self.handleDebugTagSearchPage(response, url)

        parse_search_page = BeautifulSoup(response)
        result = PixivModel.PixivTags()
        try:
            result.parseTags(parse_search_page, tags)
        except BaseException:
            # preserve the page that failed to parse before propagating
            PixivHelper.dumpHtml(
                "Dump for SearchTags " + tags + ".html", response)
            raise
        finally:
            # the tree is released whether or not parsing succeeded
            parse_search_page.decompose()
            del parse_search_page

        return (result, response)
Esempio n. 4
0
    def getSearchTagPage(self, tags,
                         current_page,
                         wild_card=True,
                         title_caption=False,
                         start_date=None,
                         end_date=None,
                         member_id=None,
                         oldest_first=False,
                         start_page=1,
                         include_bookmark_data=False):
        """Fetch and parse one page of a tag search.

        With ``member_id`` set, the page comes from ``self.getMemberPage`` and
        is converted by ``PixivTags.parseMemberTags``.  Otherwise the URL from
        ``PixivHelper.generateSearchTagUrl`` is fetched with
        ``self.getPixivPage`` and parsed by ``PixivTags.parseTags``.

        Args:
            tags: search tags (string; also used to name the dump file).
            current_page: page number to request.
            wild_card, title_caption, start_date, end_date, oldest_first:
                forwarded to ``PixivHelper.generateSearchTagUrl``; unused on
                the member path.
            member_id: optional member id to search within.
            start_page: not used by this method; retained for callers.
            include_bookmark_data: when true (tag-search path only), each item
                in ``result.itemList`` gets ``bookmarkCount`` and
                ``imageResponse`` filled from
                ``https://www.pixiv.net/ajax/illust/<imageId>``.

        Returns:
            ``(result, response)``: the tags object and the search/member page
            response it was parsed from.

        Raises:
            urllib2.HTTPError: for a non-404 failure while fetching bookmark
                data.
            Any other exception raised while parsing; on the tag-search path
            the search page is dumped with ``PixivHelper.dumpHtml`` first.
        """
        if member_id is not None:
            # from member id search by tags
            (artist, response) = self.getMemberPage(member_id, current_page, False, tags)

            # convert to PixivTags
            result = PixivTags()
            result.parseMemberTags(artist, member_id, tags)
            return (result, response)

        # search by tags; member_id is None here, so the nested member branch
        # the original carried on this path could never run
        url = PixivHelper.generateSearchTagUrl(tags, current_page,
                                               title_caption,
                                               wild_card,
                                               oldest_first,
                                               start_date,
                                               end_date,
                                               member_id,
                                               self._config.r18mode)

        PixivHelper.print_and_log('info', 'Looping... for ' + url)
        response = self.getPixivPage(url, returnParsed=False)
        self.handleDebugTagSearchPage(response, url)

        try:
            result = PixivTags()
            result.parseTags(response, tags, current_page)

            # parse additional information
            if include_bookmark_data:
                total = len(result.itemList)
                print("Retrieving bookmark information...", end=' ')
                for idx, image in enumerate(result.itemList, 1):
                    print("\r", end=' ')
                    print("Retrieving bookmark information... [{0}] of [{1}]".format(idx, total), end=' ')

                    img_url = "https://www.pixiv.net/ajax/illust/{0}".format(image.imageId)
                    # Use a dedicated name: reusing ``response`` here made the
                    # method return (and dump) the last illustration's JSON
                    # instead of the search page.
                    image_response = self._get_from_cache(img_url)
                    if image_response is None:
                        try:
                            image_response = self.open_with_retry(img_url).read()
                        except urllib2.HTTPError as ex:
                            if ex.code != 404:
                                # previously swallowed, which cached None and
                                # then failed inside json.loads
                                raise
                            image_response = ex.read()
                        self._put_to_cache(img_url, image_response)

                    image_info_js = json.loads(image_response)
                    image.bookmarkCount = int(image_info_js["body"]["bookmarkCount"])
                    image.imageResponse = int(image_info_js["body"]["responseCount"])
            print("")
        except BaseException:
            PixivHelper.dumpHtml("Dump for SearchTags " + tags + ".html", response)
            raise

        return (result, response)
Esempio n. 5
0
    def getSearchTagPage(self,
                         tags,
                         i,
                         wild_card=True,
                         title_caption=False,
                         start_date=None,
                         end_date=None,
                         member_id=None,
                         oldest_first=False,
                         start_page=1):
        """Fetch and parse one page of a tag search.

        In whitecube mode (``self._isWhitecube``) the whitecube RPC URL is
        built here and parsed by ``PixivModelWhiteCube.PixivTags.parseTags``;
        searching within a member is not supported in that mode.  Otherwise
        the URL comes from ``PixivHelper.generateSearchTagUrl`` and the page is
        parsed by ``PixivModel.PixivTags``.

        Args:
            tags: search tags; inserted as-is into the whitecube ``q``
                parameter and the dump file name.
            i: page number (whitecube ``p`` parameter).
            wild_card: whitecube ``s_mode`` becomes ``s_tag`` when true; takes
                precedence over ``title_caption``.
            title_caption: whitecube ``s_mode`` becomes ``s_tc`` when true and
                ``wild_card`` is false; with both false it is ``s_tag_full``.
            start_date: optional whitecube ``scd`` parameter.
            end_date: optional whitecube ``ecd`` parameter.
            member_id: optional member to search within (non-whitecube only).
            oldest_first: whitecube ``order`` is ``date`` when true, else
                ``date_d``.
            start_page: whitecube ``start_page`` parameter; unused otherwise.

        Returns:
            ``(result, response)``; both are None in whitecube mode when
            ``member_id`` is given.

        Raises:
            Any exception from the non-whitecube ``parseTags``; the page is
            dumped with ``PixivHelper.dumpHtml`` before re-raising.
        """
        response = None
        result = None

        if self._isWhitecube:
            if member_id is None:
                # from search page:
                # https://www.pixiv.net/rpc/whitecube/index.php?order=date&adult_mode=include&q=vocaloid&p=0&type=&mode=whitecube_search&s_mode=s_tag&scd=&size=&ratio=&like=&tools=&tt=4e2cdee233f1156231ee99da1e51a83c

                # date ordering
                order = "date" if oldest_first else "date_d"

                # search mode
                if wild_card:
                    s_mode = "s_tag"
                elif title_caption:
                    s_mode = "s_tc"
                else:
                    s_mode = "s_tag_full"

                url_parts = [
                    "https://www.pixiv.net/rpc/whitecube/index.php?q={0}".format(tags),
                    "&adult_mode={0}".format("include"),
                    "&mode={0}".format("whitecube_search"),
                    "&order={0}".format(order),
                    "&s_mode={0}".format(s_mode),
                ]

                # start/end date
                if start_date is not None:
                    url_parts.append("&scd={0}".format(start_date))
                if end_date is not None:
                    url_parts.append("&ecd={0}".format(end_date))

                url_parts.append("&p={0}".format(i))
                url_parts.append("&start_page={0}".format(start_page))
                url_parts.append("&tt={0}".format(self._whitecubeToken))
                url = "".join(url_parts)

                PixivHelper.printAndLog('info',
                                        'Looping for {0} ...'.format(url))
                response = self.open(url).read()
                PixivHelper.GetLogger().debug(response)
                result = PixivModelWhiteCube.PixivTags()
                result.parseTags(response, tags)
            else:
                # from member id search by tags
                # single-argument call form prints the same text on
                # Python 2 and Python 3
                print("Not supported yet")
        else:
            url = PixivHelper.generateSearchTagUrl(tags, i, title_caption,
                                                   wild_card, oldest_first,
                                                   start_date, end_date,
                                                   member_id,
                                                   self._config.r18mode)

            PixivHelper.printAndLog('info', 'Looping... for ' + url)
            response = self.open(url).read()
            parse_search_page = BeautifulSoup(response)

            try:
                if self._config.dumpTagSearchPage and self._config.enableDump:
                    dump_filename = PixivHelper.dumpHtml(url + ".html",
                                                         parse_search_page)
                    PixivHelper.printAndLog(
                        'info', "Dump tag search page to: " + dump_filename)

                result = PixivModel.PixivTags()
                if member_id is not None:
                    result.parseMemberTags(parse_search_page, member_id, tags)
                else:
                    try:
                        result.parseTags(parse_search_page, tags)
                    except BaseException:
                        # The original dumped ``search_page.get_data()``, but
                        # no ``search_page`` exists here, so a NameError hid
                        # the real parse failure.  Dump the fetched page.
                        PixivHelper.dumpHtml(
                            "Dump for SearchTags " + tags + ".html",
                            response)
                        raise
            finally:
                # release the parse tree on success and on failure
                parse_search_page.decompose()
                del parse_search_page

        return (result, response)
Esempio n. 6
0
    def getSearchTagPage(self, tags,
                         current_page,
                         wild_card=True,
                         title_caption=False,
                         start_date=None,
                         end_date=None,
                         member_id=None,
                         oldest_first=False,
                         start_page=1):
        """Return one parsed page of tag search results.

        With ``member_id`` set, the page is retrieved through
        ``self.getMemberPage`` and converted by
        ``PixivModelWhiteCube.PixivTags.parseMemberTags``.  Without it, the URL
        from ``PixivHelper.generateSearchTagUrl`` is fetched with
        ``self.getPixivPage`` and parsed by ``PixivModel.PixivTags.parseTags``.

        Args:
            tags: search tags (string; also used in the dump file name).
            current_page: page number to request.
            wild_card, title_caption, start_date, end_date, oldest_first:
                handed to ``PixivHelper.generateSearchTagUrl``; unused when
                ``member_id`` is given.
            member_id: optional member id to search within.
            start_page: unused by this method; kept for interface stability.

        Returns:
            Tuple ``(result, response)`` of the tags object and the response
            it was built from.

        Raises:
            Any exception raised by ``parseTags``, after the page has been
            dumped via ``PixivHelper.dumpHtml``.
        """
        if member_id is not None:
            # NOTE: a commented-out whitecube RPC search implementation was
            # removed from this spot; it relied on an undefined variable ``i``.
            # from member id search by tags
            (artist, response) = self.getMemberPage(member_id, current_page, False, tags)

            # convert to PixivTags
            result = PixivModelWhiteCube.PixivTags()
            result.parseMemberTags(artist, member_id, tags)
            return (result, response)

        # search by tags -- member_id is None on this path, so the nested
        # member-specific parse branch was dead code and has been dropped
        url = PixivHelper.generateSearchTagUrl(tags, current_page,
                                               title_caption,
                                               wild_card,
                                               oldest_first,
                                               start_date,
                                               end_date,
                                               member_id,
                                               self._config.r18mode)

        PixivHelper.print_and_log('info', 'Looping... for ' + url)
        response = self.getPixivPage(url, returnParsed=False).read()
        self.handleDebugTagSearchPage(response, url)

        parse_search_page = BeautifulSoup(response)
        result = PixivModel.PixivTags()
        try:
            result.parseTags(parse_search_page, tags)
        except BaseException:
            # write the unparseable page to disk before propagating
            PixivHelper.dumpHtml("Dump for SearchTags " + tags + ".html", response)
            raise
        finally:
            # decompose in all cases so a failed parse does not keep the tree
            parse_search_page.decompose()
            del parse_search_page

        return (result, response)