Example #1
0
    def parsePost(self, jsPost):
        """Fill this post's metadata from a Fanbox post JSON mapping.

        Reads the title, cover image, publish/update timestamps, optional
        fee, post type and like count from ``jsPost``. A post whose
        ``"body"`` is ``None`` is flagged as restricted.

        Raises:
            PixivException: when the post type is not listed in
                ``FanboxPost._supportedType``.
        """
        self.imageTitle = jsPost["title"]

        # Issue #930: only adopt the cover when none is stored yet and the
        # post actually carries one.
        cover_source = jsPost["coverImageUrl"]
        if not self.coverImageUrl and cover_source:
            # the rewritten URL is stored; the untouched one is registered
            # with the embedded files
            self.coverImageUrl = _re_fanbox_cover.sub("fanbox", cover_source)
            self.try_add(cover_source, self.embeddedFiles)

        # keep both the raw strings and their parsed counterparts
        published_raw = jsPost["publishedDatetime"]
        self.worksDate = published_raw
        self.worksDateDateTime = datetime_z.parse_datetime(published_raw)
        updated_raw = jsPost["updatedDatetime"]
        self.updatedDate = updated_raw
        self.updatedDateDatetime = datetime_z.parse_datetime(updated_raw)

        if "feeRequired" in jsPost:
            self.feeRequired = jsPost["feeRequired"]

        # Issue #420: shift the publish date into the configured timezone
        if self._tzInfo is not None:
            localized = self.worksDateDateTime.astimezone(self._tzInfo)
            self.worksDateDateTime = localized

        self.type = jsPost["type"]
        if self.type not in FanboxPost._supportedType:
            raise PixivException(
                f"Unsupported post type = {self.type} for post = {self.imageId}",
                errorCode=9999,
                htmlPage=jsPost)

        self.likeCount = int(jsPost["likeCount"])
        # a missing body marks the post as restricted
        if jsPost["body"] is None:
            self.is_restricted = True
    def parse_post(self, page):
        """Load title, caption, tags, image URLs and dates from ``page``.

        ``page`` is indexed as a mapping with the keys ``"user"``,
        ``"text"``, ``"tags"``, ``"media"``, ``"published_at"`` and
        ``"updated_at"``. Dates are converted to ``self._tzInfo`` when it is
        set and rendered with ``self.dateFormat`` (default
        ``"%m/%d/%y %H:%M"``).
        """
        # the user's name doubles as the post title
        self.imageTitle = page["user"]["name"]
        self.imageCaption = page["text"]

        self.imageTags = []
        self.tags = []
        for tag_name in page["tags"]:
            self.imageTags.append(tag_name)
            self.tags.append(PixivTagData(tag_name, None))

        # imageMode ends up holding the type of the last media entry
        for entry in page["media"]:
            self.imageMode = entry["type"]
            photo = entry["photo"]
            self.imageUrls.append(photo["original"]["url"])
            self.imageResizedUrls.append(photo["w540"]["url"])

        published_text = str(page["published_at"])
        self.worksDateDateTime = datetime_z.parse_datetime(published_text)
        if self._tzInfo is not None:
            self.worksDateDateTime = self.worksDateDateTime.astimezone(self._tzInfo)

        updated_text = str(page["updated_at"])
        self.worksUpdateDateTime = datetime_z.parse_datetime(updated_text)
        if self._tzInfo is not None:
            self.worksUpdateDateTime = self.worksUpdateDateTime.astimezone(self._tzInfo)

        # e.g. 2/27/2018 12:31 unless a custom format is configured
        date_format = self.dateFormat or "%m/%d/%y %H:%M"
        self.worksDate = self.worksDateDateTime.strftime(date_format)
        self.worksUpdateDate = self.worksUpdateDateTime.strftime(date_format)
    def parse_post(self, page):
        """Load title, caption, tags, image URLs and dates from ``page``.

        ``page`` is indexed as a mapping with the keys ``"user"``,
        ``"text"``, ``"tags"``, ``"media"``, ``"published_at"`` and
        ``"updated_at"``; a truthy optional ``"is_r18"`` adds an ``R-18``
        tag. Dates are converted to ``self._tzInfo`` when it is set and
        rendered with ``self.dateFormat`` (default ``"%Y-%m-%d"``).
        """
        # the user's name doubles as the post title
        self.imageTitle = page["user"]["name"]
        self.imageCaption = page["text"]

        self.imageTags = []
        self.tags = []
        for tag_name in page["tags"]:
            self.imageTags.append(tag_name)
            self.tags.append(PixivTagData(tag_name, None))

        # flag adult posts with an explicit R-18 tag
        is_r18 = "is_r18" in page and page["is_r18"]
        if is_r18:
            self.imageTags.append('R-18')
            self.tags.append(PixivTagData('R-18', None))

        # imageMode ends up holding the type of the last media entry
        for entry in page["media"]:
            self.imageMode = entry["type"]
            photo = entry["photo"]
            self.imageUrls.append(photo["original"]["url"])
            self.imageResizedUrls.append(photo["w540"]["url"])

        published_text = str(page["published_at"])
        self.worksDateDateTime = datetime_z.parse_datetime(published_text)
        if self._tzInfo is not None:
            self.worksDateDateTime = self.worksDateDateTime.astimezone(self._tzInfo)

        updated_text = str(page["updated_at"])
        self.worksUpdateDateTime = datetime_z.parse_datetime(updated_text)
        if self._tzInfo is not None:
            self.worksUpdateDateTime = self.worksUpdateDateTime.astimezone(self._tzInfo)

        # e.g. 2018-07-22 unless a custom format is configured in config.ini
        date_format = self.dateFormat or "%Y-%m-%d"
        self.worksDate = self.worksDateDateTime.strftime(date_format)
        self.worksUpdateDate = self.worksUpdateDateTime.strftime(date_format)
Example #4
0
    def parsePost(self, jsPost):
        """Fill this post's metadata from a Fanbox post JSON mapping.

        Stores the title, cover image URL (also added to
        ``self.embeddedFiles`` when not already present), publish/update
        timestamps, optional fee, post type and like count. A post whose
        ``"body"`` is ``None`` is flagged as restricted.

        Raises:
            PixivException: when the post type is not listed in
                ``FanboxPost._supportedType``.
        """
        self.imageTitle = jsPost["title"]

        cover = jsPost["coverImageUrl"]
        self.coverImageUrl = cover
        # register the cover once; skip when absent or already known
        if not (cover is None or cover in self.embeddedFiles):
            self.embeddedFiles.append(cover)

        # keep both the raw strings and their parsed counterparts
        published_raw = jsPost["publishedDatetime"]
        self.worksDate = published_raw
        self.worksDateDateTime = datetime_z.parse_datetime(published_raw)
        updated_raw = jsPost["updatedDatetime"]
        self.updatedDate = updated_raw
        self.updatedDateDatetime = datetime_z.parse_datetime(updated_raw)

        if "feeRequired" in jsPost:
            self.feeRequired = jsPost["feeRequired"]

        # Issue #420: shift the publish date into the configured timezone
        if self._tzInfo is not None:
            localized = self.worksDateDateTime.astimezone(self._tzInfo)
            self.worksDateDateTime = localized

        self.type = jsPost["type"]
        if self.type not in FanboxPost._supportedType:
            message = f"Unsupported post type = {self.type} for post = {self.imageId}"
            raise PixivException(message, errorCode=9999, htmlPage=jsPost)

        self.likeCount = int(jsPost["likeCount"])
        # a missing body marks the post as restricted
        if jsPost["body"] is None:
            self.is_restricted = True
Example #5
0
    def parse(self):
        """Decode ``self.novel_json_str`` and copy the novel details onto self.

        Populates title, content, author id, counters, series information,
        flags, dates and tags from the ``"body"`` of the decoded JSON.

        Raises:
            PixivException: when the decoded JSON has a truthy ``"error"``.
        """
        payload = json.loads(self.novel_json_str)
        if payload["error"]:
            raise PixivException("Cannot get novel details",
                                 errorCode=PixivException.UNKNOWN_IMAGE_ERROR,
                                 htmlPage=self.novel_json_str)

        novel = payload["body"]

        self.imageTitle = novel["title"]
        self.content = novel["content"]
        self.artist_id = novel["userId"]
        self.bookmark_count = novel["bookmarkCount"]
        self.image_response_count = novel["imageResponseCount"]

        # series membership is optional
        series = novel["seriesNavData"]
        self.seriesNavData = series
        if series is not None:
            self.seriesId = series["seriesId"]
            self.seriesOrder = series["order"]

        self.isOriginal = novel["isOriginal"]
        self.isBungei = novel["isBungei"]
        self.language = novel["language"]
        self.xRestrict = novel["xRestrict"]

        # datetime
        created_raw = novel["createDate"]
        self.worksDateDateTime = datetime_z.parse_datetime(created_raw)
        self.uploadDate = datetime_z.parse_datetime(novel["uploadDate"])
        self.js_createDate = created_raw  # kept verbatim for the json file
        if self._tzInfo is not None:
            self.worksDateDateTime = self.worksDateDateTime.astimezone(self._tzInfo)
            self.uploadDate = self.uploadDate.astimezone(self._tzInfo)

        # e.g. 2018-07-22 unless a custom format is configured in config.ini
        date_format = self.dateFormat or "%Y-%m-%d"
        self.worksDate = self.worksDateDateTime.strftime(date_format)

        # tags
        self.imageTags = []
        self.tags = []
        if novel["tags"] is not None:
            for entry in novel["tags"]["tags"]:
                tag_name = entry["tag"]
                self.imageTags.append(tag_name)
                self.tags.append(PixivTagData(tag_name, entry))

        # original works get a synthetic "original" tag appended
        if novel["isOriginal"]:
            original_tag = {
                "tag": "オリジナル",
                "locked": True,
                "deletable": False,
                "userId": "",
                "romaji": "original",
                "translation": {
                    "en": "original"
                }
            }
            self.imageTags.append(original_tag["tag"])
            self.tags.append(PixivTagData(original_tag["tag"], original_tag))
Example #6
0
    def parsePost(self, jsPost):
        """Fill this post's metadata from a Fanbox post JSON mapping.

        Stores the title, cover image URL, publish date (raw and parsed),
        raw update timestamp, post type and like count. A post whose
        ``"body"`` is ``None`` is flagged as restricted.

        Raises:
            PixivException: when the post type is not one of ``"image"``,
                ``"text"`` or ``"file"``. The message names both the type
                and the post id.
        """
        self.imageTitle = jsPost["title"]
        self.coverImageUrl = jsPost["coverImageUrl"]
        self.worksDate = jsPost["publishedDatetime"]
        self.worksDateDateTime = datetime_z.parse_datetime(self.worksDate)
        self.updatedDatetime = jsPost["updatedDatetime"]
        self.type = jsPost["type"]
        if self.type not in ("image", "text", "file"):
            # the template needs a {1} placeholder, otherwise the post id
            # passed to format() never reaches the message
            raise PixivException(
                "Unsupported post type = {0} for post = {1}".format(self.type, self.imageId),
                errorCode=9999,
                htmlPage=jsPost)

        self.likeCount = int(jsPost["likeCount"])
        # a missing body marks the post as restricted
        if jsPost["body"] is None:
            self.is_restricted = True
Example #7
0
    def parsePost(self, jsPost):
        """Fill this post's metadata from a Fanbox post JSON mapping.

        Stores the title, cover image URL (also appended to
        ``self.embeddedFiles`` when present), publish date, raw update
        timestamp, post type and like count. A post whose ``"body"`` is
        ``None`` is flagged as restricted.

        Raises:
            PixivException: when the post type is not listed in
                ``FanboxPost._supportedType``.
        """
        self.imageTitle = jsPost["title"]

        cover = jsPost["coverImageUrl"]
        self.coverImageUrl = cover
        if cover is not None:
            self.embeddedFiles.append(cover)

        published_raw = jsPost["publishedDatetime"]
        self.worksDate = published_raw
        self.worksDateDateTime = datetime_z.parse_datetime(published_raw)
        # Issue #420: shift the publish date into the configured timezone
        if self._tzInfo is not None:
            localized = self.worksDateDateTime.astimezone(self._tzInfo)
            self.worksDateDateTime = localized

        self.updatedDatetime = jsPost["updatedDatetime"]
        self.type = jsPost["type"]
        if self.type not in FanboxPost._supportedType:
            message = "Unsupported post type = {0} for post = {1}".format(self.type, self.imageId)
            raise PixivException(message, errorCode=9999, htmlPage=jsPost)

        self.likeCount = int(jsPost["likeCount"])
        # a missing body marks the post as restricted
        if jsPost["body"] is None:
            self.is_restricted = True
Example #8
0
def process_fanbox_post(caller, config, post, artist):
    """Download the cover, images and side files of one Fanbox post.

    The post is first recorded via ``db.insertPost``. When
    ``config.checkDBProcessHistory`` is set and the stored update date is
    not older than ``post.updatedDateDatetime``, the post is treated as
    already processed. Otherwise the post is refreshed through the browser,
    the cover and every image are downloaded, and the optional info/HTML/URL
    files are written according to ``config``.

    Every downloaded file is collected and handed to
    ``db.insertPostImages`` in a ``finally`` clause, so partial progress is
    recorded even when the function returns early or raises. The stored
    update date is only refreshed when the whole ``try`` block completes
    without returning or raising.

    Args:
        caller: Object exposing ``__dbManager__``; also forwarded to
            ``PixivDownloadHandler.download_image``.
        config: Configuration object supplying the filename formats and
            download/write switches read below.
        post: The Fanbox post to process; ``post.linkToFile`` is updated
            with the local filename of each downloaded URL.
        artist: Artist information used for filenames and the referer URL.

    Raises:
        KeyboardInterrupt: when an image download reports
            ``PixivConstant.PIXIVUTIL_ABORTED``.
    """
    # caller function/method
    # TODO: ideally to be removed or passed as argument
    db = caller.__dbManager__
    br = PixivBrowserFactory.getBrowser()

    db.insertPost(artist.artistId, post.imageId, post.imageTitle, post.feeRequired, post.worksDate, post.type)

    post_files = []

    flag_processed = False
    if config.checkDBProcessHistory:
        result = db.selectPostByPostId(post.imageId)
        if result:
            # index 5 of the stored row is used as the last update date
            updated_date = result[5]
            if updated_date is not None and post.updatedDateDatetime <= datetime_z.parse_datetime(updated_date):
                flag_processed = True

    try:
        if not post.is_restricted and not flag_processed:
            br.fanboxUpdatePost(post)

        if ((not post.is_restricted) or config.downloadCoverWhenRestricted) and (not flag_processed) and config.downloadCover:
            # cover image
            if post.coverImageUrl is not None:
                # fake the image_url for filename compatibility, add post id and pagenum
                fake_image_url = post.coverImageUrl.replace("{0}/cover/".format(post.imageId),
                                                            "{0}_".format(post.imageId))
                filename = PixivHelper.make_filename(config.filenameFormatFanboxCover,
                                                     post,
                                                     artistInfo=artist,
                                                     tagsSeparator=config.tagsSeparator,
                                                     tagsLimit=config.tagsLimit,
                                                     fileUrl=fake_image_url,
                                                     bookmark=None,
                                                     searchTags='',
                                                     useTranslatedTag=config.useTranslatedTag,
                                                     tagTranslationLocale=config.tagTranslationLocale)
                filename = PixivHelper.sanitize_filename(filename, config.rootDirectory)
                post.linkToFile[post.coverImageUrl] = filename

                print("Downloading cover from {0}".format(post.coverImageUrl))
                print("Saved to {0}".format(filename))

                referer = "https://www.pixiv.net/fanbox/creator/{0}/post/{1}".format(artist.artistId, post.imageId)
                # don't pass the post id and page number to skip db check
                (result, filename) = PixivDownloadHandler.download_image(caller,
                                                                         post.coverImageUrl,
                                                                         filename,
                                                                         referer,
                                                                         config.overwrite,
                                                                         config.retry,
                                                                         config.backupOldFile,
                                                                         image=post)
                # the cover is recorded with page number -1
                post_files.append((post.imageId, -1, filename))
                PixivHelper.get_logger().debug("Download %s result: %s", filename, result)
            else:
                PixivHelper.print_and_log("info", "No Cover Image for post: {0}.".format(post.imageId))

        if post.is_restricted:
            PixivHelper.print_and_log("info", "Skipping post: {0} due to restricted post.".format(post.imageId))
            return

        if flag_processed:
            PixivHelper.print_and_log("info", "Skipping post: {0} because it was downloaded before.".format(post.imageId))
            return

        if not post.images:
            PixivHelper.print_and_log("info", "No Image available in post: {0}.".format(post.imageId))
        else:
            print("Image Count = {0}".format(len(post.images)))
            for current_page, image_url in enumerate(post.images):
                # fake the image_url for filename compatibility, add post id and pagenum
                fake_image_url = image_url.replace("{0}/".format(post.imageId),
                                                   "{0}_p{1}_".format(post.imageId, current_page))
                filename = PixivHelper.make_filename(config.filenameFormatFanboxContent,
                                                     post,
                                                     artistInfo=artist,
                                                     tagsSeparator=config.tagsSeparator,
                                                     tagsLimit=config.tagsLimit,
                                                     fileUrl=fake_image_url,
                                                     bookmark=None,
                                                     searchTags='',
                                                     useTranslatedTag=config.useTranslatedTag,
                                                     tagTranslationLocale=config.tagTranslationLocale)

                filename = PixivHelper.sanitize_filename(filename, config.rootDirectory)

                post.linkToFile[image_url] = filename

                referer = "https://www.pixiv.net/fanbox/creator/{0}/post/{1}".format(artist.artistId, post.imageId)

                print("Downloading image {0} from {1}".format(current_page, image_url))
                print("Saved to {0}".format(filename))

                # filesize detection and overwrite issue
                _oldvalue = config.alwaysCheckFileSize
                config.alwaysCheckFileSize = False
                try:
                    # don't pass the post id and page number to skip db check
                    (result, filename) = PixivDownloadHandler.download_image(caller,
                                                                             image_url,
                                                                             filename,
                                                                             referer,
                                                                             False,  # config.overwrite somehow unable to get remote filesize
                                                                             config.retry,
                                                                             config.backupOldFile,
                                                                             image=post)
                finally:
                    # restore the shared config flag even when the download
                    # raises, so the temporary override cannot leak out
                    config.alwaysCheckFileSize = _oldvalue

                if result == PixivConstant.PIXIVUTIL_ABORTED:
                    raise KeyboardInterrupt()
                post_files.append((post.imageId, current_page, filename))

                PixivHelper.get_logger().debug("Download %s result: %s", filename, result)

        # Implement #447
        filename = PixivHelper.make_filename(config.filenameFormatFanboxInfo,
                                             post,
                                             artistInfo=artist,
                                             tagsSeparator=config.tagsSeparator,
                                             tagsLimit=config.tagsLimit,
                                             fileUrl="{0}".format(post.imageId),
                                             bookmark=None,
                                             searchTags='',
                                             useTranslatedTag=config.useTranslatedTag,
                                             tagTranslationLocale=config.tagTranslationLocale)

        filename = PixivHelper.sanitize_filename(filename, config.rootDirectory)
        if config.writeImageInfo:
            post.WriteInfo(filename + ".txt")
        if config.writeHtml:
            # post.images may be None (see the check above); count it as zero
            # instead of letting len() raise TypeError
            image_count = len(post.images) if post.images else 0
            if post.type == "article" or (image_count >= config.minImageCountForNonArticle and len(post.body_text) > config.minTextLengthForNonArticle):
                html_template = PixivConstant.HTML_TEMPLATE
                # a template.html in the working directory overrides the built-in one
                if os.path.isfile("template.html"):
                    reader = PixivHelper.open_text_file("template.html")
                    try:
                        html_template = reader.read()
                    finally:
                        # close the handle even when reading fails
                        reader.close()
                post.WriteHtml(html_template, config.useAbsolutePathsInHtml, filename + ".html")

        if config.writeUrlInDescription:
            PixivHelper.write_url_in_description(post, config.urlBlacklistRegex, config.urlDumpFilename)
    finally:
        # record whatever was downloaded, even on early return or error
        if len(post_files) > 0:
            db.insertPostImages(post_files)

    db.updatePostUpdateDate(post.imageId, post.updatedDate)
    def ParseInfo(self, page):
        """Populate image metadata from the preloaded illust JSON payload.

        Args:
            page: Parsed JSON mapping; ``page["preload"]["illust"]`` is keyed
                by illust id and its first key must match ``self.imageId``.

        Sets the image URLs and mode (``"ugoira_view"``, ``"big"`` or
        ``"manga"``), title, caption, counters, tags, work date and
        resolution on ``self``.
        """
        # dict views are not subscriptable on Python 3, so materialise the
        # keys before taking the first one
        key = list(page["preload"]["illust"].keys())[0]
        assert (str(key) == str(self.imageId))
        root = page["preload"]["illust"][key]

        self.imageUrls = list()

        self.imageCount = int(root["pageCount"])
        temp_url = root["urls"]["original"]
        if self.imageCount == 1:
            if temp_url.find("ugoira") > 0:
                self.imageMode = "ugoira_view"
                # https://i.pximg.net/img-zip-ugoira/img/2018/04/22/00/01/06/68339821_ugoira600x600.zip 1920x1080
                # https://i.pximg.net/img-original/img/2018/04/22/00/01/06/68339821_ugoira0.jpg
                # https://i.pximg.net/img-original/img/2018/04/22/00/01/06/68339821_ugoira0.png
                # Fix Issue #372: derive the zip URL from the first-frame URL
                temp_url = temp_url.replace("/img-original/",
                                            "/img-zip-ugoira/")
                temp_url = temp_url.split("_ugoira0")[0]
                temp_url = temp_url + "_ugoira1920x1080.zip"
                self.imageUrls.append(temp_url)
                # self.ParseUgoira(page)
            else:
                self.imageMode = "big"
                self.imageUrls.append(temp_url)
        elif self.imageCount > 1:
            self.imageMode = "manga"
            # every page URL is derived from the first page's "_p0" URL
            for i in range(0, self.imageCount):
                url = temp_url.replace("_p0", "_p{0}".format(i))
                self.imageUrls.append(url)

        # title/caption
        self.imageTitle = root["illustTitle"]
        self.imageCaption = root["illustComment"]

        # view count
        self.jd_rtv = root["viewCount"]
        # like count
        self.jd_rtc = root["likeCount"]
        # not available anymore, mirrors the like count
        self.jd_rtt = self.jd_rtc

        # tags
        self.imageTags = list()
        tags = root["tags"]
        if tags is not None:
            tags = root["tags"]["tags"]
            for tag in tags:
                self.imageTags.append(tag["tag"])

        # datetime, in utc
        # "createDate" : "2018-06-08T15:00:04+00:00",
        self.worksDateDateTime = datetime_z.parse_datetime(
            str(root["createDate"]))
        tempDateFormat = self.dateFormat or "%m/%d/%y %H:%M"  # 2/27/2018 12:31
        self.worksDate = self.worksDateDateTime.strftime(tempDateFormat)

        # resolution; multi-page works report the page count instead
        self.worksResolution = "{0}x{1}".format(root["width"], root["height"])
        if self.imageCount > 1:
            self.worksResolution = "Multiple images: {0}P".format(
                self.imageCount)

        # tools = No more tool information
        self.worksTools = ""

        self.bookmark_count = root["bookmarkCount"]
        self.image_response_count = root["responseCount"]
Example #10
0
    def ParseInfo(self, page, writeRawJSON):
        """Populate image metadata from the parsed illust JSON payload.

        Args:
            page: Parsed JSON mapping; ``page["illust"]`` is keyed by illust
                id and its first key must match ``self.imageId``.
            writeRawJSON: When truthy, the illust entry is kept on
                ``self.rawJSON``.

        Sets original and resized image URLs, the image mode
        (``"ugoira_view"``, ``"big"`` or ``"manga"``), title, caption,
        series data, counters, tags, work date and resolution on ``self``,
        and appends every link found in the caption to
        ``self.descriptionUrlList``. Caption anchors without an ``href``
        attribute are skipped.
        """
        key = list(page["illust"].keys())[0]
        assert (str(key) == str(self.imageId))
        root = page["illust"][key]
        # save the JSON if writeRawJSON is enabled
        if writeRawJSON:
            self.rawJSON = root

        self.imageUrls = list()
        self.imageResizedUrls = list()

        self.imageCount = int(root["pageCount"])
        temp_url = root["urls"]["original"]
        temp_resized_url = root["urls"]["regular"]
        if self.imageCount == 1:
            if temp_url.find("ugoira") > 0:
                # ugoira mode
                self.imageMode = "ugoira_view"
                # https://i.pximg.net/img-zip-ugoira/img/2018/04/22/00/01/06/68339821_ugoira600x600.zip 1920x1080
                # https://i.pximg.net/img-original/img/2018/04/22/00/01/06/68339821_ugoira0.jpg
                # https://i.pximg.net/img-original/img/2018/04/22/00/01/06/68339821_ugoira0.png
                # Fix Issue #372: derive both zip URLs from the first-frame URL
                temp_url_ori = temp_url.replace("/img-original/",
                                                "/img-zip-ugoira/")
                temp_url_ori = temp_url_ori.split("_ugoira0")[0]
                temp_url_ori = temp_url_ori + "_ugoira1920x1080.zip"
                self.imageUrls.append(temp_url_ori)

                temp_resized_url = temp_url.replace("/img-original/",
                                                    "/img-zip-ugoira/")
                temp_resized_url = temp_resized_url.split("_ugoira0")[0]
                temp_resized_url = temp_resized_url + "_ugoira600x600.zip"
                self.imageResizedUrls.append(temp_resized_url)
            else:
                # single page image
                self.imageMode = "big"
                self.imageUrls.append(temp_url)
                self.imageResizedUrls.append(temp_resized_url)
        elif self.imageCount > 1:
            # multi-page images: every page URL is derived from the "_p0" URL
            self.imageMode = "manga"
            for i in range(0, self.imageCount):
                url = temp_url.replace("_p0", "_p{0}".format(i))
                self.imageUrls.append(url)
                resized_url = temp_resized_url.replace("_p0",
                                                       "_p{0}".format(i))
                self.imageResizedUrls.append(resized_url)

        # title/caption
        self.imageTitle = root["illustTitle"]
        self.imageCaption = root["illustComment"]
        # Series
        self.seriesNavData = root["seriesNavData"]
        # view count
        self.jd_rtv = root["viewCount"]
        # like count
        self.jd_rtc = root["likeCount"]
        # not available anymore, mirrors the like count
        self.jd_rtt = self.jd_rtc

        # tags
        self.imageTags = list()
        tags = root["tags"]
        if tags is not None:
            tags = root["tags"]["tags"]
            for tag in tags:
                self.imageTags.append(tag["tag"])

                # 701
                self.tags.append(PixivTagData(tag["tag"], tag))

        # datetime, in utc
        # "createDate" : "2018-06-08T15:00:04+00:00",
        self.worksDateDateTime = datetime_z.parse_datetime(root["createDate"])
        self.js_createDate = root["createDate"]  # store for json file
        # Issue #420
        if self._tzInfo is not None:
            self.worksDateDateTime = self.worksDateDateTime.astimezone(
                self._tzInfo)

        tempDateFormat = self.dateFormat or "%Y-%m-%d"  # 2018-07-22, else configured in config.ini
        self.worksDate = self.worksDateDateTime.strftime(tempDateFormat)

        # resolution; multi-page works report the page count instead
        self.worksResolution = "{0}x{1}".format(root["width"], root["height"])
        if self.imageCount > 1:
            self.worksResolution = "Multiple images: {0}P".format(
                self.imageCount)

        self.bookmark_count = root["bookmarkCount"]
        self.image_response_count = root["responseCount"]

        # Issue 421: collect the URLs linked from the caption
        parsed = BeautifulSoup(self.imageCaption, features="html5lib")
        for link in parsed.findAll('a'):
            link_str = link.get("href")
            if link_str is None:
                # anchor without an href (indexing it would raise KeyError)
                continue
            # "/jump.php?http%3A%2F%2Farsenixc.deviantart.com%2Fart%2FWatchmaker-house-567480110"
            if link_str.startswith("/jump.php?"):
                # drop the 10-character redirect prefix and decode the target
                link_str = link_str[10:]
                link_str = urllib.parse.unquote(link_str)
            self.descriptionUrlList.append(link_str)
        parsed.decompose()
        del parsed
Example #11
0
    def ParseInfo(self, page):
        """Populate image metadata from the parsed illust JSON payload.

        Args:
            page: Parsed JSON mapping; ``page["illust"]`` is keyed by illust
                id and its first key must match ``self.imageId``.

        Sets the image URLs and mode (``"ugoira_view"``, ``"big"`` or
        ``"manga"``), title, caption, counters, tags, work date and
        resolution on ``self``, and appends every link found in the caption
        to ``self.descriptionUrlList``. Caption anchors without an ``href``
        attribute are skipped.
        """
        key = list(page["illust"].keys())[0]
        assert (str(key) == str(self.imageId))
        root = page["illust"][key]

        self.imageUrls = list()

        self.imageCount = int(root["pageCount"])
        temp_url = root["urls"]["original"]
        if self.imageCount == 1:
            if temp_url.find("ugoira") > 0:
                self.imageMode = "ugoira_view"
                # https://i.pximg.net/img-zip-ugoira/img/2018/04/22/00/01/06/68339821_ugoira600x600.zip 1920x1080
                # https://i.pximg.net/img-original/img/2018/04/22/00/01/06/68339821_ugoira0.jpg
                # https://i.pximg.net/img-original/img/2018/04/22/00/01/06/68339821_ugoira0.png
                # Fix Issue #372: derive the zip URL from the first-frame URL
                temp_url = temp_url.replace("/img-original/",
                                            "/img-zip-ugoira/")
                temp_url = temp_url.split("_ugoira0")[0]
                temp_url = temp_url + "_ugoira1920x1080.zip"
                self.imageUrls.append(temp_url)
                # self.ParseUgoira(page)
            else:
                self.imageMode = "big"
                self.imageUrls.append(temp_url)
        elif self.imageCount > 1:
            self.imageMode = "manga"
            # every page URL is derived from the first page's "_p0" URL
            for i in range(0, self.imageCount):
                url = temp_url.replace("_p0", "_p{0}".format(i))
                self.imageUrls.append(url)

        # title/caption
        self.imageTitle = root["illustTitle"]
        self.imageCaption = root["illustComment"]

        # view count
        self.jd_rtv = root["viewCount"]
        # like count
        self.jd_rtc = root["likeCount"]
        # not available anymore, mirrors the like count
        self.jd_rtt = self.jd_rtc

        # tags
        self.imageTags = list()
        tags = root["tags"]
        if tags is not None:
            tags = root["tags"]["tags"]
            for tag in tags:
                self.imageTags.append(tag["tag"])

        # datetime, in utc
        # "createDate" : "2018-06-08T15:00:04+00:00",
        self.worksDateDateTime = datetime_z.parse_datetime(
            str(root["createDate"]))
        # Issue #420
        if self._tzInfo is not None:
            self.worksDateDateTime = self.worksDateDateTime.astimezone(
                self._tzInfo)

        tempDateFormat = self.dateFormat or "%m/%d/%y %H:%M"  # 2/27/2018 12:31
        self.worksDate = self.worksDateDateTime.strftime(tempDateFormat)

        # resolution; multi-page works report the page count instead
        self.worksResolution = "{0}x{1}".format(root["width"], root["height"])
        if self.imageCount > 1:
            self.worksResolution = "Multiple images: {0}P".format(
                self.imageCount)

        # tools = No more tool information
        self.worksTools = ""

        self.bookmark_count = root["bookmarkCount"]
        self.image_response_count = root["responseCount"]

        # Issue 421: collect the URLs linked from the caption
        parsed = BeautifulSoup(self.imageCaption, features="html5lib")
        for link in parsed.findAll('a'):
            link_str = link.get("href")
            if link_str is None:
                # anchor without an href (indexing it would raise KeyError)
                continue
            # "/jump.php?http%3A%2F%2Farsenixc.deviantart.com%2Fart%2FWatchmaker-house-567480110"
            if link_str.startswith("/jump.php?"):
                # drop the 10-character redirect prefix and decode the target
                link_str = link_str[10:]
                link_str = urllib.parse.unquote(link_str)
            self.descriptionUrlList.append(link_str)
        parsed.decompose()
        del parsed
Example #12
0
    def ParseInfo(self, page, writeRawJSON):
        """Populate image metadata from the parsed illust JSON payload.

        Args:
            page: Parsed JSON mapping; ``page["illust"]`` is keyed by illust
                id and its first key must match ``self.imageId``.
            writeRawJSON: When truthy, the illust entry is kept on
                ``self.rawJSON``.

        Sets original and resized image URLs, the image mode, title,
        caption, series data, counters, tags, work date and resolution on
        ``self``. URLs in the caption (and in a translated caption, when
        present) are handed to ``self.parse_url_from_caption``; captions are
        reduced to plain text when ``self.stripHTMLTagsFromCaption`` is set.
        """
        illust_map = page["illust"]
        key = list(illust_map.keys())[0]
        assert(str(key) == str(self.imageId))
        root = page["illust"][key]
        # keep the raw JSON only when requested
        if writeRawJSON:
            self.rawJSON = root

        self.imageUrls = []
        self.imageResizedUrls = []

        self.imageCount = int(root["pageCount"])
        original_url = root["urls"]["original"]
        regular_url = root["urls"]["regular"]
        if self.imageCount == 1:
            if original_url.find("ugoira") > 0:
                # ugoira mode
                self.imageMode = "ugoira_view"
                # https://i.pximg.net/img-zip-ugoira/img/2018/04/22/00/01/06/68339821_ugoira600x600.zip 1920x1080
                # https://i.pximg.net/img-original/img/2018/04/22/00/01/06/68339821_ugoira0.jpg
                # https://i.pximg.net/img-original/img/2018/04/22/00/01/06/68339821_ugoira0.png
                # Fix Issue #372: both zip URLs share the same stem, built
                # from the first-frame URL
                zip_stem = original_url.replace("/img-original/", "/img-zip-ugoira/").split("_ugoira0")[0]
                self.imageUrls.append(zip_stem + "_ugoira1920x1080.zip")
                self.imageResizedUrls.append(zip_stem + "_ugoira600x600.zip")
            else:
                # single page image
                self.imageMode = "big"
                self.imageUrls.append(original_url)
                self.imageResizedUrls.append(regular_url)
        elif self.imageCount > 1:
            # multi-page images: every page URL is derived from the "_p0" URL
            self.imageMode = "manga"
            for page_index in range(self.imageCount):
                page_marker = "_p{0}".format(page_index)
                self.imageUrls.append(original_url.replace("_p0", page_marker))
                self.imageResizedUrls.append(regular_url.replace("_p0", page_marker))

        # title/caption
        self.imageTitle = root["illustTitle"]
        self.imageCaption = root["illustComment"]
        # Series
        self.seriesNavData = root["seriesNavData"]
        # view count
        self.jd_rtv = root["viewCount"]
        # like count
        self.jd_rtc = root["likeCount"]
        # not available anymore, mirrors the like count
        self.jd_rtt = self.jd_rtc

        # tags
        self.imageTags = []
        if root["tags"] is not None:
            for entry in root["tags"]["tags"]:
                tag_name = entry["tag"]
                self.imageTags.append(tag_name)

                # 701
                self.tags.append(PixivTagData(tag_name, entry))

        # datetime, in utc
        # "createDate" : "2018-06-08T15:00:04+00:00",
        created_raw = root["createDate"]
        self.worksDateDateTime = datetime_z.parse_datetime(created_raw)
        self.js_createDate = root["createDate"]  # kept verbatim for the json file
        # Issue #420
        if self._tzInfo is not None:
            self.worksDateDateTime = self.worksDateDateTime.astimezone(self._tzInfo)

        # e.g. 2018-07-22 unless a custom format is configured in config.ini
        date_format = self.dateFormat or "%Y-%m-%d"
        self.worksDate = self.worksDateDateTime.strftime(date_format)

        # resolution; multi-page works report the page count instead
        self.worksResolution = "{0}x{1}".format(root["width"], root["height"])
        if self.imageCount > 1:
            self.worksResolution = "Multiple images: {0}P".format(self.imageCount)

        self.bookmark_count = root["bookmarkCount"]
        self.image_response_count = root["responseCount"]

        # Issue 421
        self.parse_url_from_caption(self.imageCaption)

        # The URLs have been collected above, so the markup can go now.
        if self.stripHTMLTagsFromCaption:
            self.imageCaption = BeautifulSoup(self.imageCaption, "lxml").text

        # Issue #1064: optional translated title and caption
        if "titleCaptionTranslation" in root:
            translation = root["titleCaptionTranslation"]
            if "workTitle" in translation:
                translated_title = translation["workTitle"]
                if translated_title is not None and len(translated_title) > 0:
                    self.translated_work_title = translated_title
            if "workCaption" in translation:
                translated_caption = translation["workCaption"]
                if translated_caption is not None and len(translated_caption) > 0:
                    self.translated_work_caption = translated_caption
                    self.parse_url_from_caption(self.translated_work_caption)
                    if self.stripHTMLTagsFromCaption:
                        self.translated_work_caption = BeautifulSoup(self.translated_work_caption, "lxml").text
Example #13
0
    def ParseInfo(self, page):
        """Fill in this image's metadata from the preloaded illust payload.

        Reads the first entry under ``page["preload"]["illust"]`` (its key
        must match ``self.imageId``) and copies the download URLs, title,
        caption, counters, tags, creation date and resolution onto ``self``.
        """
        illust_map = page["preload"]["illust"]
        key = list(illust_map.keys())[0]
        assert(str(key) == str(self.imageId))
        root = illust_map[key]

        self.imageUrls = list()

        self.imageCount = int(root["pageCount"])
        temp_url = root["urls"]["original"]
        if self.imageCount > 1:
            # multi-page work: derive every page url from the first page's url
            self.imageMode = "manga"
            self.imageUrls.extend(
                temp_url.replace("_p0", "_p{0}".format(page_index))
                for page_index in range(0, self.imageCount))
        elif self.imageCount == 1:
            if temp_url.find("ugoira") > 0:
                self.imageMode = "ugoira_view"
                # Fix Issue #372: download the animation zip instead of the first frame, e.g.
                #   https://i.pximg.net/img-original/img/2018/04/22/00/01/06/68339821_ugoira0.jpg
                #   https://i.pximg.net/img-original/img/2018/04/22/00/01/06/68339821_ugoira0.png
                # becomes
                #   https://i.pximg.net/img-zip-ugoira/img/2018/04/22/00/01/06/68339821_ugoira1920x1080.zip
                zip_prefix = temp_url.replace("/img-original/", "/img-zip-ugoira/").split("_ugoira0")[0]
                temp_url = zip_prefix + "_ugoira1920x1080.zip"
            else:
                self.imageMode = "big"
            # both single-image modes store exactly one url
            self.imageUrls.append(temp_url)

        # title/caption
        self.imageTitle = root["illustTitle"]
        self.imageCaption = root["illustComment"]

        # view count, then like count
        self.jd_rtv = root["viewCount"]
        self.jd_rtc = root["likeCount"]
        # the third counter is not available anymore, so it mirrors the like count
        self.jd_rtt = self.jd_rtc

        # tags
        self.imageTags = list()
        tags = root["tags"]
        if tags is not None:
            tags = root["tags"]["tags"]
            self.imageTags.extend(tag_entry["tag"] for tag_entry in tags)

        # datetime, in utc
        # "createDate" : "2018-06-08T15:00:04+00:00",
        self.worksDateDateTime = datetime_z.parse_datetime(str(root["createDate"]))
        tempDateFormat = self.dateFormat
        if not tempDateFormat:
            # fall back to month/day/year hour:minute when no format is configured
            tempDateFormat = "%m/%d/%y %H:%M"
        self.worksDate = self.worksDateDateTime.strftime(tempDateFormat)

        # resolution: width x height, replaced by a page count for multi-page works
        single_resolution = "{0}x{1}".format(root["width"], root["height"])
        self.worksResolution = (
            "Multiple images: {0}P".format(self.imageCount)
            if self.imageCount > 1
            else single_resolution)

        # tools = No more tool information
        self.worksTools = ""

        self.bookmark_count = root["bookmarkCount"]
        self.image_response_count = root["responseCount"]