def getPixivPage(self, url, referer="https://www.pixiv.net", returnParsed=True, enable_cache=True):
    ''' Get a page from pixiv, served from the local cache when available.

        url          -- page address, normalised with self.fixUrl() first.
        referer      -- value sent in the Referer header.
        returnParsed -- when true return a BeautifulSoup tree (html5lib),
                        otherwise return the decoded page text.
        enable_cache -- when true, a freshly downloaded page is stored in
                        the cache (the cache is always consulted).

        Raises PixivException (SERVER_ERROR) on any HTTP error.
    '''
    url = self.fixUrl(url)

    read_page = self._get_from_cache(url)
    if read_page is None:
        # The request object is only needed on a cache miss.
        req = mechanize.Request(url)
        req.add_header('Referer', referer)
        try:
            temp = self.open_with_retry(req)
            try:
                read_page = temp.read().decode('utf8')
            finally:
                # Release the response even when read()/decode() fails.
                temp.close()
            if enable_cache:
                self._put_to_cache(url, read_page)
        except urllib.error.HTTPError as ex:
            if ex.code in [403, 404, 503]:
                raise PixivException("Failed to get page: {0} => {1}".format(
                    url, ex), errorCode=PixivException.SERVER_ERROR) from ex
            PixivHelper.print_and_log(
                'error', 'Error at getPixivPage(): {0}'.format(str(sys.exc_info())))
            raise PixivException("Failed to get page: {0}".format(
                url), errorCode=PixivException.SERVER_ERROR) from ex

    if returnParsed:
        return BeautifulSoup(read_page, features="html5lib")
    return read_page
def __init__(self, mid=0, page=None, fromImage=False, offset=None, limit=None):
    '''Store the paging arguments and, when a page is supplied, parse it.

    mid       -- artist id, stored as self.artistId.
    page      -- raw page text; nothing is parsed when it is None.
    fromImage -- when true the page is treated as an image page: no image
                 list is parsed and the instance is flagged as last page
                 with images.
    offset, limit -- stored unchanged on the instance.

    Raises PixivException when the decoded payload reports an error or has
    no body.
    '''
    self.offset = offset
    self.limit = limit
    self.artistId = mid

    if page is None:
        return

    info = parseJs(page)

    if fromImage:
        self.isLastPage = True
        self.haveImages = True
    else:
        # detect if image count != 0
        info = demjson.decode(page)
        if info["error"]:
            raise PixivException(
                info["message"],
                errorCode=PixivException.OTHER_MEMBER_ERROR,
                htmlPage=page)
        body = info["body"]
        if body is None:
            raise PixivException(
                "Missing body content, possible artist id doesn't exists.",
                errorCode=PixivException.USER_ID_NOT_EXISTS,
                htmlPage=page)
        self.ParseImages(body)

    # parse artist info
    self.ParseInfo(info, fromImage)
def getEmbedData(self, embedData, jsPost):
    '''Render the embed snippet for one embedded item of a post.

    The provider table is read from "content_provider.json" in the current
    working directory (key "embedConfig").

    embedData -- mapping describing the embed; must contain "serviceProvider".
    jsPost    -- the post data, attached to raised exceptions as htmlPage.

    Returns "" for providers marked "ignore", otherwise the provider's
    "format" template filled with the first configured key found in
    embedData.

    Raises PixivException when the config file is missing, the provider is
    not configured, or no non-empty content id is found.
    '''
    if not os.path.exists("content_provider.json"):
        raise PixivException("Missing content_provider.json, please redownload application!",
                             errorCode=PixivException.MISSING_CONFIG,
                             htmlPage=None)

    cfg = demjson.decode_file("content_provider.json")
    embed_cfg = cfg["embedConfig"]
    current_provider = embedData["serviceProvider"]

    if current_provider not in embed_cfg:
        raise PixivException("Unsupported embed provider = {0} for post = {1}, please update content_provider.json.".format(embedData["serviceProvider"], self.imageId),
                             errorCode=9999,
                             htmlPage=jsPost)

    provider_cfg = embed_cfg[current_provider]
    if provider_cfg["ignore"]:
        return ""

    content_id = None
    for key in provider_cfg["keys"]:
        # `in` works on Python 2 and 3; dict.has_key() exists only on Python 2.
        if key in embedData:
            content_id = embedData[key]
            break

    if content_id is not None and len(content_id) > 0:
        return provider_cfg["format"].format(content_id)

    raise PixivException("Empty content_id for embed provider = {0} for post = {1}, please update content_provider.json.".format(embedData["serviceProvider"], self.imageId),
                         errorCode=9999,
                         htmlPage=jsPost)
def getMemberInfoWhitecube(self, member_id, artist, bookmark=False):
    ''' Get artist information using Ajax and AppAPI.

        member_id -- id used to build both request URLs.
        artist    -- object filled in through ParseInfo()/ParseBackground().
        bookmark  -- forwarded to artist.ParseInfo().

        Both responses are looked up in / stored to the cache by URL.
        Returns the same artist object.

        Raises PixivException on HTTP errors: NOT_LOGGED_IN for 401,
        USER_ID_SUSPENDED for 403, OTHER_MEMBER_ERROR otherwise.
    '''
    try:
        url = 'https://app-api.pixiv.net/v1/user/detail?user_id={0}'.format(
            member_id)
        info = self.get_from_cache(url)
        if info is None:
            PixivHelper.GetLogger().debug("Getting member information: %s", member_id)
            infoStr = self.open(url).read()
            info = json.loads(infoStr)
            self.put_to_cache(url, info)

        artist.ParseInfo(info, False, bookmark=bookmark)

        # will throw HTTPError if user is suspended/not logged in.
        url_ajax = 'https://www.pixiv.net/ajax/user/{0}'.format(member_id)
        info_ajax = self.get_from_cache(url_ajax)
        if info_ajax is None:
            info_ajax_str = self.open(url_ajax).read()
            info_ajax = json.loads(info_ajax_str)
            self.put_to_cache(url_ajax, info_ajax)
        # 2nd pass to get the background
        artist.ParseBackground(info_ajax)

        return artist
    except urllib2.HTTPError as error:  # `as` form is valid on Python 2.6+ and 3
        errorCode = error.getcode()
        errorMessage = error.get_data()
        PixivHelper.GetLogger().error("Error data: \r\n %s", errorMessage)
        payload = demjson.decode(errorMessage)

        # Fallback so msg is always bound, even for an unexpected payload shape.
        msg = str(error)
        # Issue #432
        if "message" in payload:
            msg = payload["message"]
        elif "error" in payload and payload["error"] is not None:
            details = payload["error"]
            msg = ",".join([details["user_message"],
                            details["message"],
                            details["reason"]])

        if errorCode == 401:
            raise PixivException(msg, errorCode=PixivException.NOT_LOGGED_IN, htmlPage=errorMessage)
        elif errorCode == 403:
            raise PixivException(
                msg, errorCode=PixivException.USER_ID_SUSPENDED, htmlPage=errorMessage)
        else:
            raise PixivException(
                msg, errorCode=PixivException.OTHER_MEMBER_ERROR, htmlPage=errorMessage)
def get_embed_url_data(self, embedData, jsPost) -> str:
    '''Render the snippet for a URL embed, using "urlEmbedConfig".

    content_provider.json is looked up next to sys.executable first, then
    in the current directory (Issue #1133).

    embedData -- mapping describing the embed; must contain "type".
    jsPost    -- the post data, attached to the raised exception as htmlPage.

    Every URL found in the values named by "get_link_keys" is passed to
    self.try_add(..., self.descriptionUrlList).  Returns "" for ignored
    providers, otherwise the provider's "format" template filled with the
    values named by "keys" (dotted keys walk nested mappings).

    Raises PixivException when the config file is missing or the embed type
    is not configured.
    '''
    # Issue #1133
    content_provider_path = os.path.abspath(
        os.path.dirname(sys.executable) + os.sep + "content_provider.json")
    if not os.path.exists(content_provider_path):
        content_provider_path = os.path.abspath("./content_provider.json")
    if not os.path.exists(content_provider_path):
        raise PixivException(
            f"Missing content_provider.json, please get it from https://github.com/Nandaka/PixivUtil2/blob/master/content_provider.json! Expected location => {content_provider_path}",
            errorCode=PixivException.MISSING_CONFIG,
            htmlPage=None)

    cfg = demjson3.decode_file(content_provider_path)
    embed_cfg = cfg["urlEmbedConfig"]
    current_provider = embedData["type"]

    if current_provider not in embed_cfg:
        msg = "Unsupported url embed provider = {0} for post = {1}, please update content_provider.json."
        # Report the value actually used for the lookup; URL embeds are keyed
        # by "type", so reading "serviceProvider" here could raise KeyError
        # instead of the intended PixivException.
        raise PixivException(msg.format(current_provider, self.imageId),
                             errorCode=9999,
                             htmlPage=jsPost)

    provider_cfg = embed_cfg[current_provider]
    if provider_cfg["ignore"]:
        return ""

    def resolve(dotted_key):
        # Walk embedData along a "a.b.c" style key.
        node = embedData
        for part in dotted_key.split("."):
            node = node[part]
        return node

    # get urls from given keys
    for key in provider_cfg["get_link_keys"]:
        for link in _url_pattern.finditer(resolve(key)):
            self.try_add(link.group(), self.descriptionUrlList)

    # get all the keys to list
    keys = [resolve(key) for key in provider_cfg["keys"]]
    return provider_cfg["format"].format(*keys)
def parsePosts(self, page):
    '''Decode a Fanbox post listing and return the FanboxPost objects.

    Sets self.artistName when the body has a "creator" entry, and
    self.nextUrl / self.hasNextPage from the listing.  Returns None when
    the decoded body is None.

    Raises PixivException when the response carries a truthy "error";
    asserts that every post belongs to self.artistId.
    '''
    js = demjson.decode(page)

    if "error" in js and js["error"]:
        raise PixivException(
            "Error when requesting Fanbox artist: {0}".format(self.artistId), 9999, page)

    js_body = js["body"]
    if js_body is None:
        return None

    if "creator" in js_body:
        self.artistName = js_body["creator"]["user"]["name"]

    # new api nests the listing under "post";
    # https://www.pixiv.net/ajax/fanbox/post?postId={0} and the old api do not
    post_root = js_body["post"] if "post" in js_body else js_body

    posts = []
    for jsPost in post_root["items"]:
        posts.append(FanboxPost(int(jsPost["id"]), self, jsPost, tzInfo=self._tzInfo))
        # sanity check
        assert (self.artistId == int(jsPost["user"]["userId"])), "Different user id from constructor!"

    self.nextUrl = post_root["nextUrl"]
    if not (self.nextUrl is None or len(self.nextUrl) == 0):
        self.hasNextPage = True

    return posts
def open_with_retry(
        self,
        url,
        data=None,
        timeout=mechanize._sockettimeout._GLOBAL_DEFAULT_TIMEOUT,
        retry=0):
    '''Call self.open(), retrying on non-HTTP failures.

    url, data, timeout -- forwarded to self.open().
    retry -- number of retries; 0 means "use self._config.retry" when a
             config is present.

    urllib.error.HTTPError is re-raised untouched.  Any other failure is
    retried after a countdown of self._config.retryWait; when the retries
    are used up PixivException (SERVER_ERROR) is raised.
    '''
    retry_count = 0
    if retry == 0 and self._config is not None:
        retry = self._config.retry

    while True:
        try:
            return self.open(url, data, timeout)
        except urllib.error.HTTPError:
            raise
        except BaseException:
            # NOTE(review): BaseException also covers KeyboardInterrupt and
            # SystemExit - confirm that retrying those is intended.
            if retry_count < retry:
                # A caller may pass retry > 0 without a config being set;
                # in that case there is no configured wait, so retry at once.
                retry_wait = self._config.retryWait if self._config is not None else 1
                for t in range(1, retry_wait):
                    print(t, end=' ')
                    time.sleep(1)
                print('')
                retry_count = retry_count + 1
            else:
                PixivHelper.print_and_log(
                    'error',
                    'Error at open_with_retry(): {0}'.format(
                        str(sys.exc_info())))
                raise PixivException(
                    "Failed to get page: {0}, please check your internet connection/firewall/antivirus."
                    .format(url),
                    errorCode=PixivException.SERVER_ERROR)
def open_with_retry(
        self,
        url,
        data=None,
        timeout=mechanize._sockettimeout._GLOBAL_DEFAULT_TIMEOUT,
        retry=0):
    '''Call self.open(), retrying on non-HTTP failures.

    url, data, timeout -- forwarded to self.open().
    retry -- number of retries; 0 means "use self._config.retry" when a
             config is present.

    urllib2.HTTPError is re-raised untouched.  Other exceptions are retried
    after a countdown of self._config.retryWait; when the retries are used
    up PixivException (SERVER_ERROR) is raised with the error text.
    '''
    retry_count = 0
    if retry == 0 and self._config is not None:
        retry = self._config.retry

    while True:
        try:
            return self.open(url, data, timeout)
        except urllib2.HTTPError:
            raise
        except Exception as ex:
            if retry_count < retry:
                # A caller may pass retry > 0 without a config being set;
                # in that case there is no configured wait, so retry at once.
                retry_wait = self._config.retryWait if self._config is not None else 1
                for t in range(1, retry_wait):
                    print(t, end=' ')
                    time.sleep(1)
                print('')
                retry_count = retry_count + 1
            else:
                # Format the exception itself: the `message` attribute is
                # deprecated, missing on Python 3 and often empty.
                raise PixivException(
                    "Failed to get page: {0}, please check your internet connection/firewall/antivirus."
                    .format(ex),
                    errorCode=PixivException.SERVER_ERROR)
def getPixivPage(self, url, referer="https://www.pixiv.net", returnParsed=True):
    ''' Get a page from pixiv.

        Returns a BeautifulSoup object when returnParsed is true, otherwise
        the response object.  For HTTP 403/404/503 the error body is
        returned parsed with BeautifulSoup.

        Every other failure, including other HTTP status codes, is retried
        up to self._config.retry times with a countdown of
        self._config.retryWait; after that PixivException (SERVER_ERROR)
        is raised.
    '''
    url = self.fixUrl(url)
    retry_count = 0
    while True:
        req = urllib2.Request(url)
        req.add_header('Referer', referer)
        try:
            page = self.open_with_retry(req)
            if returnParsed:
                return BeautifulSoup(page.read())
            return page
        except urllib2.HTTPError as ex:
            if ex.code in [403, 404, 503]:
                return BeautifulSoup(ex.read())
            # Other status codes used to fall out of the handler and loop
            # again immediately and without limit; they now share the
            # bounded retry below.
            failure = str(sys.exc_info())
        except BaseException:
            failure = str(sys.exc_info())

        if retry_count >= self._config.retry:
            PixivHelper.print_and_log('error', 'Error at getPixivPage(): {0}'.format(failure))
            raise PixivException("Failed to get page: {0}".format(url), errorCode=PixivException.SERVER_ERROR)

        for t in range(1, self._config.retryWait):
            print(t, end=' ')
            time.sleep(1)
        print('')
        retry_count = retry_count + 1
def parsePost(self, jsPost):
    '''Copy the common fields of a Fanbox post onto this object.

    Reads title, cover image, published/updated timestamps, the optional
    "feeRequired" flag, type and like count from jsPost.  Marks the post
    as restricted when its "body" is None.

    Raises PixivException when the post type is not in
    FanboxPost._supportedType.
    '''
    self.imageTitle = jsPost["title"]

    # Issue #930
    cover = jsPost["coverImageUrl"]
    if cover and not self.coverImageUrl:
        self.coverImageUrl = _re_fanbox_cover.sub("fanbox", cover)
        self.try_add(cover, self.embeddedFiles)

    published = jsPost["publishedDatetime"]
    self.worksDate = published
    self.worksDateDateTime = datetime_z.parse_datetime(published)
    updated = jsPost["updatedDatetime"]
    self.updatedDate = updated
    self.updatedDateDatetime = datetime_z.parse_datetime(updated)

    if "feeRequired" in jsPost:
        self.feeRequired = jsPost["feeRequired"]

    # Issue #420
    tz = self._tzInfo
    if tz is not None:
        self.worksDateDateTime = self.worksDateDateTime.astimezone(tz)

    self.type = jsPost["type"]
    supported = self.type in FanboxPost._supportedType
    if not supported:
        raise PixivException(
            f"Unsupported post type = {self.type} for post = {self.imageId}",
            errorCode=9999,
            htmlPage=jsPost)

    self.likeCount = int(jsPost["likeCount"])
    if jsPost["body"] is None:
        self.is_restricted = True
def getPixivPage(self, url, referer="https://www.pixiv.net", returnParsed=True):
    ''' Get a page from pixiv.

        Returns a BeautifulSoup object when returnParsed is true, otherwise
        the response object.  For HTTP 403/404/503 the error body is
        returned parsed with BeautifulSoup.

        Other exceptions are retried up to self._config.retry times with a
        countdown of self._config.retryWait; after that PixivException
        (SERVER_ERROR) is raised with the error text.
    '''
    url = self.fixUrl(url)
    retry_count = 0
    while True:
        req = urllib2.Request(url)
        req.add_header('Referer', referer)
        try:
            page = self.open(req)
            if returnParsed:
                return BeautifulSoup(page.read())
            return page
        except Exception as ex:
            if isinstance(ex, urllib2.HTTPError) and ex.code in [403, 404, 503]:
                return BeautifulSoup(ex.read())

            if retry_count >= self._config.retry:
                # str(ex) instead of ex.message: the attribute is deprecated,
                # missing on Python 3 and often empty.
                raise PixivException("Failed to get page: " + str(ex), errorCode=PixivException.SERVER_ERROR)

            for t in range(1, self._config.retryWait):
                print(t, end=' ')
                time.sleep(1)
            print('')
            retry_count = retry_count + 1
def ParseMangaImagesNew(self, page, _br):
    '''Collect the big-image URLs linked from a manga page.

    page -- parsed page; its <section class="manga"> links are followed.
    _br  -- browser used to fetch each big-image page; when None the
            existing browser from PixivBrowserFactory is used.

    Failures on an individual link are printed and skipped.  When the page
    has a <span class="total">, self.imageCount is set from it.

    Returns the list of image URLs (empty when the page has no manga
    section).  Raises PixivException when the total on the page differs
    from the number of URLs found.
    '''
    urls = []
    mangaSection = page.find("section", attrs={'class': 'manga'})
    # A page without the manga section has no links to follow.
    links = mangaSection.findAll('a') if mangaSection is not None else []
    ## /member_illust.php?mode=manga_big&illust_id=46279245&page=0
    if _br is None:
        import PixivBrowserFactory
        _br = PixivBrowserFactory.getExistingBrowser()

    referer = "http://www.pixiv.net/member_illust.php?mode=manga&illust_id=" + str(self.imageId)
    for link in links:
        try:
            href = _br.fixUrl(link["href"])
            # Single-argument print() produces the same output on Python 2 and 3.
            print("Fetching big image page: %s" % href)
            bigPage = _br.getPixivPage(url=href, referer=referer)
            bigImg = bigPage.find('img')
            imgUrl = bigImg["src"]
            print("Found:  %s" % imgUrl)
            urls.append(imgUrl)
            bigImg.decompose()
            bigPage.decompose()
            del bigImg
            del bigPage
        except Exception as ex:
            # Best effort per link; a shortfall is caught by the count check below.
            print(ex)

    total = page.find("span", attrs={'class': 'total'})
    if total is not None:
        self.imageCount = int(total.string)
        if self.imageCount != len(urls):
            raise PixivException("Different images count: " + str(self.imageCount) + " != " + str(len(urls)))
    return urls
def open_with_retry(
        self,
        url,
        data=None,
        timeout=mechanize._sockettimeout._GLOBAL_DEFAULT_TIMEOUT,
        retry=0):
    '''Call self.open(), retrying on non-HTTP failures.

    url, data, timeout -- forwarded to self.open().
    retry -- number of retries; 0 means "use self._config.retry" when a
             config is present.

    urllib2.HTTPError is re-raised untouched.  Other exceptions are retried
    after a countdown of self._config.retryWait; when the retries are used
    up PixivException (SERVER_ERROR) is raised with the error text.
    '''
    retry_count = 0
    if retry == 0 and self._config is not None:
        retry = self._config.retry

    while True:
        try:
            return self.open(url, data, timeout)
        except urllib2.HTTPError:
            raise
        except Exception as ex:
            if retry_count >= retry:
                # str(ex) instead of ex.message: the attribute is deprecated,
                # missing on Python 3 and often empty.
                raise PixivException("Failed to get page: " + str(ex), errorCode=PixivException.SERVER_ERROR)

            # A caller may pass retry > 0 without a config being set;
            # in that case there is no configured wait, so retry at once.
            retry_wait = self._config.retryWait if self._config is not None else 1
            for t in range(1, retry_wait):
                print(t, end=' ')
                time.sleep(1)
            print('')
            retry_count = retry_count + 1
def open_with_retry(self, url, data=None, timeout=60, retry=0):
    ''' Return response object with retry.

        url, data, timeout -- forwarded to self.open().
        retry -- number of retries; 0 means "use self._config.retry" when a
                 config is present.

        urllib.error.HTTPError is re-raised untouched.  Any other failure is
        printed and retried after a countdown of self._config.retryWait;
        when the retries are used up PixivException (SERVER_ERROR) is raised.
    '''
    retry_count = 0
    if retry == 0 and self._config is not None:
        retry = self._config.retry

    while True:
        try:
            return self.open(url, data, timeout)
        except urllib.error.HTTPError:
            raise
        except BaseException:
            # NOTE(review): BaseException also covers KeyboardInterrupt and
            # SystemExit - confirm that retrying those is intended.
            exc_value = sys.exc_info()[1]
            if retry_count < retry:
                print(exc_value, end=' ')
                # A caller may pass retry > 0 without a config being set;
                # in that case there is no configured wait, so retry at once.
                retry_wait = self._config.retryWait if self._config is not None else 1
                for t in range(1, retry_wait):
                    print(t, end=' ')
                    time.sleep(1)
                print('')
                retry_count = retry_count + 1
            else:
                # Show the address rather than the Request object in the message.
                temp = url
                if isinstance(url, urllib.request.Request):
                    temp = url.full_url

                PixivHelper.print_and_log('error', 'Error at open_with_retry(): {0}'.format(str(sys.exc_info())))
                raise PixivException("Failed to get page: {0}, please check your internet connection/firewall/antivirus."
                                     .format(temp), errorCode=PixivException.SERVER_ERROR)
def parsePost(self, jsPost):
    '''Copy the common fields of a Fanbox post onto this object.

    Reads title, cover image, published/updated timestamps, type and like
    count from jsPost; the cover URL is added to self.embeddedFiles when
    not already present.  Marks the post as restricted when its "body" is
    None.

    Raises PixivException when the post type is not in
    FanboxPost._supportedType.
    '''
    self.imageTitle = jsPost["title"]

    cover = jsPost["coverImageUrl"]
    self.coverImageUrl = cover
    if not (cover is None or cover in self.embeddedFiles):
        self.embeddedFiles.append(cover)

    published = jsPost["publishedDatetime"]
    self.worksDate = published
    self.worksDateDateTime = datetime_z.parse_datetime(published)
    # Issue #420
    tz = self._tzInfo
    if tz is not None:
        self.worksDateDateTime = self.worksDateDateTime.astimezone(tz)

    self.updatedDatetime = jsPost["updatedDatetime"]

    self.type = jsPost["type"]
    supported = self.type in FanboxPost._supportedType
    if not supported:
        raise PixivException(
            "Unsupported post type = {0} for post = {1}".format(
                self.type, self.imageId),
            errorCode=9999,
            htmlPage=jsPost)

    self.likeCount = int(jsPost["likeCount"])
    if jsPost["body"] is None:
        self.is_restricted = True
def ParseImages(self, page, mode=None, _br=None):
    '''Fill self.imageUrls from the page according to the image mode.

    page -- page to parse; must not be None.
    mode -- 'big'/'bigNew', 'manga' or 'ugoira_view'; defaults to
            self.imageMode.  Any other value leaves the list empty.
    _br  -- forwarded to CheckMangaType() for manga pages.

    Returns self.imageUrls.  Raises PixivException with NO_PAGE_GIVEN when
    page is None and NO_IMAGES when nothing was found.
    '''
    # Identity checks: `== None` would go through the page object's __eq__.
    if page is None:
        raise PixivException('No page given', errorCode=PixivException.NO_PAGE_GIVEN)
    if mode is None:
        mode = self.imageMode

    del self.imageUrls[:]
    if mode == 'big' or mode == 'bigNew':
        self.imageUrls.append(self.ParseBigImages(page))
    elif mode == 'manga':
        self.imageUrls = self.CheckMangaType(page, _br)
    elif mode == 'ugoira_view':
        self.imageUrls.append(self.ParseUgoira(page))

    if len(self.imageUrls) == 0:
        raise PixivException('No images found for: ' + str(self.imageId), errorCode=PixivException.NO_IMAGES)
    return self.imageUrls
def parse(self):
    '''Populate this object from the JSON text in self.novel_json_str.

    Copies title, content, author id, counters, series navigation data,
    flags, language and dates from the response body, converts the dates to
    self._tzInfo when set, formats self.worksDate with self.dateFormat
    (default "%Y-%m-%d"), and builds self.imageTags / self.tags.

    Raises PixivException (UNKNOWN_IMAGE_ERROR) when the response reports
    an error.
    '''
    js = json.loads(self.novel_json_str)
    if js["error"]:
        raise PixivException("Cannot get novel details",
                             errorCode=PixivException.UNKNOWN_IMAGE_ERROR,
                             htmlPage=self.novel_json_str)

    root = js["body"]

    self.imageTitle = root["title"]
    self.content = root["content"]
    self.artist_id = root["userId"]
    self.bookmark_count = root["bookmarkCount"]
    self.image_response_count = root["imageResponseCount"]

    nav = root["seriesNavData"]
    self.seriesNavData = nav
    if nav is not None:
        self.seriesId = nav["seriesId"]
        self.seriesOrder = nav["order"]

    self.isOriginal = root["isOriginal"]
    self.isBungei = root["isBungei"]
    self.language = root["language"]
    self.xRestrict = root["xRestrict"]

    # datetime
    created = root["createDate"]
    self.worksDateDateTime = datetime_z.parse_datetime(created)
    self.uploadDate = datetime_z.parse_datetime(root["uploadDate"])
    self.js_createDate = created  # store for json file

    tz = self._tzInfo
    if tz is not None:
        self.worksDateDateTime = self.worksDateDateTime.astimezone(tz)
        self.uploadDate = self.uploadDate.astimezone(tz)

    # 2018-07-22, else configured in config.ini
    date_format = self.dateFormat or "%Y-%m-%d"
    self.worksDate = self.worksDateDateTime.strftime(date_format)

    # tags
    self.imageTags = []
    self.tags = []
    tag_root = root["tags"]
    if tag_root is not None:
        for entry in tag_root["tags"]:
            name = entry["tag"]
            self.imageTags.append(name)
            self.tags.append(PixivTagData(name, entry))

    # append original tag
    if root["isOriginal"]:
        original = {
            "tag": "オリジナル",
            "locked": True,
            "deletable": False,
            "userId": "",
            "romaji": "original",
            "translation": {
                "en": "original"
            }
        }
        self.imageTags.append("オリジナル")
        self.tags.append(PixivTagData(original["tag"], original))
def __init__(self, page):
    '''Decode a Fanbox response and parse its supported-artist list.

    Raises PixivException (code 9999) when the response reports an error.
    Nothing is parsed when the response body is None.
    '''
    js = demjson.decode(page)

    if js["error"]:
        raise PixivException("Error when requesting Fanbox", 9999, page)

    body = js["body"]
    if body is None:
        return
    self.parseSupportedArtists(body)
def getEmbedData(self, embedData, jsPost=None):
    '''Render the HTML link for an embedded item; only twitter is supported.

    embedData -- mapping with "serviceProvider" and, for twitter, "contentId".
    jsPost    -- optional post data attached to the raised exception as
                 htmlPage.  The original body referenced this name without
                 defining it, so the error path raised NameError.

    Raises PixivException (code 9999) for any other provider.
    '''
    if embedData["serviceProvider"] == "twitter":
        return "<a href='https://twitter.com/_/status/{0}'>twitter post: {0}</a>".format(embedData["contentId"])

    raise PixivException("Unsupported embed provider = {0} for post = {1}".format(embedData["serviceProvider"], self.imageId),
                         errorCode=9999,
                         htmlPage=jsPost)
def parse_series_content(self, page_info, current_page):
    '''Append one page of series contents to this series.

    page_info    -- JSON text of the response.
    current_page -- key under which the raw text is kept in
                    self.series_list_str.

    Raises PixivException (UNKNOWN_IMAGE_ERROR) when the response reports
    an error.
    '''
    response = json.loads(page_info)
    if response["error"]:
        raise PixivException("Cannot get novel series content details",
                             errorCode=PixivException.UNKNOWN_IMAGE_ERROR,
                             htmlPage=page_info)

    contents = response["body"]["seriesContents"]
    self.series_list.extend(contents)
    self.series_list_str[current_page] = page_info
def parseList(filename, rootDir=None):
    '''Read list.txt and return the list of PixivListItem.

    Each non-blank line that does not start with '#' is
    "<member id> [<path>]".  In the path, double quotes are removed,
    '%root%' is replaced by rootDir (or by '' when rootDir is None), and
    the result is made absolute and sanitized.

    Parsing stops at the first invalid line; the problem is logged with its
    line number and the items read so far are returned.

    Raises PixivException when the file does not exist.
    '''
    result = []

    if not os.path.exists(filename):
        raise PixivException("File doesn't exists or no permission to read: " + filename,
                             errorCode=PixivException.FILE_NOT_EXISTS_OR_NO_WRITE_PERMISSION)

    reader = PixivHelper.OpenTextFile(filename)
    lineNo = 1
    # Bound up front so the handlers can use it even if reading line 1 fails.
    originalLine = ""
    try:
        for line in reader:
            originalLine = line
            ##PixivHelper.safePrint("Processing: " + line)
            is_comment_or_blank = line.startswith('#') or len(line.strip()) == 0
            if not is_comment_or_blank:
                line = PixivHelper.toUnicode(line)
                line = line.strip()
                items = line.split(" ", 1)

                member_id = int(items[0])
                path = ""
                if len(items) > 1:
                    path = items[1].strip()

                    path = path.replace('"', '')
                    if rootDir is not None:
                        path = path.replace('%root%', rootDir)
                    else:
                        path = path.replace('%root%', '')

                    path = os.path.abspath(path)
                    # have drive letter
                    if re.match(r'[a-zA-Z]:', path):
                        dirpath = path.split(os.sep, 1)
                        dirpath[1] = PixivHelper.sanitizeFilename(dirpath[1], None)
                        path = os.sep.join(dirpath)
                    else:
                        path = PixivHelper.sanitizeFilename(path, rootDir)

                    path = path.replace('\\\\', '\\')
                    path = path.replace('\\', os.sep)

                result.append(PixivListItem(member_id, path))

            # Count skipped lines too, so the reported number matches the file.
            lineNo = lineNo + 1
            originalLine = ""
    except UnicodeDecodeError:
        PixivHelper.GetLogger().exception("PixivListItem.parseList(): Invalid value when parsing list")
        PixivHelper.printAndLog('error', 'Invalid value: {0} at line {1}, try to save the list.txt in UTF-8.'.format(originalLine, lineNo))
    except Exception:
        PixivHelper.GetLogger().exception("PixivListItem.parseList(): Invalid value when parsing list")
        PixivHelper.printAndLog('error', 'Invalid value: {0} at line {1}'.format(originalLine, lineNo))
    finally:
        reader.close()

    return result
def __init__(self, artist_id, page):
    '''Decode a Fanbox artist response and parse its posts.

    artist_id -- converted with int() and stored as self.artistId.
    page      -- raw response text.

    Raises PixivException (code 9999) when the response reports an error.
    Nothing is parsed when the response body is None.
    '''
    self.artistId = int(artist_id)
    js = demjson.decode(page)

    if js["error"]:
        # Use the stored id: the bare name `artistId` is not defined here and
        # raised NameError instead of the intended exception.
        raise PixivException("Error when requesting Fanbox artist: {0}".format(self.artistId), 9999, page)

    if js["body"] is not None:
        self.parsePosts(js["body"])
def parse(self):
    '''Read the total and the title from the JSON text in self.series_str.

    Raises PixivException (UNKNOWN_IMAGE_ERROR) when the response reports
    an error.
    '''
    response = json.loads(self.series_str)
    if response["error"]:
        raise PixivException("Cannot get novel series content details",
                             errorCode=PixivException.UNKNOWN_IMAGE_ERROR,
                             htmlPage=self.series_str)

    body = response["body"]
    # from publishedContentCount or total or displaySeriesContentCount ????
    self.total = body["total"]
    self.series_name = body["title"]
def __init__(self, mid=0, page=None, fromImage=False):
    '''Check a member page for known error states, then parse it.

    mid       -- accepted but not used in this body.
    page      -- parsed page; nothing happens when it is None.
    fromImage -- when true the image list is not parsed.

    Raises PixivException with NOT_LOGGED_IN, USER_ID_NOT_EXISTS,
    USER_ID_SUSPENDED, OTHER_MEMBER_ERROR or SERVER_ERROR depending on
    which check matches.
    '''
    # Identity check: `!= None` would go through the page object's __ne__.
    if page is not None:
        if self.IsNotLoggedIn(page):
            raise PixivException('Not Logged In!', errorCode=PixivException.NOT_LOGGED_IN)

        if self.IsUserNotExist(page):
            raise PixivException('User ID not exist/deleted!', errorCode=PixivException.USER_ID_NOT_EXISTS)

        if self.IsUserSuspended(page):
            raise PixivException('User Account is Suspended!', errorCode=PixivException.USER_ID_SUSPENDED)

        ## detect if there is any other error
        errorMessage = self.IsErrorExist(page)
        if errorMessage is not None:
            raise PixivException('Member Error: ' + errorMessage, errorCode=PixivException.OTHER_MEMBER_ERROR)

        ## detect if there is server error
        errorMessage = self.IsServerErrorExist(page)
        if errorMessage is not None:
            raise PixivException('Member Error: ' + errorMessage, errorCode=PixivException.SERVER_ERROR)

        ## detect if image count != 0
        if not fromImage:
            self.ParseImages(page)

        ## parse artist info
        self.ParseInfo(page, fromImage)

        ## check if no images
        self.haveImages = len(self.imageList) > 0

        ## check if the last page
        self.CheckLastPage(page)
def __init__(self, manga_series_id: int, current_page: int, payload: str):
    '''Store the series id and page number and parse the payload, if any.

    payload -- JSON text of the response; nothing is parsed when it is None.

    Raises PixivException (OTHER_ERROR) when the response reports an error.
    '''
    self.manga_series_id = manga_series_id
    self.current_page = current_page

    if payload is None:
        return

    response = json.loads(payload)
    if response["error"]:
        raise PixivException(message=response["message"],
                             errorCode=PixivException.OTHER_ERROR,
                             htmlPage=payload)
    self.parse_info(response["body"])
def __init__(self, js_str, type_mode):
    '''Decode the JSON text and keep its last id, illust list and type mode.

    Raises PixivException (OTHER_ERROR) when the response reports an error.
    '''
    response = json.loads(js_str)
    if response["error"]:
        raise PixivException(response["message"], errorCode=PixivException.OTHER_ERROR)

    body = response["body"]
    self.last_id = body["lastId"]
    self.images = body["illusts"]
    self.type_mode = type_mode
def __init__(self, iid=0, page=None, parent=None, fromBookmark=False, bookmark_count=-1, image_response_count=-1, dateFormat=None):
    '''Store the constructor arguments and, when a page is given, parse it.

    The page is first checked against the known error states, in order;
    the first match raises PixivException with the corresponding error
    code.  Then the artist is resolved and the image information, tags and
    works data are parsed.

    image_response_count is accepted but not used in this body.
    '''
    self.artist = parent
    self.fromBookmark = fromBookmark
    self.bookmark_count = bookmark_count
    self.imageId = iid
    self.imageUrls = []
    self.dateFormat = dateFormat
    self.descriptionUrlList = []

    if page is None:
        return

    ## check is error page (order matters: first match wins)
    known_errors = (
        (self.IsNotLoggedIn, 'Not Logged In!', PixivException.NOT_LOGGED_IN),
        (self.IsNeedPermission, 'Not in MyPick List, Need Permission!', PixivException.NOT_IN_MYPICK),
        (self.IsNeedAppropriateLevel, 'Public works can not be viewed by the appropriate level!', PixivException.NO_APPROPRIATE_LEVEL),
        (self.IsDeleted, 'Image not found/already deleted!', PixivException.IMAGE_DELETED),
        (self.IsGuroDisabled, 'Image is disabled for under 18, check your setting page (R-18/R-18G)!', PixivException.R_18_DISABLED),
        ## check if there is any other error
        (self.IsErrorPage, 'An error occurred!', PixivException.OTHER_IMAGE_ERROR),
    )
    for detect, message, code in known_errors:
        if detect(page):
            raise PixivException(message, errorCode=code)

    ## detect if there is any other error, then if there is server error
    message_checks = (
        (self.IsErrorExist, PixivException.UNKNOWN_IMAGE_ERROR),
        (self.IsServerErrorExist, PixivException.SERVER_ERROR),
    )
    for extract, code in message_checks:
        detail = extract(page)
        if detail is not None:
            raise PixivException('Image Error: ' + detail, errorCode=code)

    ## parse artist information
    if self.artist is None:
        self.artist = PixivArtist(page=page, fromImage=True)

    if fromBookmark and self.originalArtist is None:
        self.originalArtist = PixivArtist(page=page, fromImage=True)
    else:
        self.originalArtist = self.artist

    ## parse image information
    self.ParseInfo(page)
    self.ParseTags(page)
    self.ParseWorksData(page)
def __init__(self, iid=0, page=None, parent=None, fromBookmark=False, bookmark_count=-1, image_response_count=-1, dateFormat=None, tzInfo=None):
    '''Store the constructor arguments and, when a page is given, parse it.

    The page is parsed with parseJs() (Issue #556).  When that yields
    nothing, the HTML is checked against the known error states and the
    first match raises PixivException with the corresponding error code;
    if none matches, PixivException (UNKNOWN_IMAGE_ERROR) is raised because
    there is no payload to parse.

    image_response_count is accepted but not used in this body.
    '''
    self.artist = parent
    self.fromBookmark = fromBookmark
    self.bookmark_count = bookmark_count
    self.imageId = iid
    self.imageUrls = []
    self.dateFormat = dateFormat
    self.descriptionUrlList = []
    self._tzInfo = tzInfo

    if page is None:
        return

    # Issue #556
    payload = parseJs(page)

    # check error
    if payload is None:
        parsed = BeautifulSoup(page, features="html5lib")
        try:
            if self.IsNotLoggedIn(parsed):
                raise PixivException('Not Logged In!', errorCode=PixivException.NOT_LOGGED_IN, htmlPage=page)
            if self.IsNeedPermission(parsed):
                raise PixivException('Not in MyPick List, Need Permission!', errorCode=PixivException.NOT_IN_MYPICK, htmlPage=page)
            if self.IsNeedAppropriateLevel(parsed):
                raise PixivException('Public works can not be viewed by the appropriate level!',
                                     errorCode=PixivException.NO_APPROPRIATE_LEVEL, htmlPage=page)
            if self.IsDeleted(parsed):
                raise PixivException('Image not found/already deleted!', errorCode=PixivException.IMAGE_DELETED, htmlPage=page)
            if self.IsGuroDisabled(parsed):
                raise PixivException('Image is disabled for under 18, check your setting page (R-18/R-18G)!',
                                     errorCode=PixivException.R_18_DISABLED, htmlPage=page)
            # detect if there is any other error
            errorMessage = self.IsErrorExist(parsed)
            if errorMessage is not None:
                raise PixivException('Image Error: ' + str(errorMessage), errorCode=PixivException.UNKNOWN_IMAGE_ERROR, htmlPage=page)
            # detect if there is server error
            errorMessage = self.IsServerErrorExist(parsed)
            if errorMessage is not None:
                raise PixivException('Image Error: ' + str(errorMessage), errorCode=PixivException.SERVER_ERROR, htmlPage=page)
        finally:
            # Release the parse tree on every path, including the raises.
            parsed.decompose()
            del parsed

        # No payload and no recognised error page: the code below would
        # fail with a TypeError on payload["user"], so report it properly.
        raise PixivException('Image Error: Cannot parse image page data',
                             errorCode=PixivException.UNKNOWN_IMAGE_ERROR, htmlPage=page)

    # parse artist information
    if parent is None:
        temp_artist_id = list(payload["user"].keys())[0]
        self.artist = PixivArtist(temp_artist_id, page, fromImage=True)

    if fromBookmark and self.originalArtist is None:
        assert(self.artist is not None)
        self.originalArtist = PixivArtist(page=page, fromImage=True)
        print("From Artist Bookmark: {0}".format(self.artist.artistId))
        print("Original Artist: {0}".format(self.originalArtist.artistId))
    else:
        self.originalArtist = self.artist

    # parse image
    self.ParseInfo(payload)
def ParseToken(self, page, fromImage=False):
    '''Extract the artist token from the stacc feed tab of the page.

    page      -- parsed page searched for <a class="tab-feed"> links.
    fromImage -- accepted but not used in this body.

    Stores and returns the last path segment of the first matching link as
    self.artistToken; returns None when no link matches.

    Raises PixivException (PARSE_TOKEN_DIFFERENT_IMAGE_STRUCTURE) when the
    page cannot be processed.
    '''
    try:
        # get the token from stacc feed
        tabFeeds = page.findAll('a', attrs={'class': 'tab-feed'})
        if tabFeeds is not None and len(tabFeeds) > 0:
            for a in tabFeeds:
                if str(a["href"]).find("stacc/") > 0:
                    self.artistToken = a["href"].split("/")[-1]
                    return self.artistToken
    except Exception:
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit are not converted into a parse error.
        raise PixivException('Cannot parse artist token, possibly different image structure.',
                             errorCode=PixivException.PARSE_TOKEN_DIFFERENT_IMAGE_STRUCTURE)
def getEmbedData(self, embedData, jsPost) -> str:
    '''Render the embed snippet for one embedded item, using "embedConfig".

    content_provider.json is looked up next to sys.executable first, then
    in the current directory (Issue #881).

    embedData -- mapping describing the embed; must contain "serviceProvider".
    jsPost    -- the post data, attached to raised exceptions as htmlPage.

    Returns "" for ignored providers, otherwise the provider's "format"
    template filled with the value of the first configured key present in
    embedData.

    Raises PixivException when the config file is missing, the provider is
    not configured, or no non-empty content id is found.
    '''
    # Issue #881
    config_path = os.path.abspath(
        os.path.dirname(sys.executable) + os.sep + "content_provider.json")
    if not os.path.exists(config_path):
        config_path = os.path.abspath("./content_provider.json")
    if not os.path.exists(config_path):
        raise PixivException(
            f"Missing content_provider.json, please get it from https://github.com/Nandaka/PixivUtil2/blob/master/content_provider.json! Expected location => {config_path}",
            errorCode=PixivException.MISSING_CONFIG,
            htmlPage=None)

    providers = demjson.decode_file(config_path)["embedConfig"]
    provider_name = embedData["serviceProvider"]

    if provider_name not in providers:
        msg = "Unsupported embed provider = {0} for post = {1}, please update content_provider.json."
        raise PixivException(msg.format(embedData["serviceProvider"], self.imageId),
                             errorCode=9999,
                             htmlPage=jsPost)

    settings = providers[provider_name]
    if settings["ignore"]:
        return ""

    # value of the first configured key that the embed actually carries
    content_id = next((embedData[key] for key in settings["keys"] if key in embedData), None)

    if content_id is None or len(content_id) == 0:
        msg = "Empty content_id for embed provider = {0} for post = {1}, please update content_provider.json."
        raise PixivException(msg.format(embedData["serviceProvider"], self.imageId),
                             errorCode=9999,
                             htmlPage=jsPost)

    return settings["format"].format(content_id)