def getImagePage(self, image_id, parent=None, from_bookmark=False, bookmark_count=-1, image_response_count=-1):
    """Fetch and parse a single artwork page; return (PixivImage, raw_html).

    image_id -- pixiv illust id.
    parent -- optional PixivArtist owning the image; when None the artist
              info is resolved from the parsed page itself.
    from_bookmark -- True when reached from a bookmark listing.
    bookmark_count / image_response_count -- pre-fetched counters (-1 = unknown).
    """
    image = None
    response = None
    PixivHelper.get_logger().debug("Getting image page: %s", image_id)
    # https://www.pixiv.net/en/artworks/76656661
    url = "https://www.pixiv.net{1}/artworks/{0}".format(image_id, self._locale)
    response = self.getPixivPage(url, returnParsed=False, enable_cache=False)
    self.handleDebugMediumPage(response, image_id)

    # Issue #355 new ui handler
    try:
        if response.find("meta-preload-data") > 0:
            PixivHelper.print_and_log('debug', 'New UI Mode')

            # Issue #420: honour the local timezone setting when parsing dates.
            _tzInfo = None
            if self._config.useLocalTimezone:
                _tzInfo = PixivHelper.LocalUTCOffsetTimezone()

            image = PixivImage(image_id,
                               response,
                               parent,
                               from_bookmark,
                               bookmark_count,
                               image_response_count,
                               dateFormat=self._config.dateFormat,
                               tzInfo=_tzInfo)

            if image.imageMode == "ugoira_view":
                # Ugoira frame/zip metadata comes from a separate ajax endpoint.
                ugoira_meta_url = "https://www.pixiv.net/ajax/illust/{0}/ugoira_meta".format(image_id)
                res = self.open_with_retry(ugoira_meta_url)
                meta_response = res.read()
                image.ParseUgoira(meta_response)
                res.close()

            if parent is None:
                if from_bookmark:
                    image.originalArtist.reference_image_id = image_id
                    self.getMemberInfoWhitecube(image.originalArtist.artistId, image.originalArtist)
                else:
                    image.artist.reference_image_id = image_id
                    self.getMemberInfoWhitecube(image.artist.artistId, image.artist)
    except BaseException:
        # Fixed typo in log message ("Respose" -> "Response").
        PixivHelper.get_logger().error("Response data: \r\n %s", response)
        raise

    return (image, response)
def WriteSeriesData(self, seriesId, seriesDownloaded, filename):
    # Write the full metadata of a manga series to `filename` as JSON and
    # record the id in `seriesDownloaded` when done.
    from PixivBrowserFactory import getBrowser
    try:
        # Issue #421 ensure subdir exists.
        PixivHelper.makeSubdirs(filename)
        outfile = codecs.open(filename, 'w', encoding='utf-8')
    except IOError:
        # Fall back to a default file name in the working directory.
        outfile = codecs.open("Series " + str(seriesId) + ".json", 'w', encoding='utf-8')
        PixivHelper.get_logger().exception(
            "Error when saving image info: %s, file is saved to: %s.json", filename, "Series " + str(seriesId) + ".json")
    # Page 1 gives the series header plus the first listing page.
    receivedJSON = json.loads(getBrowser().getMangaSeries(seriesId, 1, returnJSON=True))
    jsondata = receivedJSON["body"]["illustSeries"][0]
    jsondata.update(receivedJSON["body"]["page"])
    # Each page holds up to 12 entries; +2 because range() is exclusive and
    # page 1 was already fetched above.
    pages = jsondata["total"] // 12 + 2
    for x in range(2, pages):
        receivedJSON = json.loads(getBrowser().getMangaSeries(seriesId, x, returnJSON=True))
        jsondata["series"].extend(receivedJSON["body"]["page"]["series"])
    # Strip volatile / redundant keys before persisting.
    for x in ["recentUpdatedWorkIds", "otherSeriesId", "seriesId", "isSetCover", "firstIllustId", "coverImageSl", "url"]:
        del jsondata[x]
    outfile.write(json.dumps(jsondata, ensure_ascii=False))
    outfile.close()
    seriesDownloaded.append(seriesId)
def WriteJSON(self, filename):
    """Dump the image metadata to `filename` as pretty-printed JSON.

    Falls back to '<imageId>.json' in the working directory when `filename`
    cannot be opened.
    """
    try:
        # Issue #421 ensure subdir exists.
        PixivHelper.makeSubdirs(filename)
        handle = codecs.open(filename, 'w', encoding='utf-8')
    except IOError:
        handle = codecs.open(str(self.imageId) + ".json", 'w', encoding='utf-8')
        PixivHelper.get_logger().exception("Error when saving image info: %s, file is saved to: %s.json", filename, self.imageId)

    # Fix Issue #481: keep a stable key order in the output file.
    payload = collections.OrderedDict([
        ("Artist ID", self.artist.artistId),
        ("Artist Name", self.artist.artistName),
        ("Image ID", self.imageId),
        ("Title", self.imageTitle),
        ("Caption", self.imageCaption),
        ("Tags", self.imageTags),
        ("Image Mode", self.imageMode),
        ("Pages", self.imageCount),
        ("Date", self.worksDate),
        ("Resolution", self.worksResolution),
        ("Tools", self.worksTools),
        ("BookmarkCount", self.bookmark_count),
        ("Link", "https://www.pixiv.net/en/artworks/{0}".format(self.imageId)),
        ("Ugoira Data", self.ugoira_data),
    ])
    if len(self.descriptionUrlList) > 0:
        payload["Urls"] = self.descriptionUrlList

    handle.write(json.dumps(payload, ensure_ascii=False, indent=4))
    handle.close()
def WriteHtml(self, html_pattern, filename):
    """Render this post into a standalone HTML file using `html_pattern`.

    Falls back to '<imageId>.html' in the working directory when `filename`
    cannot be opened.
    """
    try:
        PixivHelper.makeSubdirs(filename)
        info = codecs.open(filename, 'wb', encoding='utf-8')
    except IOError:
        info = codecs.open(str(self.imageId) + ".html", 'wb', encoding='utf-8')
        PixivHelper.get_logger().exception("Error when saving article html: %s, file is saved to: %s.html", filename, self.imageId)

    # Substitute the template placeholders.
    page = html_pattern.replace("%coverImageUrl%", self.coverImageUrl or "")
    page = page.replace("%artistName%", self.parent.artistName)
    page = page.replace("%imageTitle%", self.imageTitle)
    page = page.replace("%worksDate%", self.worksDate)
    page = page.replace("%body_text%", self.body_text or "")

    page = BeautifulSoup(page, features="html5lib")
    # Point each linked <img> at its anchor target so local files resolve.
    imageATags = page.find_all("a", attrs={"href": True})
    for imageATag in imageATags:
        tag = imageATag.img
        if tag:
            tag["src"] = imageATag["href"]
    # Idiom fix: compare to None with 'is', not '=='.
    if self.coverImageUrl is None:
        # No cover: drop the empty cover container from the template.
        cover_div = page.find("div", attrs={"class": "cover"})
        if cover_div:
            cover_div.decompose()
    page = page.prettify()

    # Rewrite remote links to their downloaded local files.
    for k, v in self.linkToFile.items():
        page = page.replace(k, "file://" + v)

    info.write(page)
    info.close()
def fanboxGetPost(self, post_id, artist=None): self.fanbox_is_logged_in() # https://fanbox.pixiv.net/api/post.info?postId=279561 # https://www.pixiv.net/fanbox/creator/104409/post/279561 p_url = f"https://api.fanbox.cc/post.info?postId={post_id}" # referer doesn't seeem to be essential p_referer = f"https://www.fanbox.cc/@{artist.creatorId if artist else ''}/posts/{post_id}" PixivHelper.get_logger().debug('Getting post detail from %s', p_url) p_req = mechanize.Request(p_url) p_req.add_header('Accept', 'application/json, text/plain, */*') p_req.add_header('Referer', p_referer) p_req.add_header('Origin', 'https://www.fanbox.cc') p_req.add_header('User-Agent', self._config.useragent) p_res = self.open_with_retry(p_req) p_response = p_res.read() PixivHelper.get_logger().debug(p_response.decode('utf8')) p_res.close() js = demjson.decode(p_response) if artist: return js else: _tzInfo = None if self._config.useLocalTimezone: _tzInfo = PixivHelper.LocalUTCOffsetTimezone() artist = FanboxArtist(js["body"]["user"]["userId"], js["body"]["creatorId"], js["body"]["user"]["name"]) self.fanboxUpdateArtistToken(artist) post = FanboxPost(post_id, artist, js["body"], _tzInfo) return post
def fanboxGetPostsFromArtist(self, artist=None, next_url=""):
    '''
    get all posts from the supported user
    from https://fanbox.pixiv.net/api/post.listCreator?userId=1305019&limit=10
    '''
    self.fanbox_is_logged_in()

    # Issue #641
    if not next_url:
        target_url = f"https://api.fanbox.cc/post.listCreator?userId={artist.artistId}&limit=10"
    elif next_url.startswith("https://"):
        target_url = next_url
    else:
        target_url = "https://www.fanbox.cc" + next_url

    # Fix #494
    PixivHelper.print_and_log('info', 'Getting posts from ' + target_url)
    request = mechanize.Request(target_url)
    for header, value in (('Accept', 'application/json, text/plain, */*'),
                          ('Referer', f"https://www.fanbox.cc/@{artist.creatorId}"),
                          ('Origin', 'https://www.fanbox.cc'),
                          ('User-Agent', self._config.useragent)):
        request.add_header(header, value)

    handle = self.open_with_retry(request)
    raw = handle.read()
    PixivHelper.get_logger().debug(raw.decode('utf8'))
    handle.close()

    posts = artist.parsePosts(raw)
    # Listing entries are shallow; fetch the full detail for each post.
    for post in posts:
        detail = self.fanboxGetPost(post.imageId, artist)
        post.parsePost(detail["body"])

    return posts
def updateFanboxCookie(self):
    """Refresh the FANBOX session by visiting the pixiv fanbox page.

    Returns True when the page reports a logged-in user; on success the
    FANBOXSESSID cookie value is stored into the config file.
    """
    request = mechanize.Request("https://www.pixiv.net/fanbox")
    request.add_header('Accept', 'application/json, text/plain, */*')
    request.add_header('Origin', 'https://www.pixiv.net')
    request.add_header('User-Agent', self._config.useragent)

    try:
        response = self.open_with_retry(request)
        parsed = BeautifulSoup(response, features="html5lib").decode('utf-8')
        response.close()
    except BaseException:
        PixivHelper.get_logger().error('Error at updateFanboxCookie(): %s', sys.exc_info())
        return False

    result = '"user":{"isLoggedIn":true' in str(parsed)
    if result:
        self._is_logged_in_to_FANBOX = True
    del parsed

    if not result:
        PixivHelper.print_and_log('info', 'Could not update FANBOX cookie string.')
        return result

    # Persist the refreshed session cookie.
    for cookie in self._ua_handlers['_cookies'].cookiejar:
        if cookie.name == 'FANBOXSESSID':
            PixivHelper.print_and_log('info', 'New FANBOX cookie value: ' + str(cookie.value))
            self._config.cookieFanbox = cookie.value
            self._config.writeConfig(path=self._config.configFileLocation)
            break
    return result
def process_batch_job(caller: PixivUtil2):
    """Run all enabled jobs defined in the default batch json file."""
    PixivHelper.get_logger().info('Batch Mode from json (b).')
    caller.set_console_title("Batch Menu")

    if not os.path.exists(_default_batch_filename):
        print(
            f"Cannot found {_default_batch_filename} in the application folder, see https://github.com/Nandaka/PixivUtil2/wiki/Using-Batch-Job-(Experimental) for example. "
        )
        return

    jobs_file = open(_default_batch_filename, encoding="utf-8")
    jobs = demjson.decode(jobs_file.read())

    # Dispatch table: job_type -> handler.
    handlers = {'1': handle_members, '2': handle_images, '3': handle_tags}
    for job_name in jobs["jobs"]:
        print(f"Processing {job_name}")
        curr_job = jobs["jobs"][job_name]
        if "enabled" not in curr_job or not bool(curr_job["enabled"]):
            print(f"Skipping {job_name} because not enabled.")
            continue
        if "job_type" not in curr_job:
            print(f"Cannot find job_type in {job_name}")
            continue
        job_option = JobOption(curr_job, caller.__config__)
        handler = handlers.get(curr_job["job_type"])
        if handler is not None:
            handler(caller, curr_job, job_name, job_option)
        else:
            print(
                f"Unsupported job_type {curr_job['job_type']} in {job_name}"
            )
def WriteInfo(self, filename):
    """Write a plain-text summary of the image metadata to `filename`.

    Falls back to '<imageId>.txt' in the working directory when `filename`
    cannot be opened.
    """
    try:
        # Issue #421 ensure subdir exists.
        PixivHelper.makeSubdirs(filename)
        info = codecs.open(filename, 'wb', encoding='utf-8')
    except IOError:
        info = codecs.open(str(self.imageId) + ".txt", 'wb', encoding='utf-8')
        # Use %s for imageId: it may not be an int, and the sibling writers
        # all log with %s.
        PixivHelper.get_logger().exception("Error when saving image info: %s, file is saved to: %s.txt", filename, self.imageId)
    info.write("ArtistID = " + str(self.artist.artistId) + "\r\n")
    info.write("ArtistName = " + self.artist.artistName + "\r\n")
    info.write("ImageID = " + str(self.imageId) + "\r\n")
    info.write("Title = " + self.imageTitle + "\r\n")
    info.write("Caption = " + self.imageCaption + "\r\n")
    info.write("Tags = " + ", ".join(self.imageTags) + "\r\n")
    info.write("Image Mode = " + self.imageMode + "\r\n")
    info.write("Pages = " + str(self.imageCount) + "\r\n")
    info.write("Date = " + self.worksDate + "\r\n")
    info.write("Resolution = " + self.worksResolution + "\r\n")
    info.write("Tools = " + self.worksTools + "\r\n")
    info.write("BookmarkCount = " + str(self.bookmark_count) + "\r\n")
    info.write("Link = https://www.pixiv.net/en/artworks/{0}\r\n".format(self.imageId))
    info.write("Ugoira Data = " + str(self.ugoira_data) + "\r\n")
    if len(self.descriptionUrlList) > 0:
        info.write("Urls =\r\n")
        for link in self.descriptionUrlList:
            info.write(" - " + link + "\r\n")
    info.close()
def WriteHtml(self, html_template, useAbsolutePaths, filename):
    """Render this FANBOX post into a standalone HTML file.

    html_template -- template text containing %...% placeholders.
    useAbsolutePaths -- when False, local file links are rewritten relative
                        to the html file's directory.
    filename -- output path; falls back to '<imageId>.html' on IOError.
    """
    try:
        PixivHelper.makeSubdirs(filename)
        info = codecs.open(filename, 'wb', encoding='utf-8')
    except IOError:
        info = codecs.open(str(self.imageId) + ".html", 'wb', encoding='utf-8')
        PixivHelper.get_logger().exception("Error when saving article html: %s, file is saved to: %s.html", filename, self.imageId)

    cover_image = ""
    if self.coverImageUrl:
        cover_image = f'<div class="cover"><img src="{self.coverImageUrl}"/></div>'
    page = html_template.replace("%coverImage%", cover_image)
    page = page.replace("%coverImageUrl%", self.coverImageUrl or "")
    page = page.replace("%artistName%", self.parent.artistName)
    page = page.replace("%imageTitle%", self.imageTitle)
    page = page.replace("%worksDate%", self.worksDate)

    token_body_text = ""
    token_images = ""
    token_text = ""
    if self.type == "article":
        token_body_text = f'<div class="article">{self.body_text}</div>'
    else:
        anchors = []
        for image_url in self.images:
            ext = image_url[image_url.rindex(".") + 1:].lower()
            # Bug fix: the original emitted f'<img scr="{0}"/>', which
            # rendered the literal text '<img scr="0"/>' ('{0}' is the int 0
            # inside an f-string, and 'scr' is not an attribute) instead of
            # an image tag pointing at the file.
            if ext in ["jpg", "jpeg", "png", "bmp"]:
                inner = f'<img src="{image_url}"/>'
            else:
                inner = image_url
            anchors.append(f'<a href="{image_url}">{inner}</a>')
        token_images = '<div class="non-article images">{0}</div>'.format("".join(anchors))
        token_text = '<div class="non-article text">{0}</div>'.format(
            "".join(['<p>{0}</p>'.format(x.rstrip()) for x in self.body_text.split("\n")]))

    page = page.replace("%body_text(article)%", token_body_text)
    page = page.replace("%images(non-article)%", token_images)
    page = page.replace("%text(non-article)%", token_text)

    page = BeautifulSoup(page, features="html5lib")
    # Point each linked <img> at its anchor target so local files resolve.
    imageATags = page.find_all("a", attrs={"href": True})
    for imageATag in imageATags:
        tag = imageATag.img
        if tag:
            tag["src"] = imageATag["href"]
    root = page.find("div", attrs={"class": "root"})
    if root:
        root["class"].append("non-article" if self.type != "article" else "article")
    page = page.prettify()

    html_dir = os.path.dirname(filename)
    for k, v in self.linkToFile.items():
        if not useAbsolutePaths:
            try:
                v = os.path.relpath(v, html_dir)
            except ValueError:
                # relpath fails across drives on Windows; keep the absolute path.
                # Bug fix: added the missing %s placeholders so the extra log
                # args actually format instead of raising a logging error.
                PixivHelper.get_logger().exception("Error when converting local paths to relative ones, absolute paths are used: %s, %s", filename, self.imageId)
                v = "file://" + v
        else:
            v = "file://" + v
        page = page.replace(k, v)

    info.write(page)
    info.close()
def login_with_username_and_password(self):
    """POST the stored credentials to the OAuth endpoint; return the raw response."""
    PixivHelper.get_logger().info(
        "Login to OAuth using username and password.")
    return requests.post(self._url,
                         self._get_values_for_login(),
                         headers=self._get_default_headers(),
                         proxies=self._proxies,
                         verify=self._validate_ssl)
def getMemberPage(self, member_id, page=1, bookmark=False, tags=None):
    """Fetch one listing page for a member; return (PixivArtist, raw_response).

    member_id -- pixiv user id.
    page -- 1-based listing page; translated into offset/limit of 48.
    bookmark -- True to list the member's bookmarks instead of their works.
    tags -- optional tag filter (empty string means no filter).
    """
    artist = None
    response = None
    if tags is None:
        tags = ''
    limit = 48
    offset = (page - 1) * limit
    # The profile/all endpoint returns everything at once, so the result
    # must be sliced locally to emulate paging.
    need_to_slice = False
    if bookmark:
        # https://www.pixiv.net/ajax/user/1039353/illusts/bookmarks?tag=&offset=0&limit=24&rest=show
        url = 'https://www.pixiv.net/ajax/user/{0}/illusts/bookmarks?tag={1}&offset={2}&limit={3}&rest=show'
        url = url.format(member_id, tags, offset, limit)
    else:
        # https://www.pixiv.net/ajax/user/1813972/illusts/tag?tag=Fate%2FGrandOrder?offset=0&limit=24
        # https://www.pixiv.net/ajax/user/1813972/manga/tag?tag=%E3%83%A1%E3%82%A4%E3%82%AD%E3%83%B3%E3%82%B0?offset=0&limit=24
        # https://www.pixiv.net/ajax/user/5238/illustmanga/tag?tag=R-18&offset=0&limit=48
        # https://www.pixiv.net/ajax/user/1813972/profile/all
        url = None
        if len(tags) > 0:
            url = 'https://www.pixiv.net/ajax/user/{0}/illustmanga/tag?tag={1}&offset={2}&limit={3}'
            url = url.format(member_id, tags, offset, limit)
        elif self._config.r18mode:
            # r18mode forces the R-18 tag filter.
            url = 'https://www.pixiv.net/ajax/user/{0}/illustmanga/tag?tag={1}&offset={2}&limit={3}'
            url = url.format(member_id, 'R-18', offset, limit)
        else:
            url = 'https://www.pixiv.net/ajax/user/{0}/profile/all'.format(
                member_id)
            need_to_slice = True
    PixivHelper.print_and_log('info', 'Member Url: ' + url)

    if url is not None:
        # cache the response
        response = self._get_from_cache(url)
        if response is None:
            try:
                res = self.open_with_retry(url)
                response = res.read()
                res.close()
            except urllib.error.HTTPError as ex:
                # A 404 body still carries a parseable error payload.
                if ex.code == 404:
                    response = ex.read()
            self._put_to_cache(url, response)

        PixivHelper.get_logger().debug(response)
        artist = PixivArtist(member_id, response, False, offset, limit)
        # Use the first image as the reference for whitecube member info.
        artist.reference_image_id = artist.imageList[0] if len(
            artist.imageList) > 0 else 0
        self.getMemberInfoWhitecube(member_id, artist, bookmark)

        if artist.haveImages and need_to_slice:
            # profile/all returned the full list: emulate paging locally.
            artist.imageList = artist.imageList[offset:offset + limit]

    return (artist, response)
def WriteUgoiraData(self, filename):
    """Write the raw ugoira frame metadata (JSON string) to `filename`.

    Falls back to '<imageId>.js' in the working directory when `filename`
    cannot be opened.
    """
    try:
        # Issue #421 ensure subdir exists.
        PixivHelper.makeSubdirs(filename)
        info = codecs.open(filename, 'wb', encoding='utf-8')
    except IOError:
        info = codecs.open(str(self.imageId) + ".js", 'wb', encoding='utf-8')
        # Use %s for imageId: it may not be an int, and the sibling writers
        # all log with %s.
        PixivHelper.get_logger().exception("Error when saving image info: %s, file is saved to: %s.js", filename, self.imageId)
    info.write(str(self.ugoira_data))
    info.close()
def process_batch_job(caller: PixivUtil2, batch_file=None):
    """Run all enabled jobs from a batch json file (defaults to the app's batch file)."""
    PixivHelper.get_logger().info('Batch Mode from json (b).')
    caller.set_console_title("Batch Menu")
    if batch_file is None:
        batch_file = _default_batch_filename
    batch_file = os.path.abspath(batch_file)

    if os.path.exists(batch_file):
        jobs_file = open(batch_file, encoding="utf-8")
        jobs = json.load(jobs_file)

        # Report how many jobs will actually run before starting.
        total_job = len(jobs["jobs"])
        active_job = len(
            [y for y in jobs["jobs"] if jobs["jobs"][y]["enabled"]])
        PixivHelper.print_and_log(
            "info",
            f"Found {active_job} active job(s) of {total_job} jobs from {batch_file}."
        )

        for job_name in jobs["jobs"]:
            PixivHelper.print_and_log("info", f"Processing {job_name}")
            curr_job = jobs["jobs"][job_name]
            # Skip disabled or malformed job entries instead of aborting.
            if "enabled" not in curr_job or not bool(curr_job["enabled"]):
                PixivHelper.print_and_log(
                    "warn", f"Skipping {job_name} because not enabled.")
                continue
            if "job_type" not in curr_job:
                PixivHelper.print_and_log(
                    "error", f"Cannot find job_type in {job_name}")
                continue

            job_option = JobOption(curr_job, caller.__config__)
            # job_type: '1' = members, '2' = images, '3' = tags.
            if curr_job["job_type"] == '1':
                handle_members(caller, curr_job, job_name, job_option)
            elif curr_job["job_type"] == '2':
                handle_images(caller, curr_job, job_name, job_option)
            elif curr_job["job_type"] == '3':
                handle_tags(caller, curr_job, job_name, job_option)
            else:
                PixivHelper.print_and_log(
                    "error",
                    f"Unsupported job_type {curr_job['job_type']} in {job_name}"
                )
    else:
        PixivHelper.print_and_log(
            "error",
            f"Cannot found {batch_file}, see https://github.com/Nandaka/PixivUtil2/wiki/Using-Batch-Job-(Experimental) for example. "
        )
def CreateUgoira(self, filename):
    """Package the downloaded ugoira zip at `filename` into a .ugoira file.

    Copies `filename` to '<name>.ugoira' and embeds the animation metadata
    (plus the source zip size) as 'animation.json' inside the archive.
    """
    if len(self.ugoira_data) == 0:
        PixivHelper.get_logger().exception("Missing ugoira animation info for image: %d", self.imageId)
        # Bug fix: without frame metadata, ''[:-1] below would produce a
        # malformed animation.json — skip creating a broken archive.
        return

    zipTarget = filename[:-4] + ".ugoira"
    if os.path.exists(zipTarget):
        os.remove(zipTarget)

    shutil.copyfile(filename, zipTarget)
    zipSize = os.stat(filename).st_size
    # Inject the zip size into the metadata json: '...}' -> '...,"zipSize":N}'.
    jsStr = self.ugoira_data[:-1] + r',"zipSize":' + str(zipSize) + r'}'
    with zipfile.ZipFile(zipTarget, mode="a") as z:
        z.writestr("animation.json", jsStr)
def WriteJSON(self, filename, JSONfilter):
    """Dump the image metadata to `filename` as JSON.

    When raw JSON from the site is available it is written as-is, minus the
    comma-separated keys listed in `JSONfilter`; otherwise a curated ordered
    summary is built from the parsed attributes.
    """
    try:
        # Issue #421 ensure subdir exists.
        PixivHelper.makeSubdirs(filename)
        info = codecs.open(filename, 'w', encoding='utf-8')
    except IOError:
        info = codecs.open(str(self.imageId) + ".json", 'w', encoding='utf-8')
        PixivHelper.get_logger().exception(
            "Error when saving image info: %s, file is saved to: %s.json", filename, self.imageId)

    if self.rawJSON:
        jsonInfo = self.rawJSON
        if JSONfilter:
            for x in JSONfilter.split(","):
                # Robustness fix: ignore filter keys that are absent from the
                # raw JSON instead of raising KeyError.
                jsonInfo.pop(x.strip(), None)
        if self.ugoira_data:
            jsonInfo["Ugoira Data"] = self.ugoira_data
        info.write(json.dumps(jsonInfo, ensure_ascii=False, indent=4))
        info.close()
    else:
        # Fix Issue #481
        jsonInfo = collections.OrderedDict()
        jsonInfo["Artist ID"] = self.artist.artistId
        jsonInfo["Artist Name"] = self.artist.artistName
        jsonInfo["Image ID"] = self.imageId
        if self.seriesNavData:
            jsonInfo["Series Data"] = self.seriesNavData
        jsonInfo["Title"] = self.imageTitle
        jsonInfo["Caption"] = self.imageCaption
        jsonInfo["Tags"] = self.imageTags
        jsonInfo["Image Mode"] = self.imageMode
        jsonInfo["Pages"] = self.imageCount
        jsonInfo["Date"] = self.js_createDate
        jsonInfo["Resolution"] = self.worksResolution
        jsonInfo["BookmarkCount"] = self.bookmark_count
        jsonInfo["Link"] = f"https://www.pixiv.net/en/artworks/{self.imageId}"
        if self.ugoira_data:
            jsonInfo["Ugoira Data"] = self.ugoira_data
        if len(self.descriptionUrlList) > 0:
            jsonInfo["Urls"] = self.descriptionUrlList
        # Issue #1064
        jsonInfo["titleCaptionTranslation"] = {
            "workTitle": self.translated_work_title,
            "workCaption": self.translated_work_caption
        }
        info.write(json.dumps(jsonInfo, ensure_ascii=False, indent=4))
        info.close()
def loginUsingCookie(self, login_cookie=None):
    """ Log in to Pixiv using saved cookie, return True if success """
    # Fall back to the cookie stored in the config file.
    if login_cookie is None or len(login_cookie) == 0:
        login_cookie = self._config.cookie

    if len(login_cookie) > 0:
        PixivHelper.print_and_log('info', 'Trying to log in with saved cookie')
        self.clearCookie()
        self._loadCookie(login_cookie)

        res = self.open_with_retry('https://www.pixiv.net/')
        parsed = BeautifulSoup(res, features="html5lib").decode('utf-8')
        PixivHelper.get_logger().info('Logging in, return url: %s', res.geturl())
        res.close()

        # Either marker in the returned page indicates a live session.
        result = False
        if "logout.php" in str(parsed):
            result = True
        if "pixiv.user.loggedIn = true" in str(parsed):
            result = True
        if result:
            PixivHelper.print_and_log('info', 'Login successful.')
            PixivHelper.get_logger().info('Logged in using cookie')
            self.getMyId(parsed)
            # Derive the locale from the redirect url (e.g. '/en').
            temp_locale = str(res.geturl()).replace('https://www.pixiv.net/', '').replace('/', '')
            if len(temp_locale) > 0:
                self._locale = '/' + temp_locale
            PixivHelper.get_logger().info('Locale = %s', self._locale)
        else:
            PixivHelper.get_logger().info('Failed to log in using cookie')
            PixivHelper.print_and_log('info', 'Cookie already expired/invalid.')

        del parsed
        return result
def WriteInfo(self, filename):
    """Write a plain-text summary of the image metadata to `filename`.

    Falls back to '<imageId>.txt' in the working directory when `filename`
    cannot be opened.
    """
    try:
        # Issue #421 ensure subdir exists.
        PixivHelper.makeSubdirs(filename)
        info = codecs.open(filename, 'wb', encoding='utf-8')
    except IOError:
        info = codecs.open(str(self.imageId) + ".txt", 'wb', encoding='utf-8')
        PixivHelper.get_logger().exception(
            "Error when saving image info: %s, file is saved to: %s.txt", filename, str(self.imageId))

    info.write(f"ArtistID = {self.artist.artistId}\r\n")
    info.write(f"ArtistName = {self.artist.artistName}\r\n")
    info.write(f"ImageID = {self.imageId}\r\n")
    info.write(f"Title = {self.imageTitle}\r\n")
    if self.seriesNavData:
        info.write(f"SeriesTitle = {self.seriesNavData['title']}\r\n")
        info.write(f"SeriesOrder = {self.seriesNavData['order']}\r\n")
        info.write(f"SeriesId = {self.seriesNavData['seriesId']}\r\n")
    info.write(f"Caption = {self.imageCaption}\r\n")
    info.write(f"Tags = {', '.join(self.imageTags)}\r\n")
    info.write(f"Image Mode = {self.imageMode}\r\n")
    info.write(f"Pages = {self.imageCount}\r\n")
    info.write(f"Date = {self.worksDateDateTime}\r\n")
    info.write(f"Resolution = {self.worksResolution}\r\n")
    info.write(f"BookmarkCount = {self.bookmark_count}\r\n")
    # Consistency fix: every other writer emits https:// links.
    info.write(
        f"Link = https://www.pixiv.net/en/artworks/{self.imageId}\r\n"
    )
    if self.ugoira_data:
        info.write(f"Ugoira Data = {self.ugoira_data}\r\n")
    if len(self.descriptionUrlList) > 0:
        info.write("Urls =\r\n")
        for link in self.descriptionUrlList:
            info.write(f" - {link}\r\n")
    # Issue #1064
    if len(self.translated_work_title) > 0:
        info.write(
            f"Translated Title = {self.translated_work_title}\r\n")
    if len(self.translated_work_caption) > 0:
        info.write(
            f"Translated Caption = {self.translated_work_caption}\r\n")
    info.close()
def processLoginResult(self, response, username, password): PixivHelper.get_logger().info('Logging in, return url: %s', response.geturl()) # check the returned json js = response.read() PixivHelper.get_logger().info(str(js)) result = json.loads(js) # Fix Issue #181 if result["body"] is not None and "success" in result["body"]: for cookie in self._ua_handlers['_cookies'].cookiejar: if cookie.name == 'PHPSESSID': PixivHelper.print_and_log( 'info', 'new cookie value: ' + str(cookie.value)) self._config.cookie = cookie.value self._config.writeConfig( path=self._config.configFileLocation) break # check whitecube res = self.open_with_retry(result["body"]["success"]["return_to"]) parsed = BeautifulSoup(res, features="html5lib").decode('utf-8') self.getMyId(parsed) res.close() # store the username and password in memory for oAuth login self._config.username = username self._config.password = password del parsed return True else: if result["body"] is not None and "validation_errors" in result[ "body"]: PixivHelper.print_and_log( 'info', "Server reply: " + str(result["body"]["validation_errors"])) if str(result["body"]["validation_errors"]).find( "reCAPTCHA") > 0: print( "Please follow the method described in https://github.com/Nandaka/PixivUtil2/issues/505" ) else: PixivHelper.print_and_log( 'info', 'Unknown login issue, please use cookie login method.') return False
def _configureBrowser(self, config):
    """Apply `config` to this browser: proxy, handlers, headers, timeout, SSL."""
    if config is None:
        PixivHelper.get_logger().info("No config given")
        return

    global defaultConfig
    if defaultConfig is None:
        defaultConfig = config

    self._config = config
    if config.useProxy:
        if config.proxyAddress.startswith('socks'):
            parseResult = urllib.parse.urlparse(config.proxyAddress)
            assert parseResult.scheme and parseResult.hostname and parseResult.port
            socksType = socks.PROXY_TYPE_SOCKS5 if parseResult.scheme == 'socks5' else socks.PROXY_TYPE_SOCKS4
            # Bug fix: log the actual scheme instead of always claiming SOCKS5.
            PixivHelper.get_logger().info(
                f"Using {parseResult.scheme.upper()} Proxy= {parseResult.hostname}:{parseResult.port}"
            )
            # https://stackoverflow.com/a/14512227
            socks.setdefaultproxy(socksType, parseResult.hostname, parseResult.port)
            socket.socket = socks.socksocket
        else:
            self.set_proxies(config.proxy)
            PixivHelper.get_logger().info("Using Proxy: %s", config.proxyAddress)

    # self.set_handle_equiv(True)
    # self.set_handle_gzip(True)
    self.set_handle_redirect(True)
    self.set_handle_referer(True)
    self.set_handle_robots(False)

    self.set_debug_http(config.debugHttp)
    if config.debugHttp:
        PixivHelper.get_logger().info('Debug HTTP enabled.')

    # self.visit_response
    # Bug fix: the original assigned addheaders twice, so the second
    # assignment (Accept-Charset, issue #184) silently dropped the
    # User-agent header. Keep both.
    self.addheaders = [('User-agent', config.useragent),
                       ('Accept-Charset', 'utf-8')]

    socket.setdefaulttimeout(config.timeout)

    if not self._config.enableSSLVerification:
        import ssl
        try:
            _create_unverified_https_context = ssl._create_unverified_context
        except AttributeError:
            # Legacy Python that doesn't verify HTTPS certificates by default
            pass
        else:
            # Handle target environment that doesn't support HTTPS verification
            ssl._create_default_https_context = _create_unverified_https_context
def write_content(self, filename):
    """Render the novel into `filename` using novel_template.html.

    Falls back to '<novel_id>.html' in the working directory when `filename`
    cannot be opened.
    """
    # Robustness fix: read the template with a context manager and an
    # explicit encoding instead of relying on the platform default.
    with open("novel_template.html", encoding="utf-8") as ft:
        template_str = ft.read()

    fh = None
    try:
        PixivHelper.makeSubdirs(filename)
        fh = codecs.open(filename, 'wb', encoding='utf-8')
    except IOError:
        fh = codecs.open(str(self.novel_id) + ".html", 'wb', encoding='utf-8')
        PixivHelper.get_logger().exception("Error when saving novel: %s, file is saved to: %s.html", filename, str(self.novel_id))
    if fh is not None:
        content_str = template_str.replace("%title%", self.imageTitle)
        content_str = content_str.replace("%novel_json_str%", self.novel_json_str)
        fh.write(content_str)
        fh.close()
def getBrowser(config=None, cookieJar=None):
    """Return the shared PixivBrowser singleton, creating it on first use.

    A non-None `config` is stored as the default config; when the browser
    already exists it is re-configured in place.
    """
    global defaultCookieJar
    global defaultConfig
    global _browser

    if _browser is not None:
        # Already created: optionally re-apply a new configuration.
        if config is not None:
            defaultConfig = config
            _browser._configureBrowser(config)
        return _browser

    if config is not None:
        defaultConfig = config
    if cookieJar is not None:
        defaultCookieJar = cookieJar
    if defaultCookieJar is None:
        PixivHelper.get_logger().info("No default cookie jar available, creating... ")
        defaultCookieJar = http.cookiejar.LWPCookieJar()
    _browser = PixivBrowser(defaultConfig, defaultCookieJar)
    return _browser
def ParseBookmarkDetails(self, page):
    """Extract bookmark and image-response counts from a parsed artwork page.

    Sets self.bookmark_count / self.image_response_count; both default to 0
    when the page carries no count list. Raises PixivException on None page.
    """
    if page is None:
        raise PixivException('No page given', errorCode=PixivException.NO_PAGE_GIVEN)
    try:
        count_lists = page.findAll('ul', attrs={'class': 'count-list'})
        if count_lists is not None and len(count_lists) > 0:
            anchors = count_lists[0].findAll('a')
            if anchors is not None and len(anchors) > 0:
                for anchor in anchors:
                    css_classes = anchor["class"]
                    if "bookmark-count" in css_classes:
                        self.bookmark_count = int(anchor.text)
                    elif "image-response-count" in css_classes:
                        self.image_response_count = int(anchor.text)
                return
        # no bookmark count
        self.bookmark_count = 0
        self.image_response_count = 0
    except BaseException:
        PixivHelper.get_logger().exception("Cannot parse bookmark count for: %d", self.imageId)
def WriteInfo(self, filename):
    """Write a plain-text summary of this FANBOX post to `filename`.

    Falls back to '<imageId>.txt' in the working directory when `filename`
    cannot be opened.
    """
    try:
        # Issue #421 ensure subdir exists.
        PixivHelper.makeSubdirs(filename)
        info = codecs.open(filename, 'wb', encoding='utf-8')
    except IOError:
        info = codecs.open(str(self.imageId) + ".txt", 'wb', encoding='utf-8')
        PixivHelper.get_logger().exception(
            "Error when saving image info: %s, file is saved to: %s.txt", filename, self.imageId)

    info.write(f"ArtistID = {self.parent.artistId}\r\n")
    info.write(f"ArtistName = {self.parent.artistName}\r\n")
    info.write(f"ImageID = {self.imageId}\r\n")
    info.write(f"Title = {self.imageTitle}\r\n")
    info.write(f"Caption = {self.body_text}\r\n")
    if self.is_restricted:
        info.write(f"Image Mode = {self.type}, Restricted\r\n")
    else:
        info.write(f"Image Mode = {self.type}\r\n")
    info.write(f"Pages = {self.imageCount}\r\n")
    info.write(f"Date = {self.worksDate}\r\n")
    info.write(f"Like Count = {self.likeCount}\r\n")
    # https://www.fanbox.cc/@nekoworks/posts/928
    info.write(
        f"Link = https://www.fanbox.cc/@{self.parent.creatorId}/posts/{self.imageId}\r\n"
    )
    if len(self.embeddedFiles) > 0:
        info.write("Urls =\r\n")
        for link in self.embeddedFiles:
            info.write(" - {0}\r\n".format(link))
    # Bug fix: this section lists descriptionUrlList, so guard on that list
    # (the original re-checked embeddedFiles, skipping or crashing on
    # mismatched list lengths).
    if len(self.descriptionUrlList) > 0:
        info.write("descriptionUrlList =\r\n")
        for link in self.descriptionUrlList:
            info.write(" - {0}\r\n".format(link))
    info.close()
def WriteInfo(self, filename):
    """Write a plain-text summary of this pixiv FANBOX post to `filename`.

    Falls back to '<imageId>.txt' in the working directory when `filename`
    cannot be opened.
    """
    try:
        # Issue #421 ensure subdir exists.
        PixivHelper.makeSubdirs(filename)
        handle = codecs.open(filename, 'wb', encoding='utf-8')
    except IOError:
        handle = codecs.open(str(self.imageId) + ".txt", 'wb', encoding='utf-8')
        PixivHelper.get_logger().exception(
            "Error when saving image info: %s, file is saved to: %s.txt", filename, self.imageId)

    lines = [
        f"ArtistID = {self.parent.artistId}\r\n",
        f"ArtistName = {self.parent.artistName}\r\n",
        f"ImageID = {self.imageId}\r\n",
        f"Title = {self.imageTitle}\r\n",
        f"Caption = {self.body_text}\r\n",
    ]
    # lines.append("Tags = " + ", ".join(self.imageTags) + "\r\n")
    if self.is_restricted:
        lines.append(f"Image Mode = {self.type}, Restricted\r\n")
    else:
        lines.append(f"Image Mode = {self.type}\r\n")
    lines.append(f"Pages = {self.imageCount}\r\n")
    lines.append(f"Date = {self.worksDate}\r\n")
    # lines.append("Resolution = " + self.worksResolution + "\r\n")
    # lines.append("Tools = " + self.worksTools + "\r\n")
    lines.append(f"Like Count = {self.likeCount}\r\n")
    lines.append(f"Link = https://www.pixiv.net/fanbox/creator/{self.parent.artistId}/post/{self.imageId}\r\n")
    # lines.append("Ugoira Data = " + str(self.ugoira_data) + "\r\n")
    if len(self.embeddedFiles) > 0:
        lines.append("Urls =\r\n")
        for link in self.embeddedFiles:
            lines.append(f" - {link}\r\n")

    handle.writelines(lines)
    handle.close()
def fanboxLoginUsingCookie(self, login_cookie=None):
    """ Log in to Pixiv using saved cookie, return True if success """
    result = False
    parsed = ""
    if login_cookie is None or len(login_cookie) == 0:
        login_cookie = self._config.cookieFanbox

    if len(login_cookie) > 0:
        PixivHelper.print_and_log('info', 'Trying to log in FANBOX with saved cookie')
        # self.clearCookie()
        self._loadCookie(login_cookie, "fanbox.cc")

        req = mechanize.Request("https://www.fanbox.cc")
        req.add_header('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8')
        req.add_header('Origin', 'https://www.fanbox.cc')
        req.add_header('User-Agent', self._config.useragent)

        try:
            res = self.open_with_retry(req)
            parsed = BeautifulSoup(res, features="html5lib").decode('utf-8')
            # Fixed typo in log message ("cookit" -> "cookie").
            PixivHelper.get_logger().info('Logging in with cookie to Fanbox, return url: %s', res.geturl())
            res.close()
        except BaseException:
            PixivHelper.get_logger().error('Error at fanboxLoginUsingCookie(): %s', sys.exc_info())
            # Drop the stale session cookie so the retry path starts clean.
            self.cookiejar.clear("fanbox.cc")

        if '"user":{"isLoggedIn":true' in str(parsed):
            result = True
            self._is_logged_in_to_FANBOX = True
        del parsed

    if result:
        PixivHelper.print_and_log('info', 'FANBOX Login successful.')
    else:
        PixivHelper.print_and_log('info', 'Not logged in to FANBOX, trying to update FANBOX cookie...')
        result = self.updateFanboxCookie()
        self._is_logged_in_to_FANBOX = result

    return result
def fanboxGetPostsFromArtist(self, artist_id, next_url=""):
    ''' get all posts from the supported user from https://www.pixiv.net/ajax/fanbox/creator?userId=15521131 '''

    def fetch_json(target_url, referer_url):
        # One ajax GET with the headers Pixiv expects; returns the raw body.
        # (Extracted: this sequence was duplicated verbatim for the creator
        # listing and for every per-post detail request.)
        req = mechanize.Request(target_url)
        req.add_header('Accept', 'application/json, text/plain, */*')
        req.add_header('Referer', referer_url)
        req.add_header('Origin', 'https://www.pixiv.net')
        req.add_header('User-Agent', self._config.useragent)
        res = self.open_with_retry(req)
        payload = res.read()
        PixivHelper.get_logger().debug(payload.decode('utf8'))
        res.close()
        return payload

    if next_url is None or next_url == "":
        url = "https://www.pixiv.net/ajax/fanbox/creator?userId={0}".format(artist_id)
    elif next_url.startswith("https://"):
        url = next_url
    else:
        url = "https://www.pixiv.net" + next_url

    # Fix #494
    PixivHelper.print_and_log('info', 'Getting posts from ' + url)
    referer = "https://www.pixiv.net/fanbox/creator/{0}".format(artist_id)
    response = fetch_json(url, referer)

    # Issue #420
    _tzInfo = None
    if self._config.useLocalTimezone:
        _tzInfo = PixivHelper.LocalUTCOffsetTimezone()

    result = FanboxArtist(artist_id, response, tzInfo=_tzInfo)

    # Resolve the display name/token through the regular Pixiv profile.
    pixivArtist = PixivArtist(artist_id)
    self.getMemberInfoWhitecube(artist_id, pixivArtist)
    result.artistName = pixivArtist.artistName
    result.artistToken = pixivArtist.artistToken

    for post in result.posts:
        # https://fanbox.pixiv.net/api/post.info?postId=279561
        # https://www.pixiv.net/fanbox/creator/104409/post/279561
        p_url = "https://fanbox.pixiv.net/api/post.info?postId={0}".format(post.imageId)
        p_referer = "https://www.pixiv.net/fanbox/creator/{0}/post/{1}".format(artist_id, post.imageId)
        PixivHelper.get_logger().debug('Getting post detail from %s', p_url)
        p_response = fetch_json(p_url, p_referer)
        js = demjson.decode(p_response)
        post.parsePost(js["body"])

    return result
def _configureBrowser(self, config):
    """Apply a PixivConfig to this mechanize browser: proxy, handlers,
    default headers, and the global socket timeout.

    Also stashes the first config seen into the module-level defaultConfig.
    """
    if config is None:
        PixivHelper.get_logger().info("No config given")
        return

    global defaultConfig
    if defaultConfig is None:
        defaultConfig = config

    self._config = config
    if config.useProxy:
        if config.proxyAddress.startswith('socks'):
            # SOCKS proxy: monkey-patch urllib/http.client at module level.
            parseResult = urllib.parse.urlparse(config.proxyAddress)
            assert parseResult.scheme and parseResult.hostname and parseResult.port
            socksType = socks.PROXY_TYPE_SOCKS5 if parseResult.scheme == 'socks5' else socks.PROXY_TYPE_SOCKS4
            socks.setdefaultproxy(socksType, parseResult.hostname, parseResult.port)
            socks.wrapmodule(urllib)
            socks.wrapmodule(http.client)
            PixivHelper.get_logger().info("Using SOCKS Proxy: %s", config.proxyAddress)
        else:
            self.set_proxies(config.proxy)
            PixivHelper.get_logger().info("Using Proxy: %s", config.proxyAddress)

    # self.set_handle_equiv(True)
    # self.set_handle_gzip(True)
    self.set_handle_redirect(True)
    self.set_handle_referer(True)
    self.set_handle_robots(False)

    self.set_debug_http(config.debugHttp)
    if config.debugHttp:
        PixivHelper.get_logger().info('Debug HTTP enabled.')

    # self.visit_response
    # Bug fix: addheaders was assigned twice, so the second assignment
    # (Accept-Charset, issue #184) silently dropped the User-agent header.
    # Send both headers in a single list.
    self.addheaders = [('User-agent', config.useragent),
                       ('Accept-Charset', 'utf-8')]

    socket.setdefaulttimeout(config.timeout)
def login(self):
    """Authenticate against the Pixiv OAuth endpoint.

    Tries the stored refresh token first; on failure (or when no token is
    stored) falls back to a username/password login. On HTTP 200 the access
    and refresh tokens are cached on self; on HTTP 400 a PixivException is
    raised. Returns the final OAuth response object.
    """
    oauth_response = None
    need_relogin = True

    if self._refresh_token is not None:
        PixivHelper.get_logger().info("Login to OAuth using refresh token.")
        oauth_response = requests.post(self._url,
                                       self._get_values_for_refresh(),
                                       headers=self._get_default_headers(),
                                       proxies=self._proxies,
                                       verify=self._validate_ssl)
        if oauth_response.status_code == 200:
            need_relogin = False
        else:
            PixivHelper.get_logger().info("OAuth Refresh Token invalid, Relogin needed.")

    if need_relogin:
        oauth_response = self.login_with_username_and_password()

    PixivHelper.get_logger().debug("%s: %s", oauth_response.status_code, oauth_response.text)
    status = oauth_response.status_code
    if status == 200:
        # Success: cache both tokens for subsequent calls.
        tokens = oauth_response.json()["response"]
        self._refresh_token = tokens["refresh_token"]
        self._access_token = tokens["access_token"]
    elif status == 400:
        # Try to surface the server-side error message; fall back to raw text.
        message = oauth_response.text
        try:
            message = json.loads(message)["errors"]["system"]["message"]
        except (ValueError, KeyError):
            pass
        PixivHelper.print_and_log('error', message)
        raise PixivException("Failed to login using OAuth", PixivException.OAUTH_LOGIN_ISSUE)

    return oauth_response
def process_fanbox_post(caller, config, post, artist):
    """Process a single FANBOX post: record it in the DB, download the cover
    and images, then write the info text/HTML files.

    caller: object holding __dbManager__ (see TODO below).
    config: PixivConfig-style settings object.
    post:   the FANBOX post to process.
    artist: the FANBOX artist owning the post.

    Downloaded file records and the post's update date are flushed to the DB
    in the ``finally`` block, even on abort/exception.
    """
    # caller function/method
    # TODO: ideally to be removed or passed as argument
    db = caller.__dbManager__
    br = PixivBrowserFactory.getBrowser()

    db.insertPost(artist.artistId, post.imageId, post.imageTitle, post.feeRequired, post.worksDate, post.type)

    # (post_id, page, filename) tuples collected for the DB in `finally`.
    post_files = []

    # Issue-history check: skip the post when the DB already has it with an
    # update timestamp at least as new as the post's own.
    flag_processed = False
    if config.checkDBProcessHistory:
        result = db.selectPostByPostId(post.imageId)
        if result:
            updated_date = result[5]
            if updated_date is not None and post.updatedDateDatetime <= datetime_z.parse_datetime(updated_date):
                flag_processed = True

    try:
        # Fetch the full post body (images, text) unless restricted/processed.
        if not post.is_restricted and not flag_processed:
            br.fanboxUpdatePost(post)

        if ((not post.is_restricted) or config.downloadCoverWhenRestricted) and (not flag_processed) and config.downloadCover:
            # cover image
            if post.coverImageUrl is not None:
                # fake the image_url for filename compatibility, add post id and pagenum
                fake_image_url = post.coverImageUrl.replace("{0}/cover/".format(post.imageId), "{0}_".format(post.imageId))
                filename = PixivHelper.make_filename(config.filenameFormatFanboxCover,
                                                     post,
                                                     artistInfo=artist,
                                                     tagsSeparator=config.tagsSeparator,
                                                     tagsLimit=config.tagsLimit,
                                                     fileUrl=fake_image_url,
                                                     bookmark=None,
                                                     searchTags='',
                                                     useTranslatedTag=config.useTranslatedTag,
                                                     tagTranslationLocale=config.tagTranslationLocale)
                filename = PixivHelper.sanitize_filename(filename, config.rootDirectory)
                post.linkToFile[post.coverImageUrl] = filename
                print("Downloading cover from {0}".format(post.coverImageUrl))
                print("Saved to {0}".format(filename))

                referer = "https://www.pixiv.net/fanbox/creator/{0}/post/{1}".format(artist.artistId, post.imageId)
                # don't pass the post id and page number to skip db check
                (result, filename) = PixivDownloadHandler.download_image(caller,
                                                                         post.coverImageUrl,
                                                                         filename,
                                                                         referer,
                                                                         config.overwrite,
                                                                         config.retry,
                                                                         config.backupOldFile,
                                                                         image=post)
                # -1 marks the cover (not a numbered page) in the DB record.
                post_files.append((post.imageId, -1, filename))
                PixivHelper.get_logger().debug("Download %s result: %s", filename, result)
            else:
                PixivHelper.print_and_log("info", "No Cover Image for post: {0}.".format(post.imageId))

        # Early exits: restricted posts and already-processed posts stop here
        # (the `finally` block still runs and updates the DB).
        if post.is_restricted:
            PixivHelper.print_and_log("info", "Skipping post: {0} due to restricted post.".format(post.imageId))
            return

        if flag_processed:
            PixivHelper.print_and_log("info", "Skipping post: {0} because it was downloaded before.".format(post.imageId))
            return

        if post.images is None or len(post.images) == 0:
            PixivHelper.print_and_log("info", "No Image available in post: {0}.".format(post.imageId))
        else:
            current_page = 0
            print("Image Count = {0}".format(len(post.images)))
            for image_url in post.images:
                # fake the image_url for filename compatibility, add post id and pagenum
                fake_image_url = image_url.replace("{0}/".format(post.imageId),
                                                   "{0}_p{1}_".format(post.imageId, current_page))
                filename = PixivHelper.make_filename(config.filenameFormatFanboxContent,
                                                     post,
                                                     artistInfo=artist,
                                                     tagsSeparator=config.tagsSeparator,
                                                     tagsLimit=config.tagsLimit,
                                                     fileUrl=fake_image_url,
                                                     bookmark=None,
                                                     searchTags='',
                                                     useTranslatedTag=config.useTranslatedTag,
                                                     tagTranslationLocale=config.tagTranslationLocale)
                filename = PixivHelper.sanitize_filename(filename, config.rootDirectory)

                post.linkToFile[image_url] = filename

                referer = "https://www.pixiv.net/fanbox/creator/{0}/post/{1}".format(artist.artistId, post.imageId)

                print("Downloading image {0} from {1}".format(current_page, image_url))
                print("Saved to {0}".format(filename))

                # filesize detection and overwrite issue
                # NOTE: temporarily disable the size check around this single
                # download and restore the previous setting right after.
                _oldvalue = config.alwaysCheckFileSize
                config.alwaysCheckFileSize = False
                # don't pass the post id and page number to skip db check
                (result, filename) = PixivDownloadHandler.download_image(caller,
                                                                         image_url,
                                                                         filename,
                                                                         referer,
                                                                         False,  # config.overwrite somehow unable to get remote filesize
                                                                         config.retry,
                                                                         config.backupOldFile,
                                                                         image=post)
                if result == PixivConstant.PIXIVUTIL_ABORTED:
                    raise KeyboardInterrupt()
                post_files.append((post.imageId, current_page, filename))
                PixivHelper.get_logger().debug("Download %s result: %s", filename, result)

                config.alwaysCheckFileSize = _oldvalue
                current_page = current_page + 1

        # Implement #447
        filename = PixivHelper.make_filename(config.filenameFormatFanboxInfo,
                                             post,
                                             artistInfo=artist,
                                             tagsSeparator=config.tagsSeparator,
                                             tagsLimit=config.tagsLimit,
                                             fileUrl="{0}".format(post.imageId),
                                             bookmark=None,
                                             searchTags='',
                                             useTranslatedTag=config.useTranslatedTag,
                                             tagTranslationLocale=config.tagTranslationLocale)
        filename = PixivHelper.sanitize_filename(filename, config.rootDirectory)
        if config.writeImageInfo:
            post.WriteInfo(filename + ".txt")
        if config.writeHtml:
            # Articles always get HTML; other types only when they have enough
            # images and body text to be worth rendering.
            if post.type == "article" or (len(post.images) >= config.minImageCountForNonArticle and len(post.body_text) > config.minTextLengthForNonArticle):
                html_template = PixivConstant.HTML_TEMPLATE
                # Prefer a user-supplied template.html in the working dir.
                if os.path.isfile("template.html"):
                    reader = PixivHelper.open_text_file("template.html")
                    html_template = reader.read()
                    reader.close()
                post.WriteHtml(html_template, config.useAbsolutePathsInHtml, filename + ".html")

        if config.writeUrlInDescription:
            PixivHelper.write_url_in_description(post, config.urlBlacklistRegex, config.urlDumpFilename)
    finally:
        # Always persist whatever was downloaded plus the post's update date.
        if len(post_files) > 0:
            db.insertPostImages(post_files)

        db.updatePostUpdateDate(post.imageId, post.updatedDate)