def pixiv(page_url, options):
    """Build a ``Work`` for the Pixiv illustration at *page_url*.

    Args:
        page_url: URL of the illustration page. The last non-empty path
            segment must be the numeric illustration id.
        options: Mapping read for three keys: ``"album"`` (truthy to return
            every page of a multi-page post), ``"index"`` (page to pick when
            ``"album"`` is falsy) and ``"username"`` (truthy to put the
            account name before the display name).

    Returns:
        A ``Work`` built from the title, the pair of author names, ``None``,
        the ``x_restrict > 0`` flag, the image URL (a list of URLs in album
        mode) and the unmodified *page_url*.

    Raises:
        ValueError: If the last path segment is not an integer.
    """
    from pixivpy3 import AppPixivAPI

    parsed = urlparse(page_url)
    # Strip trailing slashes so ".../artworks/123/" still yields "123"
    # instead of failing on int("").
    illust_id = int(parsed.path.rstrip("/").split("/")[-1])

    api = AppPixivAPI()
    secrets = h.get_secrets()["pixiv"]
    api.login(secrets["username"], secrets["password"])
    data = api.illust_detail(illust_id)["illust"]

    pages = data["meta_pages"]
    if not pages:
        # Single-image post: the URL lives under meta_single_page.
        image_url = data["meta_single_page"]["original_image_url"]
    elif options["album"]:
        image_url = [page["image_urls"]["original"] for page in pages]
    else:
        image_url = pages[options["index"]]["image_urls"]["original"]

    user = data["user"]
    if options["username"]:
        artist = (user["account"], user["name"])
    else:
        artist = (user["name"], user["account"])

    return Work(
        data["title"],
        artist,
        None,
        data["x_restrict"] > 0,
        image_url,
        page_url,
    )
class Pixiv(DummySite):
    """Site adapter that enumerates the illustrations of one Pixiv user.

    The user id is taken from the last ``/``-separated segment of the URL
    passed to the constructor; logging in and fetching the user profile
    both happen during construction.
    """

    def __init__(self, url, username, password, proxy=None):
        proxies = get_proxy(proxy)
        # The API client gets the proxy settings plus a timeout; the
        # fetcher receives the proxy settings only.
        api_kwargs = {"timeout": (3, 10), **proxies}
        self.api = AppPixivAPI(**api_kwargs)
        self._fetcher = PixivFetcher(**proxies)
        self.api.login(username, password)

        self._user_id = int(url.split("/")[-1])
        self._dir_name = None
        self._total_illustrations = 0
        self._fetch_user_detail()

    @property
    def fetcher(self):
        """The ``PixivFetcher`` created in the constructor."""
        return self._fetcher

    @property
    def dir_name(self):
        """Directory name derived from the user's profile."""
        assert self._dir_name is not None
        return self._dir_name

    def _fetch_user_detail(self):
        """Load the user profile, cache dir name and illustration count.

        Returns the normalized directory name.
        """
        assert self._user_id is not None
        profile = self.api.user_detail(self._user_id)
        user = profile['user']
        parts = (user['name'], user['account'], str(user['id']))
        self._dir_name = normalize_filename("-".join(parts))
        self._total_illustrations = profile['profile']['total_illusts']
        return self.dir_name

    def _fetch_image_list(self):
        """Yield whatever ``parse_image_urls`` yields, page after page."""
        page = self.api.user_illusts(self._user_id)
        while True:
            for illustration in page.illusts:
                yield from parse_image_urls(illustration)
            next_url = page.next_url
            if next_url is None:
                return
            # The next page's query parameters are recovered from next_url.
            page = self.api.user_illusts(**self.api.parse_qs(next_url))

    def _fetch_single_image_url(self, illustration_id):
        """Return the ``large`` image URL of a single illustration."""
        detail = self.api.illust_detail(illustration_id)
        return detail.illust.image_urls['large']

    @property
    def tasks(self):
        """Generator over everything ``_fetch_image_list`` produces."""
        yield from self._fetch_image_list()
def pixiv_render(item, base_path, debug=False):
    """Download every image of the Pixiv illustration referenced by *item*.

    The module-level ``pixiv_api`` client is created and logged in on first
    use. Files are stored under ``base_path + "<user name>_<user id>"``.

    Args:
        item: Object whose ``get_remote()`` result is passed to
            ``get_illust_id`` to obtain the illustration id.
        base_path: Prefix that is string-concatenated with the per-user
            directory name (so it normally ends with a path separator).
        debug: When true, print a ``.`` after each downloaded file.

    Returns:
        A ``(paths, detail)`` tuple: the ``"<dir>/<file name>"`` string of
        every downloaded file, and the raw ``illust_detail`` response.
    """
    global pixiv_api
    if pixiv_api is None:
        pixiv_api = AppPixivAPI()
        pixiv_api.login(pixiv_username, pixiv_password)

    illust_id = get_illust_id(item.get_remote())
    detail = pixiv_api.illust_detail(illust_id)
    illust = detail['illust']

    path = str(illust['user']['name']) + '_' + str(illust['user']['id'])
    cpath(base_path + path)

    urls = []
    if illust['page_count'] > 1:
        for page in illust['meta_pages']:
            page_url = None
            try:
                page_url = page['image_urls']['original']
            except (NameError, KeyError):
                try:
                    # No original size: fall back to the last listed size.
                    # IndexError covers an empty image_urls mapping, which
                    # previously escaped this best-effort lookup.
                    page_url = list(page['image_urls'].values())[-1]
                except (NameError, KeyError, IndexError):
                    pass
            if page_url is not None:
                urls.append(page_url)

    if not urls:
        # Single-page post, or no usable page URL was found above.
        try:
            urls.append(illust['meta_single_page']['original_image_url'])
        except (NameError, KeyError):
            try:
                urls.append(illust['image_urls']['large'])
            except (NameError, KeyError):
                pass

    # A path separator inside the title would make the file name point
    # into a directory that does not exist, so neutralise it.
    safe_title = str(illust['title']).replace('/', '_')
    if os.sep != '/':
        safe_title = safe_title.replace(os.sep, '_')

    target_dir = os.path.abspath(base_path + path)
    ret = []
    for url in urls:
        name = safe_title + '_' + str(illust_id) + os.path.basename(url)
        ret.append(path + '/' + name)
        pixiv_api.download(url, name=name, path=target_dir)
        if debug:
            print('.', end='', flush=True)
    return ret, detail
import json
import os
import re

from pixivpy3 import AppPixivAPI

# Script: download the original image(s) of every pixiv URL in urls.txt,
# logging in with the email/password stored in credentials.json.

with open('credentials.json') as cf:
    credentials = json.load(cf)

with open('urls.txt') as uf:
    # Keep only the last path segment of each pixiv URL line.
    urls = [u.split('/')[-1] for u in uf if 'pixiv.net' in u]

# The id is the first run of digits in that segment. Segments without any
# digits are skipped instead of aborting the whole run with an IndexError.
ids = [
    match.group()
    for match in (re.search(r'\d+', tail) for tail in urls)
    if match
]

api = AppPixivAPI()
api.login(credentials['email'], credentials['password'])

for illust_id in ids:
    json_result = api.illust_detail(illust_id)
    illust = json_result.illust
    single_url = illust['meta_single_page'].get('original_image_url')
    if single_url:
        api.download(single_url)
    else:
        # Multi-page works carry their URLs under meta_pages; reading
        # meta_single_page unconditionally raised KeyError for them.
        for page in illust['meta_pages']:
            api.download(page['image_urls']['original'])
class PixivDownloader:
    """Download Pixiv posts (single images, collections, manga, ugoira).

    All ``download*`` methods are generators (or return one) that yield the
    absolute path of each file they produce.
    """

    def __init__(self, client=None, username=None, password=None,
                 log_level=logging.WARNING):
        """Set up the API client and the ``PixivDownloader`` logger.

        Args:
            client: Existing API client to reuse. When given, *username*
                and *password* are ignored.
            username: Login name; must be paired with *password*.
            password: Login password; must be paired with *username*.
            log_level: Level applied to the ``PixivDownloader`` logger.

        Raises:
            AttributeError: If no client is given and exactly one of
                *username* / *password* is provided.
        """
        if not client and (bool(username) != bool(password)):
            raise AttributeError(
                'If no client is given both username and password must be given'
            )

        if client:
            self.api = client
        else:
            self.api = AppPixivAPI()
            if username and password:
                self.api.login(username, password)

        self.logger = logging.getLogger('PixivDownloader')
        # getLogger returns the same object for every instance, so adding a
        # handler unconditionally would duplicate each log line once per
        # PixivDownloader ever created.
        if not self.logger.handlers:
            self.logger.addHandler(logging.StreamHandler())
        self.logger.setLevel(log_level)

    def login(self, username=None, password=None, refresh_token=None):
        """Authenticate the API client and return the result of ``auth``."""
        if refresh_token:
            self.logger.info('Loging in with refresh_token')
        elif username:
            self.logger.info('Loging in with username %s', username)
        else:
            self.logger.info('Loging')

        return self.api.auth(username=username,
                             password=password,
                             refresh_token=refresh_token)

    def logout(self):
        """Drop the current session by replacing the API client."""
        self.logger.info('Logout')
        self.api = AppPixivAPI()

    def get_id_from_url(self, url):
        """Return the first run of digits in the URL path, as a string.

        Raises:
            ValueError: If the path contains no digits.
        """
        path = urlparse(url).path
        ids = re.findall('(\\d+)', path)
        if not ids:
            raise ValueError('Url does not contain post id')

        return ids[0]

    def download_by_id(self, post_id, output_dir):
        """Look up *post_id* and download it into *output_dir*.

        Raises:
            PixivDownloaderError: If the API response carries an ``error``
                entry, or the post type is not supported.
        """
        data = self.api.illust_detail(post_id)
        if data.get('error'):
            raise PixivDownloaderError(
                'Could not get post info or post doesn\'t exist.', data)

        return self.download(data.illust, output_dir)

    def download_by_url(self, url, output_dir):
        """Extract the post id from *url* and download that post."""
        return self.download_by_id(self.get_id_from_url(url), output_dir)

    def download(self, post, output_dir):
        """Create *output_dir* if needed and dispatch on the post type.

        Returns:
            The generator produced by the matching ``download_*`` method.

        Raises:
            PixivDownloaderError: If ``post.type`` is not ``illust``,
                ``ugoira`` or ``manga``.
        """
        output_dir = Path(output_dir).expanduser().absolute()
        if not os.path.isdir(output_dir):
            os.makedirs(output_dir)
            self.logger.debug('Created dir "%s"', output_dir)

        if post.type == 'illust' and not post.meta_pages:
            downloader = self.download_illust
            kind = 'Image'
        elif post.type == 'illust' and post.meta_pages:
            downloader = self.download_illust_collection
            kind = 'Image Collection'
        elif post.type == 'ugoira':
            downloader = self.download_ugoira
            kind = 'Video'
        elif post.type == 'manga':
            downloader = self.download_manga
            kind = 'Manga'
        else:
            raise PixivDownloaderError(
                f'Post type "{post.type}" not supported')

        self.logger.info('Initialize "%s" downloader for post %s', kind,
                         post.id)
        return downloader(post, output_dir)

    @staticmethod
    def _get_extension(image_url):
        """Return ``webp`` for ``_webp`` URLs, else the URL's file suffix."""
        if '_webp' in image_url:
            return 'webp'
        return os.path.splitext(image_url)[1].lstrip('.')

    def download_illust(self, post, output_dir):
        """Download a single-image post; yields the saved file's path."""
        image_url = post.meta_single_page.get('original_image_url',
                                              post.image_urls.large)
        filename = self.get_filename(post, self._get_extension(image_url))

        self.logger.info('Downloading "%s"', image_url)
        self.api.download(image_url,
                          path=output_dir,
                          name=filename,
                          replace=True)
        yield (Path(output_dir) / filename).absolute()

    def download_illust_collection(self, post, output_dir):
        """Download every page of a multi-image post into *output_dir*."""
        output_dir = Path(output_dir)
        yield from self._downloade_meta_pages(post, output_dir)

    def download_manga(self, post, output_dir):
        """Download every page into a ``<title>-<account>`` subdirectory."""
        output_dir = Path(output_dir) / f'{post.title}-{post.user.account}'
        if not output_dir.is_dir():
            output_dir.mkdir(parents=True, exist_ok=True)
            self.logger.debug('Created dir "%s"', output_dir)

        yield from self._downloade_meta_pages(post, output_dir)

    def _downloade_meta_pages(self, post, output_dir):
        """Download each entry of ``post.meta_pages``, numbered from 01."""
        for index, image in enumerate(post.meta_pages, 1):
            image_url = image.image_urls.get('original',
                                             image.image_urls.large)
            filename = self.get_filename(post,
                                         self._get_extension(image_url),
                                         suffix=f'-{index:0>2}')

            self.logger.info('Downloading "%s"', image_url)
            self.api.download(image_url,
                              path=str(output_dir),
                              name=filename,
                              replace=True)
            yield (output_dir / filename).absolute()

    def download_ugoira(self, post, output_dir):
        """Download an ugoira's frame archive and convert it to an mp4.

        The archive is fetched and unpacked in a temporary directory; only
        the resulting video is moved into *output_dir*. Yields its path.
        """
        ugoira_data = self.api.ugoira_metadata(post.id).ugoira_metadata
        zip_url = ugoira_data.zip_urls.get('large',
                                           ugoira_data.zip_urls.medium)

        with TemporaryDirectory() as tmp:
            temp_dir = Path(tmp)
            # Was a plain string missing its f-prefix, which produced the
            # literal file name "{post.id}.zip".
            filename = f'{post.id}.zip'
            self.logger.info('Downloading "%s"', zip_url)
            self.api.download(zip_url, path=str(temp_dir), name=filename)

            frames_dir = temp_dir / 'frames'
            os.mkdir(frames_dir)
            self._extract_zip(temp_dir / filename, frames_dir)

            video_name = self.get_filename(post, 'mp4')
            video_file = temp_dir / video_name
            # Only the first frame's delay is used for the whole video.
            self._generate_mp4_from_frames(video_file, frames_dir,
                                           ugoira_data.frames[0].delay)

            final_path = (Path(output_dir) / video_name).absolute()
            shutil.move(str(video_file), str(final_path))

        yield final_path

    def get_filename(self, post, extension, prefix=None, suffix=None):
        """Return ``<prefix><id>-<title><suffix>.<extension>``.

        Every ``/`` and every space in the assembled name is replaced
        with ``_``.
        """
        suffix = suffix or ''
        prefix = prefix or ''
        filename = f'{prefix}{post.id}-{post.title}{suffix}.{extension}'
        return filename.replace('/', '_').replace(' ', '_')

    def _extract_zip(self, zip_file, output_dir):
        """Extract every member of *zip_file* into *output_dir*."""
        self.logger.info('Extract "%s"', zip_file)
        with ZipFile(zip_file, 'r') as archive:
            archive.extractall(output_dir)

    def _generate_mp4_from_frames(self, output_file, frames_dir, delay):
        """Encode the images in *frames_dir* (sorted by name) as an mp4.

        Args:
            output_file: Path of the video to write.
            frames_dir: Directory containing one image file per frame.
            delay: Per-frame delay; the frame rate is ``1000 / delay``, so
                this is presumably in milliseconds (confirm against the
                ugoira metadata format).
        """
        self.logger.info('Generate video to "%s"', output_file)
        frame_paths = sorted(
            os.path.join(str(frames_dir), name)
            for name in os.listdir(frames_dir))
        frames = [imread(frame_path) for frame_path in frame_paths]

        framerate = 1000 / delay

        height, width, _layers = frames[0].shape
        video = VideoWriter(str(output_file), VideoWriter_fourcc(*'mp4v'),
                            framerate, (width, height))
        try:
            for frame in frames:
                video.write(frame)
            destroyAllWindows()
        finally:
            # Release the writer even if encoding a frame fails.
            video.release()
class APIHandler:
    """Singleton through which the program talks to the Pixiv API.

    Login runs on a background thread started by ``start()``; every public
    request method joins that thread before touching the API.
    """

    def __init__(self):
        self._api_thread = threading.Thread(target=self._login)
        self._login_started = False
        self._login_done = False
        self._api = AppPixivAPI()  # Client used for login and all requests

        # Assigned later: the singleton is instantiated before the config
        # is available, so credentials arrive through self.start()
        self._credentials: 'dict[str, str]'
        self._response: 'Json'

    def start(self, credentials):
        """Kick off the login thread once; the only public setup hook"""
        if self._login_started:
            return
        self._credentials = credentials
        self._api_thread.start()
        self._login_started = True

    def _await_login(self):
        """Block until the login thread has finished (joined at most once)"""
        if self._login_done:
            return
        self._api_thread.join()
        self._login_done = True

    def _login(self):
        """Thread target: delegates to the token-based login"""
        self._login_with_token()

    def _login_with_token(self):
        """Authenticate with the stored refresh token; print help on failure"""
        # TODO: refresh the token if it expired
        token = self._credentials['refresh_token']
        try:
            self._response = self._api.auth(refresh_token=token)
        except PixivError as e:
            for line in (
                '',
                e,
                'If this is a cloudflare captcha issue, just quit and retry',
                'It is not a problem with koneko or pixivpy',
                'Otherwise, please report to '
                'https://github.com/akazukin5151/koneko/issues',
                "Press 'q' and enter to exit",
            ):
                print(line)

    # Public API requests for user id
    def get_user_id(self) -> 'Json':
        """Return the id of the logged-in user from the auth response"""
        self._await_login()
        user = self._response['user']
        return user['id']

    # Public API request functions for each mode
    @funcy.retry(tries=3, errors=(ConnectionError, PixivError))
    @utils.spinner('')
    def artist_gallery(self, artist_user_id, offset) -> 'Json':
        """Mode 1"""
        self._await_login()
        return self._api.user_illusts(artist_user_id, offset=offset)

    @funcy.retry(tries=3, errors=(ConnectionError, PixivError))
    def protected_illust_detail(self, image_id) -> 'Json':
        """Mode 2"""
        self._await_login()
        return self._api.illust_detail(image_id)

    @funcy.retry(tries=3, errors=(ConnectionError, PixivError))
    def following_user_request(self, user_id, publicity, offset) -> 'Json':
        """Mode 3"""
        self._await_login()
        return self._api.user_following(
            user_id,
            restrict=publicity,
            offset=offset,
        )

    @funcy.retry(tries=3, errors=(ConnectionError, PixivError))
    def search_user_request(self, searchstr, offset) -> 'Json':
        """Mode 4"""
        self._await_login()
        return self._api.search_user(searchstr, offset=offset)

    @funcy.retry(tries=3, errors=(ConnectionError, PixivError))
    @utils.spinner('')
    def illust_follow_request(self, restrict, offset) -> 'Json':
        """Mode 5"""
        self._await_login()
        return self._api.illust_follow(restrict=restrict, offset=offset)

    @funcy.retry(tries=3, errors=(ConnectionError, PixivError))
    @utils.spinner('')
    def illust_related_request(self, image_id, offset) -> 'Json':
        """Mode 15 (1.5 * 10 so it's an int)"""
        self._await_login()
        return self._api.illust_related(illust_id=image_id, offset=offset)

    @funcy.retry(tries=3, errors=(ConnectionError, PixivError))
    @utils.spinner('')
    def illust_recommended_request(self, offset) -> 'Json':
        """Mode 6"""
        self._await_login()
        return self._api.illust_recommended(offset=offset)

    # Download
    @funcy.retry(tries=3, errors=(ConnectionError, PixivError))
    def protected_download(self, url, path, name) -> 'IO':
        """Download through the API client, retried so a failure doesn't crash"""
        self._await_login()
        self._api.download(url, path=path, name=name)
class Pixiv:
    """Config-driven wrapper around ``AppPixivAPI``.

    Every public lookup returns ``None`` when the ``[Pixiv] enabled``
    setting is false, and re-authenticates once with the saved refresh
    token when the API reports an ``invalid_grant`` error.
    """

    # Dots are escaped so they only match a literal ".", and the username
    # stops at the first "/", "?" or "#" so a trailing slash or query
    # string does not end up in the returned handle.
    _TWITTER_URL_RE = re.compile(
        r'^https?://(www\.)?twitter\.com/(?P<username>[^/?#]+)')

    def __init__(self):
        self.enabled = config.getboolean('Pixiv', 'enabled', fallback=False)
        self._refresh_token = config.get('Pixiv', 'refresh_token', fallback=None)
        self._log = logging.getLogger(__name__)
        self._pixiv = AppPixivAPI()
        self._pixiv.set_accept_language(config.get('Pixiv', 'language', fallback='en-US'))
        self._re_twitter = self._TWITTER_URL_RE

        if self.enabled:
            self._login()

    def _login(self) -> None:
        """
        Authenticate to Pixiv with the saved refresh token.

        Failures are logged (with traceback) and not re-raised.
        Returns:
            None
        """
        # The refresh token is a credential, so it is deliberately kept
        # out of the log output.
        self._log.debug('[PIXIV] Authenticating to Pixiv with the saved refresh token')
        try:
            self._pixiv.auth(refresh_token=self._refresh_token)
        except Exception as error:
            self._log.exception("[PIXIV] Failed to authenticate to Pixiv", exc_info=error)

    @staticmethod
    def _session_expired(response) -> bool:
        """
        Tell whether an API response reports an ``invalid_grant`` error.
        Args:
            response: Mapping returned by an API call (may be empty/None)

        Returns:
            bool
        """
        if not response or 'error' not in response:
            return False
        error = response['error'] or {}
        return 'invalid_grant' in (error.get('message') or '')

    def get_illust(self, illust_id: int) -> typing.Optional[dict]:
        """
        Look up the provided illustration ID on Pixiv
        Args:
            illust_id (int):

        Returns:
            typing.Optional[dict]: The ``illust`` entry of the response, or
            None when disabled or when the response has no such entry
        """
        if not self.enabled:
            return None

        illust = self._pixiv.illust_detail(illust_id)
        if self._session_expired(illust):
            self._log.warning('Pixiv login session is no longer valid; re-authenticating with saved token')
            self._login()
            illust = self._pixiv.illust_detail(illust_id)

        return illust['illust'] if illust and 'illust' in illust else None

    def get_author(self, author_id: int) -> typing.Optional[dict]:
        """
        Get the user detail response for the specified author
        Args:
            author_id (int):

        Returns:
            typing.Optional[dict]: The whole ``user_detail`` response, or
            None when disabled
        """
        if not self.enabled:
            return None

        user = self._pixiv.user_detail(author_id)
        if self._session_expired(user):
            self._log.info('Re-Authenticating to Pixiv with the saved refresh token')
            self._login()
            user = self._pixiv.user_detail(author_id)

        return user

    def get_author_twitter(self, author_id: int) -> typing.Optional[str]:
        """
        Get the Pixiv artist's Twitter handle, if available
        Args:
            author_id (int):

        Returns:
            typing.Optional[str]: ``@<username>``, or None when disabled,
            when no Twitter URL is set, or when it cannot be parsed
        """
        if not self.enabled:
            return None

        user = self.get_author(author_id)
        twitter_url = user['profile']['twitter_url'] if (user and 'profile' in user) else None
        if not twitter_url:
            return None

        match = self._re_twitter.match(twitter_url)
        if match and match.group('username'):
            return f"@{match.group('username')}"
        return None
def check_log(instance, **kwargs):
    """React to a saved chat log entry: announce page titles, attach images.

    For every URL found in a ``PRIVMSG`` that was not sent by ``maobot`` or
    ``maobot_php``, the page is fetched and its ``<title>`` is saved as a
    ``NOTICE`` log entry. Twitter, Nico Seiga and pixiv links, as well as
    direct image links, additionally have their image(s) downloaded, saved
    and attached to the originating log entry.

    Args:
        instance: The log entry being processed.
        **kwargs: Ignored.
    """
    log = instance
    title = 'test'
    if log.nick in ('maobot', 'maobot_php') or log.command != 'PRIVMSG':
        return

    # Compiled once per call instead of once per URL. Raw strings keep the
    # exact same patterns while avoiding the invalid "\?" / "\d" escape
    # sequences that newer Python versions warn about.
    url_pat = re.compile(r"https?://[a-zA-Z0-9\-./?@&=:~_#]+")
    nicoseiga_pat = re.compile(
        r'http://seiga.nicovideo.jp/seiga/[a-zA-Z]+([0-9]+)')
    pixiv_pat = re.compile(
        r'https://www.pixiv.net/member_illust.php/?\?([a-zA-Z0-9\-./?@&=:~_#]+)')
    twitter_pat = re.compile(
        r'https://twitter.com/[a-zA-Z0-9_]+/status/\d+')
    image_format = ["jpg", "jpeg", "gif", "png"]

    url_list = re.findall(url_pat, log.message)
    for url in url_list:
        r = requests.get(url)
        # ISO-8859-1 is treated as "no usable charset": the parser is then
        # left to detect the encoding itself.
        content_type_encoding = r.encoding if r.encoding != 'ISO-8859-1' else None
        soup = BeautifulSoup(r.content, 'html.parser', from_encoding=content_type_encoding)
        try:
            title = soup.title.string
            Log(command='NOTICE', channel=log.channel, nick='maobot',
                message=title, is_irc=False).save()
        except (AttributeError, TypeError, HTTPError):
            pass

        # image dl
        if twitter_pat.match(url):
            try:
                images = soup.find("div", {"class": "permalink-tweet-container"}).find(
                    "div", {"class": "AdaptiveMedia-container"}).findAll(
                        "div", {"class": "AdaptiveMedia-photoContainer"})
            except AttributeError:
                images = soup.findAll("div", {"class": "media"})
            for image in images:
                try:
                    image_url = image.find('img')['src']
                    img = image_from_response(
                        requests.Session().get(image_url),
                        Image(original_url=url, related_log=log, caption=title))
                    img.save()
                    Log.objects.filter(id=log.id).update(attached_image=img.thumb)
                except Exception:
                    # Best effort per image. Narrowed from a bare except so
                    # KeyboardInterrupt / SystemExit are no longer swallowed.
                    pass
        elif nicoseiga_pat.match(url):
            seiga_login = '******'
            seiga_id = nicoseiga_pat.search(url).group(1)
            seiga_source = 'http://seiga.nicovideo.jp/image/source/%s' % seiga_id
            login_post = {'mail_tel': SECRET_KEYS['nicouser'],
                          'password': SECRET_KEYS['nicopass']}
            session = requests.Session()
            session.post(seiga_login, data=login_post)
            soup = BeautifulSoup(session.get(seiga_source).text, 'lxml')
            image_url = 'http://lohas.nicoseiga.jp%s' % soup.find(
                'div', {'class': 'illust_view_big'})['data-src']
            img = image_from_response(requests.Session().get(image_url),
                                      Image(original_url=url, related_log=log))
            img.save()
            Log.objects.filter(id=log.id).update(attached_image=img.thumb)
        elif pixiv_pat.match(url):
            from pixivpy3 import AppPixivAPI
            from urllib.parse import parse_qs
            api = AppPixivAPI()
            api.login(SECRET_KEYS['pixiuser'], SECRET_KEYS['pixipass'])
            pixiv_query = pixiv_pat.search(url).group(1)
            pixiv_dict = parse_qs(pixiv_query)
            # parse_qs maps every key to a list of values; pass the id
            # itself rather than the one-element list.
            pixiv_id = pixiv_dict['illust_id'][0]
            pixiv_illust = api.illust_detail(pixiv_id, req_auth=True).illust
            if 'meta_pages' in pixiv_illust and len(pixiv_illust.meta_pages) != 0:
                if 'page' in pixiv_dict:
                    # Only the page selected in the URL.
                    page_index = int(pixiv_dict['page'][0])
                    image_urls = [pixiv_illust.meta_pages[page_index].image_urls.large]
                else:
                    image_urls = [page.image_urls.large
                                  for page in pixiv_illust.meta_pages]
            else:
                image_urls = [pixiv_illust.image_urls.large]
            for image_url in image_urls:
                response = api.requests_call(
                    'GET', image_url,
                    headers={'Referer': 'https://app-api.pixiv.net/'},
                    stream=True)
                img = image_from_response(
                    response,
                    Image(original_url=url, related_log=log,
                          caption=pixiv_illust.title))
                img.save()
                Log.objects.filter(id=log.id).update(attached_image=img.thumb)
        elif url.split(".")[-1] in image_format:
            img = image_from_response(requests.Session().get(url),
                                      Image(original_url=url, related_log=log))
            img.save()
            Log.objects.filter(id=log.id).update(attached_image=img.thumb)
class CustomPixivPy:
    """
    A wrapper around PixivAPI and AppPixivAPI to facilitate automatic
    re-authentication (for required methods) and custom result format
    """
    TOKEN_LIFESPAN = datetime.timedelta(seconds=3600)
    MAX_PIXIV_RESULTS = 3000
    RESULTS_PER_QUERY = 50
    MAX_RETRIES = 5

    def __init__(self, **kwargs):
        """
        :param kwargs: forwarded unchanged to both AppPixivAPI and PixivAPI
        """
        # This class has no explicit base, so forwarding kwargs to
        # object.__init__ raised TypeError whenever any were given; they
        # are meant for the two API clients below.
        super().__init__()
        # forces reauth() to trigger if any method is called:
        self.last_auth = datetime.datetime.fromtimestamp(0)
        self.refresh_token = ""
        self.aapi = AppPixivAPI(**kwargs)
        self.papi = PixivAPI(**kwargs)

    def login(self, refresh_token):
        """
        Authenticate both API clients and remember the token and login time

        :param refresh_token: token passed to both clients' auth()
        :return: self, to allow chaining
        """
        self.refresh_token = refresh_token
        self.aapi.auth(refresh_token=refresh_token)
        self.papi.auth(refresh_token=refresh_token)
        self.last_auth = datetime.datetime.now()
        logger.debug('Pyxiv login done')
        return self  # allows chaining

    @retry
    def illust_ranking(self, mode='daily', offset=None):
        """Return the illustration ranking page that contains `offset`"""
        self.reauth()
        # Convert the item offset into a 1-based page number
        page = (offset or 0) // self.RESULTS_PER_QUERY + 1
        return self.papi.ranking('illust', mode, page, include_stats=False,
                                 image_sizes=['medium', 'large'])

    @retry
    def search_illust(self, word, search_target='text', sort='date', offset=None):
        """Search illustrations for `word`; returns the page containing `offset`"""
        self.reauth()
        # Convert the item offset into a 1-based page number
        page = (offset or 0) // self.RESULTS_PER_QUERY + 1
        return self.papi.search_works(word, page, mode=search_target,
                                      types=['illustration'], sort=sort,
                                      include_stats=False,
                                      image_sizes=['medium', 'large'])

    @retry
    def illust_detail(self, illust_id, req_auth=True):
        """Return the AppPixivAPI detail response for `illust_id`"""
        self.reauth()
        return self.aapi.illust_detail(illust_id, req_auth)

    def reauth(self):
        """Re-authenticates with pixiv if the last login was more than TOKEN_LIFESPAN ago"""
        if datetime.datetime.now() - self.last_auth > self.TOKEN_LIFESPAN:
            # login() already authenticates both clients and updates
            # last_auth, so no second papi.auth() call is needed
            self.login(self.refresh_token)
            logger.debug("Reauth successful")

    def get_pixiv_results(self, offset=None, *, query="", nsfw=False):
        """
        Get results from Pixiv as a list of dicts
        If no parameters are given, SFW daily ranking is returned

        :param offset: Optional. page offset
        :param query: Optional. Specify a search query
        :param nsfw: Whether to allow NSFW illustrations, false by default
        :return: list of dicts containing illustration information
            (empty if the response reports an error)
        :raises PixivError: if the request still fails on the last of
            MAX_RETRIES attempts
        """
        json_result = None
        for attempt in range(1, self.MAX_RETRIES + 1):
            try:
                if query:
                    json_result = self.search_illust(query, offset=offset,
                                                     sort='popular')
                else:
                    json_result = self.illust_ranking(
                        'daily_r18' if nsfw else 'daily', offset=offset)
            except PixivError as e:
                if attempt == self.MAX_RETRIES:
                    logger.warning("Failed fetching Pixiv data: %s", e)
                    raise e from None
            else:
                break

        results = []
        if json_result.get('has_error'):
            return results

        # Search responses list the works directly; ranking responses nest
        # each one under a 'work' key
        it = json_result.response if query else (
            x['work'] for x in json_result.response[0]['works'])
        for img in it:
            if not nsfw and img['sanity_level'] == 'black':
                continue  # white = SFW, semi_black = questionable, black = NSFW
            results.append({
                'url': img['image_urls']['large'],
                'thumb_url': img['image_urls']['medium'],
                'title': img['title'],
                'user_name': img['user']['name'],
                'user_link': f"https://www.pixiv.net/en/users/{img['user']['id']}"
            })
            logger.debug(results[-1])
        return results