class Pixiv(DummySite):
    """Site adapter that exposes one Pixiv user's illustrations as tasks.

    On construction it logs in through ``AppPixivAPI``, extracts the numeric
    user id from the given URL and fetches the user's profile so that
    ``dir_name`` is available immediately afterwards.
    """

    def __init__(self, url, username, password, proxy=None):
        """Log in and load the profile of the user that *url* points at.

        Args:
            url: Address whose last path segment is the numeric user id.
                Trailing slashes are ignored.
            username: Account name passed to ``AppPixivAPI.login``.
            password: Password passed to ``AppPixivAPI.login``.
            proxy: Value handed to ``get_proxy``; its result is forwarded as
                keyword arguments to both the API client and the fetcher.

        Raises:
            ValueError: If the last path segment of *url* is not an integer.
        """
        proxies = get_proxy(proxy)
        requests_kwargs = {
            # presumably a (connect, read) timeout pair -- confirm against
            # what AppPixivAPI forwards to its HTTP layer
            "timeout": (3, 10),
        }
        requests_kwargs.update(proxies)
        self.api = AppPixivAPI(**requests_kwargs)
        self._fetcher = PixivFetcher(**proxies)
        self.api.login(username, password)
        # Strip trailing slashes first: ".../users/123/" would otherwise
        # yield an empty last segment and make int() raise.
        self._user_id = int(url.rstrip("/").split("/")[-1])
        self._dir_name = None
        self._total_illustrations = 0
        self._fetch_user_detail()

    @property
    def fetcher(self):
        """The ``PixivFetcher`` created in the constructor."""
        return self._fetcher

    @property
    def dir_name(self):
        """Normalized directory name built from the user's profile."""
        assert self._dir_name is not None
        return self._dir_name

    def _fetch_user_detail(self):
        """Query the user's profile and cache the values derived from it.

        Sets ``_dir_name`` to ``name-account-id`` (run through
        ``normalize_filename``) and ``_total_illustrations`` to the
        profile's ``total_illusts`` value.

        Returns:
            The resulting directory name.
        """
        assert self._user_id is not None
        profile = self.api.user_detail(self._user_id)
        user = profile['user']
        raw_name = "-".join([
            user['name'],
            user['account'],
            str(user['id']),
        ])
        self._dir_name = normalize_filename(raw_name)
        self._total_illustrations = profile['profile']['total_illusts']
        return self.dir_name

    def _fetch_image_list(self):
        """Yield whatever ``parse_image_urls`` produces for each illustration.

        Walks every result page of ``user_illusts`` by following
        ``next_url`` until a page reports ``None`` for it.
        """
        ret = self.api.user_illusts(self._user_id)
        while True:
            for illustration in ret.illusts:
                yield from parse_image_urls(illustration)
            if ret.next_url is None:
                break
            # parse_qs turns the next-page URL into keyword arguments
            ret = self.api.user_illusts(**self.api.parse_qs(ret.next_url))

    def _fetch_single_image_url(self, illustration_id):
        """Return the ``'large'`` image URL of a single illustration."""
        json_result = self.api.illust_detail(illustration_id)
        illustration_info = json_result.illust
        return illustration_info.image_urls['large']

    @property
    def tasks(self):
        """Generator over the items produced by ``_fetch_image_list``."""
        yield from self._fetch_image_list()
class APIHandler:
    """Single shared gateway for all Pixiv API calls made by the program.

    The login runs on a background thread started by ``start()``. Every
    request method first joins that thread (once) before using the client.
    """

    def __init__(self):
        self._api_thread = threading.Thread(target=self._login)
        self._login_started = False
        self._login_done = False

        # Client object used both for logging in and for every request
        self._api = AppPixivAPI()

        # Declared here, assigned later: the singleton is instantiated before
        # the config is available, so start() supplies the credentials and
        # the login stores its response.
        self._credentials: 'dict[str, str]'
        self._response: 'Json'

    def start(self, credentials):
        """Begin the background login; the only public setup entry point.

        Calls made after the login thread has been started do nothing.
        """
        if self._login_started:
            return
        self._credentials = credentials
        self._api_thread.start()
        self._login_started = True

    def _await_login(self):
        """Block until the login thread has finished (joined only once)."""
        if self._login_done:
            return
        self._api_thread.join()
        self._login_done = True

    def _login(self):
        """Thread target: delegate to the token-based login."""
        self._login_with_token()

    def _login_with_token(self):
        """Authenticate with the stored refresh token and keep the response.

        A ``PixivError`` is not re-raised; an explanation is printed instead.
        """
        # TODO: refresh the token if it expired
        token = self._credentials['refresh_token']
        try:
            self._response = self._api.auth(refresh_token=token)
        except PixivError as e:
            messages = (
                '',
                e,
                'If this is a cloudflare captcha issue, just quit and retry',
                'It is not a problem with koneko or pixivpy',
                'Otherwise, please report to '
                'https://github.com/akazukin5151/koneko/issues',
                "Press 'q' and enter to exit",
            )
            for message in messages:
                print(message)

    # Public API requests for user id
    def get_user_id(self) -> 'Json':
        """Return the ``id`` of the ``user`` entry in the login response."""
        self._await_login()
        user = self._response['user']
        return user['id']

    # Public API request functions for each mode
    @funcy.retry(tries=3, errors=(ConnectionError, PixivError))
    @utils.spinner('')
    def artist_gallery(self, artist_user_id, offset) -> 'Json':
        """Mode 1: ``user_illusts`` for the given artist at *offset*."""
        self._await_login()
        result = self._api.user_illusts(artist_user_id, offset=offset)
        return result

    @funcy.retry(tries=3, errors=(ConnectionError, PixivError))
    def protected_illust_detail(self, image_id) -> 'Json':
        """Mode 2: ``illust_detail`` for the given image id."""
        self._await_login()
        result = self._api.illust_detail(image_id)
        return result

    @funcy.retry(tries=3, errors=(ConnectionError, PixivError))
    def following_user_request(self, user_id, publicity, offset) -> 'Json':
        """Mode 3: ``user_following`` with *publicity* as ``restrict``."""
        self._await_login()
        result = self._api.user_following(
            user_id, restrict=publicity, offset=offset
        )
        return result

    @funcy.retry(tries=3, errors=(ConnectionError, PixivError))
    def search_user_request(self, searchstr, offset) -> 'Json':
        """Mode 4: ``search_user`` for *searchstr* at *offset*."""
        self._await_login()
        result = self._api.search_user(searchstr, offset=offset)
        return result

    @funcy.retry(tries=3, errors=(ConnectionError, PixivError))
    @utils.spinner('')
    def illust_follow_request(self, restrict, offset) -> 'Json':
        """Mode 5: ``illust_follow`` with the given restriction and offset."""
        self._await_login()
        result = self._api.illust_follow(restrict=restrict, offset=offset)
        return result

    @funcy.retry(tries=3, errors=(ConnectionError, PixivError))
    @utils.spinner('')
    def illust_related_request(self, image_id, offset) -> 'Json':
        """Mode 15 (1.5 * 10 so it's an int): ``illust_related`` request."""
        self._await_login()
        result = self._api.illust_related(illust_id=image_id, offset=offset)
        return result

    @funcy.retry(tries=3, errors=(ConnectionError, PixivError))
    @utils.spinner('')
    def illust_recommended_request(self, offset) -> 'Json':
        """Mode 6: ``illust_recommended`` at *offset*."""
        self._await_login()
        result = self._api.illust_recommended(offset=offset)
        return result

    # Download
    @funcy.retry(tries=3, errors=(ConnectionError, PixivError))
    def protected_download(self, url, path, name) -> 'IO':
        """Run the api download under funcy.retry so a failure doesn't crash.

        Nothing is returned; the download's own result is discarded.
        """
        self._await_login()
        self._api.download(url, path=path, name=name)
class PixivSpider:
    """Download Pixiv illustrations and record them in ``info.json``.

    ``self.data['illusts']`` holds one record per downloaded illustration,
    ``self.count`` is the number of records and ``self.illusts_names`` is the
    set of recorded names used to skip illustrations already downloaded.
    """

    def __init__(self):
        """
        Init PixivSpider

        Creates the API client and loads ``info.json`` from the current
        directory when it exists, otherwise starts with an empty record.
        """
        self.api = AppPixivAPI()
        self.directory = 'download'
        if not os.path.exists('info.json'):
            self.data = {'illusts': []}
            self.count = 0
            print("Create new info.json file")
        else:
            with open('info.json', 'r') as f:
                self.data = json.load(f)
            self.count = len(self.data['illusts'])
            print("Load existing info.json file")
            print("Existed illusts count: %d" % self.count)
        # Builtin set: the former ``Set()`` is not a builtin name.
        self.illusts_names = {
            illust['name'] for illust in self.data['illusts']
        }

    def login(self):
        """
        Login pixiv.net

        Reads ``username`` and ``password`` from ``login.json``.
        """
        with open('login.json') as f:
            login = json.load(f)
        self.api.login(login["username"], login["password"])
        # %-format the message; passing the name as a second print argument
        # left the literal "%s" in the output.
        print("Login pixiv.net with user %s." % login["username"])

    def exit(self):
        """
        Stop spider and print logs

        Writes the current record back to ``info.json``.
        """
        with open('info.json', 'w') as f:
            json.dump(self.data, f, indent=2)
        print("Finish! Total downloaded illusts number: %d" % self.count)

    def create_download_folder(self):
        """
        Setup image download directory
        """
        if not os.path.exists(self.directory):
            os.makedirs(self.directory)

    def download_illusts(self, illusts=None):
        """
        Download illusts

        :illusts: iterable of illust objects; ``None`` is treated as empty

        An illustration is counted and recorded only after its download
        succeeded, so a failed one is retried on a later call instead of
        being reported as already downloaded.
        """
        for illust in illusts or []:
            image_url = illust.meta_single_page.get(
                'original_image_url', illust.image_urls.large)
            print(u"👀 Found illust: %s (%s)" % (illust.title, image_url))
            url_basename = os.path.basename(image_url)
            extension = os.path.splitext(url_basename)[1]
            name = "%d_%s%s" % (illust.id, illust.title, extension)
            # '/' would be taken as a path separator in the file name
            name = name.replace('/', ':')
            if name in self.illusts_names:
                print(u"✨ Already download: %s: %s"
                      % (illust.title, image_url))
                continue
            # Sequence number this illustration gets if the download works
            index = self.count + 1
            try:
                self.api.download(image_url, path=self.directory,
                                  name="%d_%s" % (index, name))
            except PixivError:
                print(u"😢 PixivError!!! Skip this illust")
                continue
            self.count = index
            self.data['illusts'].append({
                'id': index,
                'name': name,
                'illust_id': illust.id,
                'illustrator_id': illust.user.id,
                'source_url': image_url
            })
            self.illusts_names.add(name)
            print(u"✅ Download illust: %s (%s)" % (illust.title, image_url))

    def get_user_ids_from_illusts(self, illusts):
        """
        Get user ids by illusts
        """
        return [illust.user.id for illust in illusts]

    def get_top_ranking_illusts(self,
                                count=DEFAULT_DOWNLOAD_TOP_RANKING_COUNT,
                                ranking_type=RankingType.DAY,
                                date=None,
                                download=False):
        """
        Get top ranking illusts
        :count: the number of illusts that we want to download
        :ranking_type: ranking type
        :date: date as "YYYY-MM-DD"; ``None`` means today
        :download: download flag
        """
        if date is None:
            # Resolved per call: a default computed in the signature would be
            # frozen at the moment the class was defined.
            date = datetime.today().strftime("%Y-%m-%d")
        json_result = self.api.illust_ranking(ranking_type, date=date)
        illusts = self.get_illusts_from_all_pages(
            json_result, json_result.illusts, count, download,
            fetch_next=self.api.illust_ranking)
        return illusts[:count]

    def get_recommended_illusts(self,
                                count=DEFAULT_DOWNLOAD_RECOMMENDED_COUNT,
                                content_type=ContentType.ILLUST,
                                download=False):
        """
        Get recommended illusts
        :count: the number of illusts that we want to download
        :content_type: content type
        :download: download flag
        """
        json_result = self.api.illust_recommended(content_type)
        illusts = self.get_illusts_from_all_pages(
            json_result, json_result.illusts, count, download,
            fetch_next=self.api.illust_recommended)
        return illusts[:count]

    def get_illusts_by_user_ids(self, user_ids,
                                count=DEFAULT_DOWNLOAD_EACH_USER_COUNT,
                                content_type=ContentType.ILLUST,
                                download=False):
        """
        Get illusts by user id
        :user_ids: iterable of user ids
        :count: the number of illusts wanted per user
        :content_type: content type
        :download: download flag

        Returns a dict mapping each user id to at most ``count`` illusts.
        """
        ret = {}
        for user_id in user_ids:
            json_result = self.api.user_illusts(user_id=user_id,
                                                type=content_type)
            illusts = self.get_illusts_from_all_pages(
                json_result, json_result.illusts, count, download,
                fetch_next=self.api.user_illusts)
            ret[user_id] = illusts[:count]
        return ret

    def get_illusts_from_all_pages(self, json_result, illusts, count,
                                   download=False, fetch_next=None):
        """
        Get illusts from all pages
        :json_result: response of the first page
        :illusts: illusts of the first page; extended in place
        :count: stop paging once this many illusts were collected
        :download: download flag
        :fetch_next: API method used for follow-up pages; it must be the
                     method that produced ``json_result``. Defaults to
                     ``self.api.illust_ranking``.

        Returns the collected list, which may be longer than ``count``.
        """
        if fetch_next is None:
            fetch_next = self.api.illust_ranking
        while len(json_result) != 0 and len(illusts) < count:
            next_qs = self.api.parse_qs(json_result.next_url)
            if next_qs is None:
                break
            try:
                # next_qs holds the keyword arguments of the endpoint that
                # returned this page, so the same endpoint must be called.
                json_result = fetch_next(**next_qs)
            except TypeError:
                # Arguments not accepted by fetch_next: stop paging
                break
            illusts += json_result.illusts
        if download:
            count = min(count, len(illusts))
            self.download_illusts(illusts=illusts[:count])
        return illusts