def __init__(self, **kwargs):
    """Set up both pixiv clients with a deliberately stale auth timestamp.

    Starting ``last_auth`` at the Unix epoch guarantees the very first API
    call will trigger a (re)authentication via ``reauth()``.
    """
    super().__init__(**kwargs)
    epoch = datetime.datetime.fromtimestamp(0)
    self.last_auth = epoch  # epoch => token considered expired immediately
    self.refresh_token = ""
    # App API client and public API client share the same kwargs.
    self.aapi, self.papi = AppPixivAPI(**kwargs), PixivAPI(**kwargs)
class SetuService():
    """Searches pixiv for popular illustrations, never returning the same
    illustration twice.

    Already-returned ids are persisted one-per-line in ``_FGL_PATH`` so the
    de-duplication survives restarts.
    """

    # Persistent "already returned" id list (one id per line).
    _FGL_PATH = 'src/main/resources/fgl.txt'

    def __init__(self, username, password):
        """Log in to pixiv and load the list of already-returned ids.

        :param username: pixiv account name
        :param password: pixiv account password
        """
        self._username = username
        self._password = password
        # Context manager closes the file even on error (the original
        # used a bare open()/close() pair and leaked on exception).
        with open(self._FGL_PATH, 'r') as fglfile:
            self._fgl = [line.strip() for line in fglfile]
        self._api = PixivAPI(**_REQUESTS_KWARGS)
        self._api.login(self._username, self._password)

    def search(self, keyword, mode):
        """Return ``"<proxied large url> <id>"`` for the first unseen result.

        Searches up to 500 popular illustrations for *keyword*; the first
        result whose id has not been returned before is recorded (in memory
        and on disk) and returned.  Returns None when all results were seen.

        :param keyword: search term passed to the pixiv API
        :param mode: pixiv search mode (e.g. 'tag' or 'text')
        """
        _res = self._api.search_works(keyword, types=["illustration"],
                                      per_page=500, mode=mode, sort="popular")
        for i in _res.response:
            if str(i.id) not in self._fgl:
                self._fgl.append(str(i.id))
                # Persist immediately so a crash cannot lose the id;
                # with-block replaces the original open/write/close triple.
                with open(self._FGL_PATH, 'a') as f:
                    f.write(str(i.id) + '\n')
                # i.pixiv.cat is a hotlink-friendly proxy for i.pximg.net.
                return i.image_urls.large.replace(
                    "i.pximg.net", "i.pixiv.cat") + " " + str(i.id)
        return None  # every result was already returned before
def __init__(self, auth, work_path=os.path.abspath('../pixiv/')):
    """Create a logged-in pixiv client rooted at *work_path*.

    :param auth: (username, password) pair, unpacked into ``login()``
    :param work_path: directory under which downloads are stored
    """
    api = PixivAPI()
    api.login(*auth)
    self._api = api
    self._wd = work_path
def __connect(self):
    """Lazily create and log in the pixiv client; return True when ready.

    Fixes two defects in the original:
    - ``return False`` after ``raise e`` was unreachable dead code (and
      ``raise e`` inside the handler only re-raised, so the try/except
      added nothing); login failures still propagate to the caller.
    - ``self.client`` was assigned *before* ``login()``, so a failed login
      left a half-initialised client behind and later calls skipped login.
      The client is now published only after a successful login.
    """
    if self.client is None:
        client = PixivAPI()
        client.login(cfg['pixiv']['帳號'], cfg['pixiv']['密碼'])
        self.client = client
    return True
class Pixiv(Thread): def __init__(self): Thread.__init__(self) def run(self): self.client = PixivAPI() self.client.login(cfg['pixiv']['帳號'], cfg['pixiv']['密碼']) while True:
def __init__(self, _user, _pass, default_path=r'F:/PIXIV'):
    """Store credentials and set up an un-authenticated pixiv client.

    :param _user: pixiv account name
    :param _pass: pixiv account password
    :param default_path: local directory scanned for existing pixiv ids
    """
    self._user = _user
    self._pass = _pass
    self.api = PixivAPI()
    self.pixiv_utils = PixivUtils()
    self.pastes = Queue()  # illustration ids captured but not yet processed
    self.default_path = default_path
    self.local_pixiv_ids = []
def __init__(self, username, password):
    """Log in to pixiv and load previously returned illustration ids.

    The id list is read from ``src/main/resources/fgl.txt`` (one id per
    line) into ``self._fgl``.

    :param username: pixiv account name
    :param password: pixiv account password
    """
    self._username = username
    self._password = password
    # Context manager closes the file even if reading fails; the original
    # open()/readlines()/close() sequence leaked the handle on error.
    with open('src/main/resources/fgl.txt', 'r') as fglfile:
        self._fgl = [line.strip() for line in fglfile]
    self._api = PixivAPI(**_REQUESTS_KWARGS)
    self._api.login(self._username, self._password)
def __init__(self, auto_re_login=True, **requests_kwargs):
    """Wrap the public and app pixiv APIs behind a single object.

    :param auto_re_login: re-authenticate automatically when needed
    :param requests_kwargs: extra keyword arguments forwarded to both clients
    """
    self.auto_re_login = auto_re_login
    self._requests_kwargs = requests_kwargs
    # Credentials and auth state are unknown until login happens.
    self._username = None
    self._password = None
    self._has_auth = False
    self._last_login = None
    self._check_auth_lock = Lock()  # guards the auth-check critical section
    # Both underlying clients share the same requests configuration.
    self._papi = PixivAPI(**requests_kwargs)
    self._aapi = AppPixivAPI(**requests_kwargs)
def __init__(self, name, app_config=None):
    """Load handler config/data files and log in to pixiv.

    :param name: handler instance name; names the per-handler data file
    :param app_config: optional dict that may provide
        'handlers_config_dir' and 'data_dir'; sensible defaults otherwise

    Fix: the original used a mutable default argument (``app_config={}``),
    a classic shared-default pitfall; ``None`` is the safe sentinel.
    """
    if app_config is None:
        app_config = {}
    config_path = Path(app_config.get('handlers_config_dir', '.')) / 'pixiv.toml'
    data_path = Path(app_config.get('data_dir', './data/')) / '{}.toml'.format(name)
    self.config = Config(config_path, write_defaults=True, defaults={
        'refresh': 'xxxx',  # placeholder; user must supply a real refresh token
    })
    self.config.save()
    self.data = Config(data_path)
    self.age_filter = None
    self.api = PixivAPI()
    # Only attempt login when a refresh token is configured.
    if self.config.get('refresh'):
        print('logging in to Pixiv...')
        login_response = self.api.auth(refresh_token=self.config['refresh'])
        print('logged in into account {0.name} ({0.account}) [{0.id}]'.format(login_response['response']['user']))
def __init__(self, dbDict, config):
    """Prepare download directories and an authorised pixiv client.

    :param dbDict: shared in-memory database of works
    :param config: mapping with PIXIV_USERNAME / PIXIV_PASSWORD /
        PIXIV_DOWNLOAD_DIRECTORY keys
    """
    self.config = config
    self.dbDict = dbDict
    self.username = config['PIXIV_USERNAME']
    self.password = config['PIXIV_PASSWORD']
    # All three media types live under the configured download root.
    root = config['PIXIV_DOWNLOAD_DIRECTORY']
    self.imageDirectory = os.path.join(root, 'images')
    self.ugoiraDirectory = os.path.join(root, 'ugoira')
    self.avatarDirectory = os.path.join(root, 'avatars')
    for directory in (self.imageDirectory, self.ugoiraDirectory,
                      self.avatarDirectory):
        os.makedirs(directory, exist_ok=True)
    self.api = PixivAPI()
    self.authorize()
def __init__(self, dbDict, config):
    """Prepare download/avatar directories and an authorised pixiv client.

    :param dbDict: shared in-memory database of works
    :param config: mapping with PIXIV_USERNAME / PIXIV_PASSWORD /
        PIXIV_DOWNLOAD_DIRECTORY / PIXIV_AVATAR_DIRECTORY keys
    """
    self.dbDict = dbDict
    self.username = config['PIXIV_USERNAME']
    self.password = config['PIXIV_PASSWORD']
    self.downloadDirectory = config['PIXIV_DOWNLOAD_DIRECTORY']
    self.avatarDirectory = config['PIXIV_AVATAR_DIRECTORY']
    # Create both target directories up front; no-op when they exist.
    for directory in (self.downloadDirectory, self.avatarDirectory):
        os.makedirs(directory, exist_ok=True)
    self.api = PixivAPI()
    self.authorize()
def importIllustJSON(self):
    """Fetch illustration metadata from the pixiv API into attributes.

    Logs in, downloads the work JSON for ``self.ID`` and populates author,
    title, tag and paging attributes; appends one large-image URL per page
    to ``self.image_URLs``.

    Fix: the multi-page branch iterated ``range(self.pages - 1)`` and so
    silently dropped the last page; ``metadata['pages']`` has
    ``page_count`` entries, so iterate all of them.
    """
    # Login to Pixiv API
    self.api = PixivAPI()
    self.api.login(pixivLogin["pixivusername"], pixivLogin["pixivpassword"])
    userURL = "https://www.pixiv.net/member_id="
    self.JSON = self.api.works(self.ID)['response'][0]
    self.manga = self.JSON['is_manga']
    self.account = self.JSON['user']['account']
    self.name = self.JSON['user']['name']
    self.user_ID = self.JSON['user']['id']
    self.user_URL = userURL + str(self.user_ID)
    self.title = self.JSON['title']
    self.tags = self.JSON['tags']
    self.pages = self.JSON['page_count']
    if self.pages > 1:
        for page in range(self.pages):  # was range(self.pages - 1): off-by-one
            self.image_URLs.append(
                self.JSON['metadata']["pages"][page]["image_urls"]['large'])
    else:
        self.image_URLs.append(self.JSON['image_urls']['large'])
class CustomPixivPy:
    """A wrapper around PixivAPI and AppPixivAPI to facilitate automatic
    re-authentication (for required methods) and custom result format.
    """

    # Auth tokens are treated as stale after this long; see reauth().
    TOKEN_LIFESPAN = datetime.timedelta(seconds=3600)
    MAX_PIXIV_RESULTS = 3000   # advertised upper bound on fetchable results
    RESULTS_PER_QUERY = 50     # pixiv page size; used to convert offsets to pages
    MAX_RETRIES = 5            # attempts in get_pixiv_results before giving up

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # forces reauth() to trigger if any method is called:
        self.last_auth = datetime.datetime.fromtimestamp(0)
        self.refresh_token = ""
        # App API (illust_detail) and public API (ranking/search) clients.
        self.aapi = AppPixivAPI(**kwargs)
        self.papi = PixivAPI(**kwargs)

    def login(self, refresh_token):
        """Authenticate both clients with *refresh_token*, record the time."""
        self.refresh_token = refresh_token
        self.aapi.auth(refresh_token=refresh_token)
        self.papi.auth(refresh_token=refresh_token)
        self.last_auth = datetime.datetime.now()
        logger.debug('Pyxiv login done')
        return self  # allows chaining

    @retry
    def illust_ranking(self, mode='daily', offset=None):
        """Return one page of the illustration ranking for *mode*.

        *offset* is an item offset; it is converted to a 1-based page index.
        """
        self.reauth()
        offset = (offset or 0) // self.RESULTS_PER_QUERY + 1  # item offset -> page
        return self.papi.ranking('illust', mode, offset,
                                 include_stats=False,
                                 image_sizes=['medium', 'large'])

    @retry
    def search_illust(self, word, search_target='text', sort='date', offset=None):
        """Search illustrations for *word*; returns one result page.

        *offset* is an item offset converted to a 1-based page index.
        """
        self.reauth()
        offset = (offset or 0) // self.RESULTS_PER_QUERY + 1  # item offset -> page
        return self.papi.search_works(word, offset, mode=search_target,
                                      types=['illustration'], sort=sort,
                                      include_stats=False,
                                      image_sizes=['medium', 'large'])

    @retry
    def illust_detail(self, illust_id, req_auth=True):
        """Fetch detail for one illustration via the app API."""
        self.reauth()
        return self.aapi.illust_detail(illust_id, req_auth)

    def reauth(self):
        """Re-authenticates with pixiv if the last login was more than
        TOKEN_LIFESPAN ago.
        """
        if datetime.datetime.now() - self.last_auth > self.TOKEN_LIFESPAN:
            # NOTE(review): login() already auths papi and updates
            # last_auth; the two lines below repeat both — harmless but
            # redundant. Left unchanged.
            self.login(self.refresh_token)
            self.papi.auth(refresh_token=self.refresh_token)
            logger.debug("Reauth successful")
            self.last_auth = datetime.datetime.now()

    def get_pixiv_results(self, offset=None, *, query="", nsfw=False):
        """Get results from Pixiv as a list of dicts.

        If no parameters are given, the SFW daily ranking is returned.

        :param offset: Optional. page offset
        :param query: Optional. Specify a search query
        :param nsfw: Whether to allow NSFW illustrations, false by default
        :return: list of dicts containing illustration information
        """
        # NOTE(review): last_error is assigned but never used.
        json_result, last_error = None, None
        for attempt in range(1, self.MAX_RETRIES + 1):
            try:
                # Search when a query is given, otherwise the daily ranking
                # (R-18 variant when nsfw is requested).
                json_result = self.search_illust(query, offset=offset, sort='popular') \
                    if query else self.illust_ranking('daily_r18' if nsfw else 'daily', offset=offset)
            except PixivError as e:
                if attempt == self.MAX_RETRIES:
                    logger.warning("Failed fetching Pixiv data: %s", e)
                    raise e from None
            else:
                break  # success — stop retrying
        results = []
        if json_result.get('has_error'):
            return results
        # Ranking responses nest each work under response[0]['works'][i]['work'];
        # search responses are a flat list.
        it = json_result.response if query else (
            x['work'] for x in json_result.response[0]['works'])
        for img in it:
            if not nsfw and img['sanity_level'] == 'black':
                continue  # white = SFW, semi_black = questionable, black = NSFW
            results.append({
                'url': img['image_urls']['large'],
                'thumb_url': img['image_urls']['medium'],
                'title': img['title'],
                'user_name': img['user']['name'],
                'user_link': f"https://www.pixiv.net/en/users/{img['user']['id']}"
            })
            logger.debug(results[-1])
        return results
def run(self): self.client = PixivAPI() self.client.login(cfg['pixiv']['帳號'], cfg['pixiv']['密碼']) while True:
class PixivClip:
    """Watches the clipboard (via ClipWatcher) for pixiv illustration URLs
    and queues their ids for later processing.
    """

    def __init__(self, _user, _pass, default_path=r'F:/PIXIV'):
        # Credentials are kept for lazy login in get_illust().
        self._user, self._pass = (_user, _pass)
        self.api = PixivAPI()
        self.pixiv_utils = PixivUtils()
        self.pastes = Queue()  # illust ids captured from the clipboard
        self.default_path = default_path
        self.local_pixiv_ids = []

    def login(self):
        """Authenticate the pixiv client with the stored credentials."""
        self.api.login(self._user, self._pass)

    def get_illust(self, illust_id):
        """Fetch the work JSON for *illust_id*, logging in lazily.

        NOTE(review): on PixivError this re-logins and recurses with no
        depth bound; if api.works() itself raises, json_result is unbound
        in both except branches (NameError). Left unchanged.
        """
        if not self.api.access_token:
            self.login()
        try:
            json_result = self.api.works(illust_id)
            json_result = json_result.response
        except AttributeError:
            # No .response attribute — surface the raw result to the caller.
            raise AttributeError(json_result)
        except utils.PixivError as e:
            # Probably an expired token: re-login and retry recursively.
            self.login()
            print(e, self.api.access_token, json_result)
            return self.get_illust(illust_id)
        else:
            json_result = self.pixiv_utils.is_single_array(json_result)
        return json_result

    def work(self, illust_id):
        """Fetch and print one work; swallow malformed-result errors."""
        try:
            illust = self.get_illust(illust_id)
        except AttributeError as json_result:
            # print(json_result.has_error)
            # print(json_result.status)
            print(json_result)
            print(dir(json_result))
            return
        else:
            pass
        # NOTE(review): illust_type is assigned but never used.
        illust_type = illust.type
        print(illust)

    def refresh_local_pixiv_ids(self):
        """Re-scan default_path and collect the pixiv ids found there."""
        for i in self.pixiv_utils.list_imgs_pixiv_ids_in_dir(self.default_path):
            self.local_pixiv_ids.append(i)

    def callback(self, url):
        """Queue the illust id parsed from *url*, skipping duplicates."""
        illust_id = self.pixiv_utils.parse_url_for_id(url)
        if illust_id in self.pastes.queue:
            print(illust_id, "Already In Queue!")
            return
        self.pastes.put(illust_id)

    def watch(self):
        """Run the clipboard watcher until interrupted; return it.

        NOTE(review): (KeyboardInterrupt, Exception) — Exception already
        covers everything except BaseException subclasses like
        KeyboardInterrupt, which is why both appear; `e` is unused.
        """
        watcher = ClipWatcher(self.pixiv_utils.is_pixiv_illust_url, self.callback)
        try:
            for i in watcher.start():
                print('PIXIV LINKS:', self.pastes.qsize(), i, flush=True, end='\r')
        except (KeyboardInterrupt, Exception) as e:
            watcher.stop()
        return watcher

    def print_pastes_queue(self):
        """Drain the queue, printing each id and pausing for input."""
        while not self.pastes.empty():
            illust_id = self.pastes.get()
            print(illust_id)
            input("PAUSED!!!")

    def begin(self):
        """Watch the clipboard, then dump whatever was collected."""
        # NOTE(review): the returned watcher is unused here.
        watcher = self.watch()
        self.print_pastes_queue()
from threading import Thread from api import cfg from pixivpy3 import PixivAPI try: pixiv_client = PixivAPI() pixiv_client.login(cfg['pixiv']['帳號'], cfg['pixiv']['密碼']) except Exception as e: print('<pixiv模組初始失敗> %s' % str(e)) pixiv_client = None class Pixiv(Thread): def __init__(self): Thread.__init__(self) def run(self): self.client = PixivAPI() self.client.login(cfg['pixiv']['帳號'], cfg['pixiv']['密碼']) while True: def pixiv_search(key): if pixiv_client is None: return '此功能現在沒有開放' f = pixiv_client.search_works(key, mode='tag') d = [] for i in f['response']: d.append('(*%s) %s\n%s' % (i['stats']['favorited_count']['public'], i['title'], 'pixiv.net/member_illust.php?mode=medium&illust_id=' % i['id']))
from pixivpy_async import PixivClient from pixivpy_async import AppPixivAPI from pixivpy_async import PixivAPI from pixivpy3 import AppPixivAPI as Sync_aapi from pixivpy3 import PixivAPI as Sync_papi sys.dont_write_bytecode = True _USERNAME = "******" _PASSWORD = "******" _TOKEN = "uXooTT7xz9v4mflnZqJUO7po9W5ciouhKrIDnI2Dv3c" saapi = Sync_aapi() # saapi.login(_USERNAME, _PASSWORD) saapi.login(refresh_token=_TOKEN) spapi = Sync_papi() # spapi.login(_USERNAME, _PASSWORD) spapi.login(refresh_token=_TOKEN) def gen_date(): """201x-0x-xx""" year = random.randint(3, 9) month = random.randint(1, 9) day = random.randint(10, 29) return '201%s-0%s-%s' % (year, month, day) def test_sync_illust(num): e = time.time() for i in range(num):
class Pixiv(Thread): search_on = 0.0 #最後查詢時間 client = None def __init__(self): Thread.__init__(self) def __connect(self): if self.client is None: try: self.client = PixivAPI() self.client.login(cfg['pixiv']['帳號'], cfg['pixiv']['密碼']) except Exception as e: raise e return False return True def run(self): pass def search(self, key, number=30): if not self.__connect(): return 'Pixiv模組發生錯誤 暫時不能使用' if number > 1000: number = 1000 if key[0] == '@': result = self.client.users_works(int(key[1:])) else: result = self.client.search_works( key, page=1, per_page=number, mode='tag', # text標題 tag標籤 exact_tag精準標籤 caption描述 period='all', # all所有 day一天內 week一週內 month一月內 order='desc', # desc新順序 asc舊順序 sort='date', ) if result.status == 'failure': return '找不到 <%s>' % (key) result_rank = [] for i in result.response: for i2 in result_rank: if i.stats.views_count > i2.stats.views_count: result_rank.insert(result_rank.index(i2), i) break else: result_rank.append(i) reply = [] for i in result_rank: self.client.download(i.image_urls.px_480mw, path=cfg['temp_dir'], name=str(i.id)) #px_128x128 px_480mw print('%s\\%s' % (cfg['temp_dir'], i.id)) url = imgur.upload('%s\\%s' % (cfg['temp_dir'], i.id)) #url = 'http://temp.maou.pw/%s' % (i.id) reply.append(url) if len(reply) >= 4: break url = 'https://www.pixiv.net/search.php?word=123&s_mode=s_tag_full' reply = reply[:4] reply.append(url) return reply def rss(self): if not self.__connect(): return 'Pixiv模組錯誤'
class Worker:
    """Pixiv crawler that mirrors followed/ranking works to disk.

    NOTE(review): Python 2 code throughout — `print` statements, `cmp()`,
    `file()`, and integer division semantics in `i / per_pg` all predate
    Python 3. Documented as-is; porting would change runtime behaviour.
    """

    writes = 0                            # total bytes written this session
    api = PixivAPI()                      # shared client; login() authorises it
    root_dir = "image"                    # root of the local mirror
    size = "large"                        # image size key requested from the API
    latest_time = "2000-01-18 23:41:35"   # newest reupload time seen so far

    def login(self, username, password):
        self.token = self.api.login(username, password)

    # Walk path_list, creating every missing directory level under root_dir.
    def check_root_dir(self, path_list):
        if not os.path.exists(self.root_dir):
            os.mkdir(self.root_dir)
        cur_path = self.root_dir
        for path in path_list:
            cur_path = os.path.join(cur_path, path)
            if not os.path.exists(cur_path):
                os.mkdir(cur_path)

    def check_result(self, result, filepath):
        # Dump the failing response to filepath and abort on API error.
        if cmp(result["status"], "success") != 0:
            file(filepath, "w").write(json.dumps(result))
            raise PixivError(json.dumps(result))

    # NOTE(review): `time` shadows the time module name; `flt` is a mutable
    # default argument (read-only here, but a known pitfall).
    def pull_following_works(self, time, nums=10, flt={
            'illustration': True,
            'manga': True,
            'ugoira': True
    }):
        """Download up to *nums* works from the followed-users feed."""
        result = self.api.me_following_works()
        self.check_root_dir(["following"])
        curpg = 1
        per_pg = 30
        self.check_result(
            result, os.path.join(self.root_dir, "following", "error.json"))
        file(os.path.join(self.root_dir, "following", "metadata.json"),
             "w").write(json.dumps(result))
        total = result["pagination"]["total"]
        # Pre-ternary idiom: nums = min(nums, total)
        nums = (nums < total and [nums] or [total])[0]
        for i in range(0, nums):
            # Advance to the next result page when i crosses a page boundary.
            if curpg < i / per_pg + 1:
                curpg += 1
                result = self.api.me_following_works(page=curpg)
                self.check_result(
                    result,
                    os.path.join(self.root_dir, "following", "error.json"))
            idx = i % per_pg
            info_json = result["response"][idx]
            reup_time = info_json["reuploaded_time"]
            # Skip work types excluded by the filter.
            if not flt[info_json["type"]]:
                continue
            # This cutoff is not applicable when crawling rankings;
            # works uploaded before the last crawl would be skipped:
            #if datetime.strptime(reup_time,"%Y-%m-%d %H:%M:%S") < time:
            #    continue
            self.save_work(info_json, os.path.join(self.root_dir, "following"))
            # Track the newest reupload time seen.
            if datetime.strptime(self.latest_time,
                                 "%Y-%m-%d %H:%M:%S") < datetime.strptime(
                                     reup_time, "%Y-%m-%d %H:%M:%S"):
                self.latest_time = reup_time
        return result

    def pull_ranking_works(self, time, nums=10, work_type="all",
                           rank_type="daily", flt={
                               'illustration': True,
                               'manga': True,
                               'ugoira': True
                           }):
        """Download up to *nums* works from the ranking feed."""
        self.check_root_dir([work_type, rank_type])
        result = self.api.ranking(ranking_type=work_type, mode=rank_type)
        curpg = 1
        per_pg = 50
        self.check_result(
            result,
            os.path.join(self.root_dir, work_type, rank_type, "error.json"))
        file(
            os.path.join(self.root_dir, work_type, rank_type,
                         "metadata.json"), "w").write(json.dumps(result))
        total = result["pagination"]["total"]
        # Pre-ternary idiom: nums = min(nums, total)
        nums = (nums < total and [nums] or [total])[0]
        for i in range(0, nums):
            if curpg < i / per_pg + 1:
                curpg += 1
                # NOTE(review): pagination here calls me_following_works —
                # looks like it should call self.api.ranking; confirm.
                result = self.api.me_following_works(page=curpg,
                                                     ranking_type=rank_type,
                                                     mode=work_type)
                self.check_result(
                    result,
                    os.path.join(self.root_dir, work_type, rank_type,
                                 "error.json"))
                file(
                    os.path.join(self.root_dir, work_type, rank_type,
                                 "metadata.json"),
                    "w").write(json.dumps(result))
            idx = i % per_pg
            # Ranking responses nest each work one level deeper.
            info_json = result["response"][0]["works"][idx]["work"]
            reup_time = info_json["reuploaded_time"]
            # Skip work types excluded by the filter.
            if not flt[info_json["type"]]:
                continue
            # Works uploaded before the last crawl could be skipped here:
            #if datetime.strptime(reup_time,"%Y-%m-%d %H:%M:%S") < time:
            #    continue
            self.save_work(info_json,
                           os.path.join(self.root_dir, work_type, rank_type))
            if datetime.strptime(self.latest_time,
                                 "%Y-%m-%d %H:%M:%S") < datetime.strptime(
                                     reup_time, "%Y-%m-%d %H:%M:%S"):
                self.latest_time = reup_time
        return result

    def save_work(self, info_json, path):
        # Dispatch by work type; unknown types are silently ignored.
        if cmp(info_json["type"], "ugoira") == 0:
            self.save_ugoira(info_json, path)
        if cmp(info_json["type"], "illustration") == 0:
            self.save_image(info_json, path)
        if cmp(info_json["type"], "manga") == 0:
            self.save_image(info_json, path)
        return

    def save_ugoira(self, info_json, path):
        """Save the first zip of an animated work plus its metadata JSON."""
        full_info = self.api.works(info_json["id"])
        urls = full_info["response"][0]["metadata"]["zip_urls"]
        # Take the first zip URL in the mapping (any one will do).
        first_url = None
        for item in urls:
            first_url = urls[item]
            break
        file_name = first_url[first_url.rfind("/") + 1:]
        if os.path.exists(os.path.join(path, file_name)):
            # file already exists: skip
            return first_url
        file(os.path.join(path, "%s.json" % (file_name)),
             "w").write(json.dumps(full_info))
        res = self.api.auth_requests_call("GET", first_url)
        data = res.content
        file(os.path.join(path, file_name), "wb").write(data)
        self.writes += int(res.headers["content-length"])
        # not finished
        print first_url
        print res.headers["content-length"]
        return first_url

    def save_image(self, info_json, path):
        """Save every page of an illustration/manga work."""
        page_count = info_json["page_count"]
        # Manga (multi-page) works are stored in a folder named after the id.
        if page_count != 1:
            cur_path = os.path.join(path, "%d" % info_json["id"])
            if not os.path.exists(cur_path):
                os.mkdir(cur_path)
        else:
            cur_path = path
        for i in range(0, page_count):
            origin_url = info_json["image_urls"][self.size]
            # Rewrite the single-page URL into the URL of page i by
            # splicing "_p<i>" back in before the size suffix/extension.
            p_idx = origin_url.rfind("_p")
            if cmp(self.size, "large") == 0:
                r_idx = origin_url.rfind(".")
            else:
                r_idx = origin_url.rfind("_")
            img_url = "%s_p%d%s" % (origin_url[:p_idx], i, origin_url[r_idx:])
            file_name = img_url[img_url.rfind("/") + 1:]
            if os.path.exists(os.path.join(cur_path, file_name)):
                # file already exists: skip
                continue
            res = self.api.auth_requests_call("GET", img_url)
            data = res.content
            file(os.path.join(cur_path, file_name), "wb").write(data)
            self.writes += int(res.headers["content-length"])
            print os.path.join(cur_path, file_name)
            print res.headers["content-length"]
# Quick manual test script: authenticate with stored tokens, fetch one
# work and print its caption, original image URL and the elapsed time.
from create_database import configs
import time
from pixivpy3 import PixivAPI

_REQUESTS_KWARGS = {
    'proxies': {
        'https': configs.proxy,
    },
    # The public API uses https; set to False to skip requests' SSL
    # verification (e.g. behind an intercepting proxy).
    'verify': True,
}
start_time = time.time()
api = PixivAPI(**_REQUESTS_KWARGS)
# Reuse previously obtained tokens instead of a username/password login.
api.set_auth(configs.pixiv.access_token, configs.pixiv.refresh_token)
# api.login(configs.pixiv.user, configs.pixiv.passwd)
# json_result = api.illust_detail(59580629)
# illust = json_result.illust
# print(">>> origin url: %s" % illust.image_urls['large'])
# api.auth(configs.pixiv.user, configs.pixiv.passwd, configs.pixiv.refresh_token)
json_result = api.works(46363414)  # fixed sample work id
print(json_result)
illust = json_result.response[0]
print(">>> %s, origin url: %s" % (illust.caption, illust.image_urls['large']))
end_time = time.time()
print(end_time - start_time, 's')
class Pixiv(object):
    """Mirrors works from followed pixiv users into a local database dict,
    downloading images, avatars and rendering ugoira animations to webm.
    """

    def __init__(self, dbDict, config):
        self.config = config
        self.dbDict = dbDict
        self.username = config['PIXIV_USERNAME']
        self.password = config['PIXIV_PASSWORD']
        # images / ugoira / avatars all live under the download root.
        self.imageDirectory = os.path.join(config['PIXIV_DOWNLOAD_DIRECTORY'],
                                           'images')
        self.ugoiraDirectory = os.path.join(config['PIXIV_DOWNLOAD_DIRECTORY'],
                                            'ugoira')
        self.avatarDirectory = os.path.join(config['PIXIV_DOWNLOAD_DIRECTORY'],
                                            'avatars')
        os.makedirs(self.imageDirectory, exist_ok=True)
        os.makedirs(self.ugoiraDirectory, exist_ok=True)
        os.makedirs(self.avatarDirectory, exist_ok=True)
        self.api = PixivAPI()
        self.authorize()

    def authorize(self):
        """Log the shared client in with the configured credentials."""
        self.api.login(self.username, self.password)

    def loadWorks(self):
        """Fetch the first page of followed-user works into dbDict."""
        print('Retrieving Pixiv works')
        self.authorize()
        apiWorks = self.api.me_following_works(
            1, self.config['MAX_WORKS_ON_PAGE'])
        workDicts = apiWorks['response']
        # NOTE(review): this is a redundant shallow copy.
        workDicts = [w for w in workDicts]
        # NOTE(review): comprehension used purely for side effects.
        [self._getImageData(workDict) for workDict in workDicts]

    def loadExtraWorkInfo(self):
        """Download images/avatars for works that lack imageUrls yet."""
        updates = []
        worksToUpdate = [
            work for work in self.dbDict['works'].values()
            if work['website'] == 'Pixiv' and not work.get('imageUrls')
        ]
        if worksToUpdate:
            print("Found {} new Pixiv works".format(len(worksToUpdate)))
        for work in worksToUpdate:
            imageDict = work['pixivMeta']
            extraInfo = {
                'authorAvatarUrl':
                self._getAvatarUrl(
                    str(
                        imageDict.get('user').get('profile_image_urls').get(
                            'px_50x50'))),
                'imageUrls': self._getImageUrls(imageDict),
                # pixivMeta has served its purpose once URLs are resolved.
                'pixivMeta': '',
            }
            updates.append((work['identifier'], extraInfo))
        # NOTE(review): comprehension used purely for side effects.
        [
            self.dbDict['works'][identifier].update(extraInfo)
            for (identifier, extraInfo) in updates
        ]

    def _getImageData(self, imageDict):
        """Convert one API work dict into the internal record format."""
        identifier = str(imageDict.get('id'))
        if identifier not in self.dbDict[
                'works']:  # Skip images we've already loaded
            user = imageDict.get('user') or {}
            imageData = {
                'identifier': identifier,
                'authorName': str(user.get('name')),
                'authorHandle': str(user.get('account')),
                'authorAvatarUrl': None,
                'profileUrl':
                'http://www.pixiv.net/member.php?id=' + str(user.get('id')),
                'website': 'Pixiv',
                'imageTitle': str(imageDict.get('title')),
                'imageUrls': None,
                'imagePageUrl':
                'http://www.pixiv.net/member_illust.php?mode=medium&illust_id='
                + str(imageDict.get('id')),
                'imageTimestamp': self._parseTime(imageDict),
                'imageType': str(imageDict.get('type')),
                'nsfw': str(imageDict.get('age_limit') != 'all-age'),
                'width': str(imageDict.get('width')) or '500',
                'height': str(imageDict.get('height')) or '500',
                'success': str(imageDict.get('status') == 'success'),
                'error': str(imageDict.get('errors')),
                'pixivMeta':
                imageDict,  #stores the pixiv API info to facilitate late download of images
            }
            self.dbDict['works'][identifier] = imageData

    def _parseTime(self, imageDict):
        """Parse the newer of created/reuploaded time to UTC ISO format.

        NOTE(review): 'reupoloaded_time' is misspelled (API field is
        'reuploaded_time'), so that operand is always '' and only
        created_time is ever used. Left unchanged here — fix with care.
        """
        s = max(imageDict.get('created_time', ''),
                imageDict.get('reupoloaded_time', ''))
        return datetime.datetime.strptime(s, '%Y-%m-%d %H:%M:%S').replace(
            tzinfo=pytz.timezone("Asia/Tokyo")).astimezone(
                pytz.utc).isoformat()

    def _getAvatarUrl(self, remoteUrl):
        """Download the author avatar; returns the local path."""
        return self._downloadImage(remoteUrl, self.avatarDirectory)

    def _getImageUrls(self, imageDict):
        """Resolve and download all image files for one work."""
        workType = imageDict.get('type')
        if imageDict.get('is_manga'):
            # Multi-page works need a second API call for per-page URLs.
            response = self.api.works(imageDict['id'])
            response = response.get('response')[0] or {}
            metadata = response.get('metadata') or {}
            pages = metadata.get('pages') or []

            def getMangaUrl(d):
                urld = d.get('image_urls')
                return self._generateImageUrl(
                    urld.get('small') or urld.get('medium')
                    or urld.get('large'))

            urls = [getMangaUrl(item) for item in pages]
        # Weird special case: "type" field in Pixiv JSON can be "manga" while "is_manga" is False
        # In this case there is only a single image URL and the JSON is formatted like an illustration
        elif workType == 'illustration' or (workType == 'manga'
                                            and not imageDict.get('is_manga')):
            urlDict = imageDict.get('image_urls') or {}
            urls = [
                self._generateImageUrl(
                    urlDict.get('small') or urlDict.get('medium')
                    or urlDict.get('large'))
            ]
        elif workType == 'ugoira':
            return self._constructUgoira(imageDict.get('id'))
        else:  #Default case; all response types seem to have at least something in image_urls
            urlDict = imageDict.get('image_urls') or {}
            urls = [
                urlDict.get('small') or urlDict.get('medium')
                or urlDict.get('large')
            ]
        urls = [self._downloadImage(url, self.imageDirectory) for url in urls]
        return urls

    def _generateImageUrl(self, url):
        # Construct the URL for the full-res image. Super brittle; entirely dependent on Pixiv never changing anything
        leftSide = url[:url[8:].find('/') + 9]  #Split on first slash after https://
        rightSide = url[url.find('/img/'):].replace('_master1200', '')
        return leftSide + 'img-original' + rightSide

    def _downloadImage(self, url, directory):
        """Download *url* into *directory*, trying png/jpg/gif suffixes.

        Returns the local path, or (on failure) an error string.
        NOTE(review): the failure path does `r.status_code + ' ' + url`,
        which raises TypeError (int + str). Left unchanged.
        """
        name = url[url.rfind('/') + 1:url.rfind('.')]
        # Map already-downloaded basenames to their full paths.
        extant = {
            name.split('.')[0]: os.path.join(directory, name)
            for name in os.listdir(directory)
        }
        if extant.get(name):
            print('Already downloaded {}'.format(url))
            return extant.get(name)
        print('Downloading ' + url)

        def attemptDownload(attemptUrl, suffix):
            # Swap the extension and fetch with a pixiv referer header.
            attemptUrl = '.'.join((attemptUrl.rpartition('.')[0], suffix))
            return requests.get(
                attemptUrl,
                headers={'referer': attemptUrl[:attemptUrl.find('/img')]},
                stream=True)

        # The original extension is unknown; probe png, then jpg, then gif.
        r = attemptDownload(url, 'png')
        if r.status_code == 404:
            r = attemptDownload(url, 'jpg')
        if r.status_code == 404:
            r = attemptDownload(url, 'gif')
        if r.status_code == 200:
            filename = url.split('/')[-1]
            filepath = os.path.join(directory, filename)
            with open(filepath, 'wb') as f:
                for chunk in r:
                    f.write(chunk)
            return '/'.join((directory, filename))
        else:
            return r.status_code + ' ' + url

    def _constructUgoira(self, identifier):
        """Download an ugoira zip and render it into a webm via ffmpeg."""
        directory = os.path.join(self.ugoiraDirectory, str(identifier))
        os.makedirs(directory, exist_ok=True)
        response = self.api.works(identifier)
        response = response.get('response')[0] or {}
        metadata = response.get('metadata') or {}
        # ffmpeg concat format: one "duration <seconds>" line per frame.
        frameTimes = [
            'duration {}'.format(delay['delay_msec'] / 1000)
            for delay in metadata.get('frames')
        ]
        zipUrl = sorted(metadata['zip_urls'].items())[-1][
            1]  # I don't think zip_urls will ever be longer than 1 but ??
        zipPath = self._downloadUgoiraZip(zipUrl, directory)
        with zipfile.ZipFile(zipPath, 'r') as zap:
            zap.extractall(directory)
        imagePaths = [
            "file '{}'".format(fileName)
            for fileName in os.listdir(directory)
            if not fileName.endswith('.zip')
        ]
        # Interleave "file ..." and "duration ..." lines for ffmpeg concat.
        frameData = '\n'.join(itertools.chain(*zip(imagePaths, frameTimes)))
        concatFile = os.path.join(directory, 'concat.txt')
        print('Writing frame data to: {}'.format(concatFile))
        with open(concatFile, 'w') as f:
            f.write(frameData)
        concatFile = os.path.abspath(os.path.join(os.getcwd(), concatFile))
        workingDirectory = os.path.abspath(os.path.join(
            os.getcwd(), directory))
        outFile = os.path.join(directory, '{}.webm'.format(identifier))
        # NOTE(review): shell=True with interpolated values; identifier
        # comes from the pixiv API (numeric id), so risk is low — confirm.
        ffmpeg = 'ffmpeg -n -f concat -i {} -c:v libvpx -crf 10 -b:v 2M {}.webm'.format(
            concatFile, identifier)
        print('Rendering video to {}'.format(outFile))
        subprocess.run(ffmpeg, shell=True, cwd=workingDirectory)
        print('Finished rendering')
        return [outFile]

    def _downloadUgoiraZip(self, url, directory):
        """Download the ugoira frame zip (idempotent); return its path."""
        print('Downloading ugoira zip: {}'.format(url))
        path = os.path.join(directory, url.split('/')[-1])
        if os.path.exists(path):
            print('Zip already downloaded; skipping')
        else:
            r = requests.get(url,
                             headers={'referer': url[:url.find('/img')]},
                             stream=True)
            with open(path, 'wb') as f:
                for chunk in r.iter_content(chunk_size=1024):
                    if chunk:  # filter out keep-alive new chunks
                        f.write(chunk)
        return path
def __init__(self, credentials):
    """Authenticate the app-API and public-API clients with one token.

    :param credentials: sequence whose first element is a refresh token
    """
    token = credentials[0]
    self.apapi = AppPixivAPI()
    self.papi = PixivAPI()
    # Both clients authenticate against the same refresh token.
    for client in (self.apapi, self.papi):
        client.auth(refresh_token=token)
class PixivHandler:
    """Feed handler that turns new pixiv followings/bookmarks entries into
    post payloads (text content plus downloaded image files).
    """

    # NOTE(review): mutable default argument; it is only read here, but
    # `app_config=None` with a fallback would be the safe idiom.
    def __init__(self, name, app_config={}):
        config_path = Path(app_config.get('handlers_config_dir', '.')) / 'pixiv.toml'
        data_path = Path(app_config.get('data_dir', './data/')) / '{}.toml'.format(name)
        self.config = Config(config_path, write_defaults=True, defaults={
            'refresh': 'xxxx',  # placeholder; user supplies a real refresh token
        })
        self.config.save()
        self.data = Config(data_path)
        self.age_filter = None
        self.api = PixivAPI()
        # Only log in when a refresh token has been configured.
        if self.config.get('refresh'):
            print('logging in to Pixiv...')
            login_response = self.api.auth(refresh_token=self.config['refresh'])
            print('logged in into account {0.name} ({0.account}) [{0.id}]'.format(login_response['response']['user']))

    def set_age_filter(self, filter):
        # 'safe' skips R-18 entries; 'r18' skips all-age entries; None = off.
        self.age_filter = filter

    def handle(self, feed):
        """Return post payloads for entries newer than the stored last_id.

        :param feed: 'followings' or 'bookmarks'; anything else yields []
        :return: list of {'content': str, 'files': [{'data', 'name'}, ...]}
            in oldest-first order
        """
        if feed == 'followings':
            data = self.api.me_following_works(image_sizes=['large', 'medium'], include_stats=False)
        elif feed == 'bookmarks':
            data = self.api.me_favorite_works()
        else:
            return []
        if data['status'] != 'success':
            print('invalid response')
            print('got:')
            print(data)
            return []
        results = data['response']
        # Keep only entries newer than the last one we processed.
        save_data = self.data.get(feed, {'last_id': 0})
        print('latest id: {}'.format(save_data.get('last_id')))
        results = list(filter(lambda x: x['id'] > save_data.get('last_id'), results))
        if len(results) == 0:
            return []
        # Results arrive newest-first; results[0] is the new high-water mark.
        save_data['last_id'] = results[0]['id']
        self.data[feed] = save_data
        self.data.save()
        ret = []
        for entry in results:
            print('Handling pixiv entry {}'.format(entry['id']))
            # NOTE(review): `!= None` — `is not None` is the idiomatic test.
            if self.age_filter != None:
                if entry['age_limit'] in ['r18', 'r18-g'] and self.age_filter == 'safe':
                    print('skipping because currently in safe mode')
                    continue
                if entry['age_limit'] == 'all-age' and self.age_filter == 'r18':
                    print('skipping because currently in r18 mode')
                    continue
            content = '<https://www.pixiv.net/artworks/{}>'.format(entry['id'])
            content += '\n{} by {} ({})'.format(entry['title'], entry['user']['name'], entry['user']['account'])
            content += '\nTags: {}'.format(' '.join(entry['tags']))
            if entry['is_manga']:
                print('it\'s a manga')
                # Manga pages need a second API call for per-page URLs.
                work = self.api.works(entry['id'])
                if work['status'] != 'success':
                    continue
                work = work['response']
                if len(work) == 0:
                    continue
                work = work[0]
                urls = [x['image_urls']['medium'] for x in work['metadata']['pages']]
                if len(urls) > 4:
                    content += '\n{} more pictures not shown here'.format(len(urls) - 4)
                    urls = urls[:4]
            else:
                # Fall back to medium size for very large single images.
                if entry['width'] > 2000 or entry['height'] > 2000:
                    content += '\n(not displaying full resolution because it is too large)'
                    urls = [entry['image_urls']['medium']]
                else:
                    urls = [entry['image_urls']['large']]
            files = []
            index = 0
            for url in urls:
                print('downloading picture...')
                # The referer header is required or pixiv rejects the fetch.
                response = requests.get(url, headers={'referer': 'https://pixiv.net'})
                if response.status_code != 200:
                    continue
                ext = Path(url).suffix
                files.append({'data': response.content, 'name': 'page{}{}'.format(index, ext)})
                index += 1
            ret.append({'content': content, 'files': files})
        # Emit oldest entries first.
        ret.reverse()
        return ret
class PixivCrawler:
    """Downloads pixiv works by tag or ranking and records metadata via
    PixivCursor.
    """

    # Default tag of interest (a character name); runtime value, keep as-is.
    KKRTAG = ['弦巻こころ']

    def __init__(
            self,
            auth,
            work_path=os.path.abspath('../pixiv/'),
    ):
        # auth is a (username, password) pair unpacked into login().
        self._api = PixivAPI()
        self._api.login(*auth)
        self._wd = work_path

    def fetch_work(self, work_id, tag):
        """Download every image of *work_id* into a YYYYMMDD directory.

        Records metadata via PixivCursor when anything was downloaded.
        Returns True when at least one file was newly downloaded.
        """
        got = False
        ri = self._api.works(work_id)
        # NOTE(review): bare except — swallows everything, incl. SystemExit.
        try:
            r = ri.response[0]
        except:
            r = None
        if not r:
            return got
        # Multi-page works list per-page URLs in metadata.pages.
        url_list = []
        if r.metadata:
            for p in r.metadata.pages:
                url_list.append(p.image_urls.large)
        else:
            url_list.append(r.image_urls.large)
        # Group downloads by creation date, e.g. '20200131'.
        created_time = r.created_time[:10].replace('-', '')
        wd = os.path.join(self._wd, created_time)
        if not os.path.isdir(wd):
            os.mkdir(wd)
        fns = []
        for url in url_list:
            fn = os.path.basename(url)
            final_fn = os.path.join(created_time, fn)
            _logger.info('getting %s to %s', url, wd)
            try:
                if self._api.download(url, fname=fn, path=wd):
                    got = True
                    # NOTE(review): renames the fresh download to
                    # '<name>.download' — purpose unclear; confirm intent.
                    shutil.move(os.path.join(wd, fn),
                                os.path.join(wd, fn + '.download'))
                    fns.append(final_fn)
            except:
                import sys
                sys.excepthook(*sys.exc_info())
        if fns:
            meta = json.dumps(r)
            dmeta = {
                'work_id': work_id,
                'mode': tag,
                'user': r.user.id,
                'fn': fns,
                'meta': meta,
            }
            PixivCursor.insert_update_one(dmeta)
        return got

    # NOTE(review): filter_tag=[] is a mutable default; it is rebound, not
    # mutated, so it is harmless here — but a known pitfall.
    def get_by_tag(self, search_tag='', filter_tag=[], num=30, save_tag=''):
        """Download up to *num* illustrations matching search/filter tags.

        When search_tag is empty, the first filter tag becomes the search
        term.  Returns the number downloaded, or None with no tags at all.
        """
        if not search_tag and not filter_tag:
            return None
        if filter_tag:
            filter_tag = [x.strip().lower() for x in filter_tag]
            if not search_tag:
                search_tag = filter_tag[0]
                filter_tag = filter_tag[1:]
        if not save_tag:
            save_tag = search_tag
        filter_tag = set(filter_tag)
        _logger.info('search: %s filter: %s', search_tag, filter_tag)
        ret = 0
        page = 1
        while ret < num:
            r = self._api.search_works(search_tag, mode='tag', page=page,
                                       per_page=30)
            try:
                l = r.response
            except:
                l = None
            if not l:
                break  # no more pages
            _logger.info('get %d illusts', len(l))
            for i in l:
                if i.type != 'illustration':
                    continue
                # A work qualifies only when it carries ALL filter tags.
                tt = set([x.strip().lower() for x in i.tags])
                if len(tt & filter_tag) != len(filter_tag):
                    continue
                if self.fetch_work(i.id, save_tag):
                    ret += 1
                # NOTE(review): `>` here but `>=` in get_rank — this allows
                # one extra download past num; confirm which is intended.
                if ret > num:
                    break
            page += 1
        return ret

    def get_rank(self, mode='daily', num=30):
        """Download up to *num* illustrations from the *mode* ranking."""
        ret = 0
        page = 1
        while ret < num:
            r = self._api.ranking_all(mode=mode, page=page, per_page=30)
            try:
                l = r.response[0].works
            except:
                l = None
            if not l:
                break  # no more pages
            _logger.info('get %d ranking illust', len(l))
            for i in l:
                # Ranking entries wrap the work one level deeper.
                if i.work.type != 'illustration':
                    continue
                if self.fetch_work(i.work.id, mode):
                    ret += 1
                if ret >= num:
                    break
            page += 1
        return ret
# Benchmark scaffolding comparing sync pixivpy3 clients against the async
# pixivpy_async variants.
# NOTE(review): `sys` and `time` are used below but not imported in this
# visible span — presumably imported above the snippet; confirm.
import random
from pixivpy_async import PixivClient
from pixivpy_async import AppPixivAPI
from pixivpy_async import PixivAPI
from pixivpy3 import AppPixivAPI as Sync_aapi
from pixivpy3 import PixivAPI as Sync_papi

sys.dont_write_bytecode = True

# Placeholder credentials; fill in before running.
_USERNAME = "******"
_PASSWORD = "******"
saapi = Sync_aapi()
saapi.login(_USERNAME, _PASSWORD)
spapi = Sync_papi()
spapi.login(_USERNAME, _PASSWORD)


def gen_date():
    """Return a pseudo-random date string of the form 201x-0x-xx."""
    year = random.randint(3, 9)
    month = random.randint(1, 9)
    day = random.randint(10, 29)
    return '201%s-0%s-%s' % (year, month, day)


def test_sync_illust(num):
    # NOTE(review): `e` looks like a timing start point, but it is unused
    # in the visible portion — the rest of the benchmark may be truncated.
    e = time.time()
    for i in range(num):
        print('%s,' % i, end="")
class pixivImage:
    """Wrapper around a single Pixiv illustration.

    Accepts either a numeric 8-digit illust ID or the illustration's
    medium-page URL; lazily imports illust/user metadata via the Pixiv API.
    """

    def __init__(self, *args):
        baseURL = "https://www.pixiv.net/member_illust.php?mode=medium&illust_id="
        self.image_URLs = []
        self.directories = []
        self.caption = ""
        self.userTags = []
        self.userImported = False
        for arg in args:
            length = len(str(arg))
            # A bare ID is 8 digits long.
            if length == 8:
                self.ID = int(arg)
                self.URL = baseURL + str(arg)
            # A URL is the base URL plus an 8-digit ID.
            # BUG FIX: the original used bitwise '&', which binds tighter
            # than '==' and made this branch unreachable
            # (`isinstance(...) & length` was evaluated first).
            elif isinstance(arg, str) and length == len(baseURL) + 8:
                self.URL = arg
                try:
                    # BUG FIX: original read `self.url` (wrong case) and
                    # left ID as a string; now consistent with the int
                    # branch above. The original fallback rebuilt the URL
                    # from arg["ID"], which always raises TypeError on a
                    # str, so it was removed.
                    self.ID = int(self.URL[self.URL.find(
                        "&illust_id=", 0, length) + len("&illust_id="):length])
                except (TypeError, ValueError):
                    print("URL is malformed")
            else:
                print("URL OR ID is wrong or in bad format")

    def __get__(self, obj, objtype):
        # NOTE(review): `self.attr` and `self.item` are never assigned in
        # this class, so the getattr path always falls through to the
        # import attempts below — confirm intended usage with callers.
        try:
            return getattr(obj, self.attr)
        except AttributeError:
            try:
                self.importIllustJSON()
                return self.item
            except AttributeError:
                try:
                    self.importUserJSON()
                except AttributeError:
                    print("Image does not have that attribute")
                    pass

    def setCustomTags(self, tags):
        """Attach user-supplied tags to this image."""
        self.userTags = tags

    def setCaption(self, caption):
        """Attach a user-supplied caption to this image."""
        self.caption = caption

    def importIllustJSON(self):
        """Fetch illust metadata from the Pixiv API into attributes."""
        # Login to Pixiv API.
        self.api = PixivAPI()
        self.api.login(pixivLogin["pixivusername"],
                       pixivLogin["pixivpassword"])
        userURL = "https://www.pixiv.net/member_id="
        self.JSON = self.api.works(self.ID)['response'][0]
        self.manga = self.JSON['is_manga']
        self.account = self.JSON['user']['account']
        self.name = self.JSON['user']['name']
        self.user_ID = self.JSON['user']['id']
        self.user_URL = userURL + str(self.user_ID)
        self.title = self.JSON['title']
        self.tags = self.JSON['tags']
        self.pages = self.JSON['page_count']
        if self.pages > 1:
            # BUG FIX: original iterated range(self.pages - 1) and so
            # silently dropped the last page of multi-page works.
            for page in range(self.pages):
                self.image_URLs.append(
                    self.JSON['metadata']["pages"][page]
                    ["image_urls"]['large'])
        else:
            self.image_URLs.append(self.JSON['image_urls']['large'])

    def importUserJSON(self):
        """Fetch author profile details (no authentication needed)."""
        aapi = AppPixivAPI()
        self.userJSON = aapi.user_detail(self.user_ID)
        self.webpage = self.userJSON['profile']['webpage']
        self.twitter_name = self.userJSON['profile']['twitter_account']
        self.twitter_URL = self.userJSON['profile']['twitter_url']
        self.pawoo_URL = self.userJSON['profile']['pawoo_url']
        self.userImported = True

    def importJSON(self):
        """Manually import both illust and user metadata."""
        self.importIllustJSON()
        self.importUserJSON()

    def download(self, directory=None):
        """Download all pages of the illustration into `directory`.

        Defaults to a 'temp' folder next to this file. BUG FIX: the
        original mutated `directory` inside the loop (turning the folder
        path into a file path after the first page) and issued a second,
        stray `self.api.download(URL)` into the working directory.
        """
        if directory is None:
            directory = os.path.dirname(
                os.path.abspath(__file__)) + "\\temp\\"
        if not os.path.exists(directory):
            os.makedirs(directory)
        for URL in self.image_URLs:
            self.api.download(URL, prefix=directory)
            self.directories.append(
                directory + "\\" + str(os.path.basename(URL)))
class PixivPixie:
    """Pixiv API interface.

    Remember call login() before using other methods.

    Attributes:
        auto_re_login: If true, PixivPixie will auto re-login when login
            token expired.
    """

    def __init__(self, auto_re_login=True, **requests_kwargs):
        self.auto_re_login = auto_re_login
        self._requests_kwargs = requests_kwargs
        # Two underlying clients: public API (papi) and app API (aapi);
        # some endpoints are only available on one of them.
        self._papi = PixivAPI(**requests_kwargs)
        self._aapi = AppPixivAPI(**requests_kwargs)
        self._has_auth = False
        self._last_login = None
        # Serializes token checks/re-logins across threads.
        self._check_auth_lock = Lock()
        self._username = None
        self._password = None

    @property
    def requests_kwargs(self):
        """Parameters that will be passed to requests."""
        return self._requests_kwargs

    @requests_kwargs.setter
    def requests_kwargs(self, requests_kwargs):
        # Keep both underlying clients in sync with the new kwargs.
        self._requests_kwargs = requests_kwargs
        self._papi.requests_kwargs = requests_kwargs
        self._aapi.requests_kwargs = requests_kwargs

    @property
    def has_auth(self):
        """Whether the pixie has login."""
        return self._has_auth

    @property
    def last_login(self):
        """Last login time. Will be a datetime object or None if haven't
        login yet."""
        return self._last_login

    def login(self, username, password):
        """Login Pixiv account.

        Notice: The access token will expire after about 1 hour. So if you
        are dealing with a long time quest, remember to re-login every
        some time.

        Args:
            username: Your Pixiv account's username.
            password: Your Pixiv account's password.

        Returns:
            None.

        Raises:
            LoginFailed: An error occurred if the username and password is
                not match.
        """
        if not username or not password:
            raise LoginFailed
        try:
            self._papi.login(username, password)
            # self._aapi.login(username, password)
            # Reuse papi's tokens for aapi instead of logging in twice.
            self._aapi.access_token = self._papi.access_token
            self._aapi.user_id = self._papi.user_id
            self._aapi.refresh_token = self._papi.refresh_token
        except PixivError:
            raise LoginFailed
        else:
            self._has_auth = True
            self._username = username
            self._password = password
            self._last_login = datetime.datetime.now()
        return self

    def check_auth(self, auto_re_login=False):
        """Raise error if the pixie doesn't has auth.

        Args:
            auto_re_login: If true, the PixivPixie will try to re-login
                when login token expired.

        Raises:
            NoAuth: If the PixivPixie hasn't login first.
            LoginFailed: If re-login failed.
        """
        with self._check_auth_lock:
            if not self.has_auth:
                raise NoAuth
            if datetime.datetime.now() - self.last_login >= TOKEN_LIFETIME:
                # Token expired
                if auto_re_login:
                    self.login(self._username, self._password)
                else:
                    raise NoAuth

    @_need_auth
    def illust(self, illust_id):
        """Gets a single illust.

        Args:
            illust_id: An integer.

        Returns:
            A PixivIllust object.

        Raises:
            Any exceptions check_auth() will raise.
            IllustError: If the illust_id is invalid or the illust is
                blocked by the Pixiv account setting.
        """
        json_result = Json(self._papi.works(illust_id))
        if json_result.status != 'success':
            error_code = json_result.errors.system.get('code')
            error_message = {
                206: 'Target illust not found.',
                229: 'Illust browsing restricted.',
            }
            raise IllustError(illust_id, error_message.get(error_code))
        return PixivIllust.from_papi(self, json_result.response[0])

    @classmethod
    def _papi_call(
            cls, call_func,
            page=1, per_page=30,
            iter_target=None, extra_yield=None,
            **kwargs
    ):
        # Generic paginator over public-API endpoints: yields each item of
        # every page until pagination.next is falsy.
        current_page = page
        while current_page:
            json_result = Json(
                call_func(page=current_page, per_page=per_page, **kwargs))
            if json_result.status != 'success':
                raise APIError(call_func, json_result.errors)
            if iter_target is None:
                target = json_result.response
            else:
                target = iter_target(json_result.response)
            for item in target:
                if extra_yield is None:
                    yield item
                else:
                    yield item, extra_yield(json_result.response)
            current_page = json_result.pagination.next

    def _aapi_call(self, call_func, **kwargs):
        # Generic paginator over app-API endpoints: follows next_url until
        # exhausted or the 5000-result offset cap is reached.
        req_auth = True
        while True:
            try:
                if int(kwargs['offset']) >= 5000:
                    break
            except (KeyError, ValueError):
                pass
            json_result = Json(call_func(**kwargs, req_auth=req_auth))
            if 'error' in json_result:
                raise APIError(call_func, json_result.error)
            yield from json_result.illusts
            if json_result.next_url is None:
                break
            # Next page's parameters are encoded in next_url's query string.
            kwargs = self._aapi.parse_qs(json_result.next_url)

    @query_set
    @_need_auth
    def my_following_illusts(self, until=None):
        """Fetch new illusts of following users.

        Fetch new illusts of following users. Normal user can only have
        the first 2000 illust while Premium user can have the first 5000.
        If you didn't turn off the browsing restriction in account
        setting, the R-18(G) illusts will be excluded.

        Args:
            until: Could be:
                [default] None: No limit.
                A string or datetime object which corresponding to the
                    earliest creation time of illusts.

        Returns:
            A QuerySet that yield PixivIllust object.

        Raises:
            Any exceptions check_auth() will raise.
        """
        if isinstance(until, str):
            until = dateutil.parser.parse(until)
        for json_result in self._papi_call(self._papi.me_following_works):
            illust = PixivIllust.from_papi(self, json_result)
            # Results arrive newest-first, so stop at the first older one.
            if until is not None and illust.creation_time < until:
                return
            yield illust

    @query_set
    @_need_auth
    def user_illusts(self, user_id):
        """Fetch a user's illusts.

        Fetch a user's illusts. If you didn't turn off the browsing
        restriction in account setting, the R-18(G) illusts will be
        excluded.

        Args:
            user_id: An integer.

        Returns:
            A QuerySet that yield PixivIllust object.

        Raises:
            Any exceptions check_auth() will raise.
            PAPIError: If the user_id is invalid.
        """
        for json_result in self._papi_call(
                self._papi.users_works,
                author_id=user_id,
        ):
            yield PixivIllust.from_papi(self, json_result)

    @query_set
    @_need_auth
    def ranking(
            self,
            mode=RankingMode.DAY,
            date=None,
    ):
        """Fetch all ranking illusts.

        Fetch all ranking illusts and returns them from rank high to low.
        If you didn't turn off the browsing restriction in account
        setting, the R-18(G) illusts will be excluded.

        Args:
            mode: A pixiv_pixie.constants.RankingMode constant (DAY, WEEK,
                MONTH, DAY_MALE, DAY_FEMALE, WEEK_ORIGINAL, WEEK_ROOKIE,
                DAY_MANGA, DAY_R18, DAY_MALE_R18, DAY_FEMALE_R18, WEEK_R18,
                WEEK_R18G). Defaults to DAY.
            date: Could be:
                [default] None: Will fetch the latest ranking.
                A date or datetime object.
                A string in the format of '%Y-%m-%d', e.g., '2017-08-01'.

        Returns:
            A QuerySet that yield PixivIllust object.

        Raises:
            Any exceptions check_auth() will raise.
        """
        if isinstance(date, (datetime.date, datetime.datetime)):
            date = date.strftime('%Y-%m-%d')
        # The response of PAPI does not contains metadata. So AAPI was used.
        for rank, json_result in enumerate(self._aapi_call(
                self._aapi.illust_ranking, mode=mode.value, date=date,
        ), start=1):
            illust = PixivIllust.from_aapi(self, json_result)
            illust.rank = rank
            yield illust

    @query_set
    @_need_auth
    def search(
            self, query,
            mode=SearchMode.TAG,
            period=SearchPeriod.ALL,
            order=SearchOrder.DESC,
    ):
        """Search illusts.

        Search illusts.

        Args:
            query: Query keyword. You can separate multiple keywords by
                space.
            mode: Could be:
                TEXT: Search in title and caption.
                [default] TAG: Search in tags.
                EXACT_TAG: Search in tags. Only exactly matched tag is
                    acceptable.
                CAPTION: Search in caption.
                These constants are defined in
                pixiv_pixie.constants.SearchMode.
            period: Could be: [default] ALL, DAY, WEEK, MONTH. This
                parameter is only applied when order is ASC. These
                constants are defined in pixiv_pixie.constants.SearchPeriod.
            order: Could be:
                [default] DESC: The output will be from new to old.
                ASC: The output will be from old to new.
                These constants are defined in
                pixiv_pixie.constants.SearchOrder.

        Returns:
            A QuerySet that yield PixivIllust object.

        Raises:
            Any exceptions check_auth() will raise.
        """
        for json_result in self._papi_call(
                self._papi.search_works, query=query, mode=mode.value,
                period=period.value, order=order.value,
        ):
            yield PixivIllust.from_papi(self, json_result)

    @query_set
    @_need_auth
    def related_illusts(self, illust_id, limit=None):
        """Fetch all related illusts.

        Fetch all related illusts of a provided illust.

        Args:
            illust_id: An integer.
            limit: Max number of illust to be yield. If limit=None, there
                will be no limit.

        Returns:
            A QuerySet that yield PixivIllust object.

        Raises:
            Any exceptions check_auth() will raise.
        """
        for cnt, json_result in enumerate(self._aapi_call(
                self._aapi.illust_related, illust_id=illust_id,
        ), start=1):
            if limit is not None and cnt > limit:
                break
            yield PixivIllust.from_aapi(self, json_result)

    @classmethod
    def convert_zip_to_gif(
            cls, input_file,
            frame_delays=None, output_file=None,
            use_pil=False,
    ):
        """Convert a zip file that contains all frames into gif.

        Convert a zip file that contains all frames into gif.

        Args:
            input_file: The input file. May be str or a file-like object.
            frame_delays: A list of delay durations in microsecond.
            output_file: The output file. May be str or a file-like object.
            use_pil: Whether to ues Pillow library to create GIF file. By
                default FreeImage library will be used. FreeImage create
                better quality and smaller size file, but require external
                .dll/.so and may crash for unknown reason.
        """
        if frame_delays is None:
            if isinstance(input_file, str):
                # Delays default to the sibling .txt written at download.
                frame_info = os.path.splitext(input_file)[0] + '.txt'
                with open(frame_info, 'rt', encoding='utf-8') as f:
                    frame_delays = [int(line) for line in f if line.strip()]
            else:
                raise ValueError('Could not get frame delays.')

        if output_file is None:
            if isinstance(input_file, str):
                output_file = os.path.splitext(input_file)[0] + '.gif'
            else:
                raise ValueError('Could not determined output filename.')

        dir_name = os.path.dirname(output_file)
        if dir_name:
            os.makedirs(dir_name, exist_ok=True)

        images = []
        with ZipFile(input_file) as zip_file:
            # Zip member names sort in frame order.
            for name in sorted(zip_file.namelist()):
                with zip_file.open(name) as input_file:
                    images.append(imageio.imread(io.BytesIO(
                        input_file.read())))
        # imageio expects durations in seconds; delays arrive in ms.
        frame_delays = [delay / 1000 for delay in frame_delays]

        if not use_pil:
            save_format = 'GIF-FI'
        else:
            save_format = 'GIF-PIL'
        imageio.mimwrite(
            output_file, images,
            format=save_format, duration=frame_delays,
        )
        del images

    @classmethod
    def _get_file_path(
            cls, illust, page, url,
            convert_ugoira,
            directory, name,
            addition_naming_info,
    ):
        # Compute the destination path for one page, applying the optional
        # `name` format template (see download() docstring for fields).
        original_name = os.path.basename(url)
        root, ext = os.path.splitext(original_name)
        if convert_ugoira and ext == '.zip':
            # Ugoira zips will be converted, so name the target .gif.
            ext = '.gif'
            original_name = root + ext
        if name:
            naming_info = {
                'illust': illust,
                'page': page,
                'original_name': original_name,
                'root': root,
                'ext': ext,
            }
            if addition_naming_info:
                naming_info.update(addition_naming_info)
            filename = name.format(**naming_info)
        else:
            filename = original_name
        file_path = os.path.join(directory, filename)
        return file_path

    @classmethod
    def _try_remove_file(cls, path):
        # Best-effort cleanup of a partial download; never raises.
        if not isinstance(path, str) or not path:
            return
        try:
            os.remove(path)
        except OSError:
            pass

    @classmethod
    def _check_exist(cls, path, checklist):
        # True if a file with the same basename exists in any folder of
        # `checklist` (used to skip re-downloading).
        basename = os.path.basename(path)
        for folder in checklist:
            if os.path.exists(os.path.join(folder, basename)):
                return True
        return False

    def _download_illust_to_file(self, url, file):
        # Stream `url` into the open file object; `download` here is a
        # module-level helper (not self.download) that yields
        # (wrote_size, total_size) progress tuples.
        requests_kwargs = self.requests_kwargs.copy()
        requests_kwargs['stream'] = True
        requests_kwargs['headers'] = ILLUST_DOWNLOAD_HEADERS
        try:
            wrote_size = 0
            total_size = None
            for wrote_size, total_size in download(
                    file, url, **requests_kwargs,
            ):
                pass
            if total_size is not None and wrote_size < total_size:
                raise APIError(
                    self.download,
                    'Unexpected connection interruption.',
                )
        except requests.HTTPError as e:
            raise APIError(self.download, e.response.text) from e

    def _download_one_url(
            self, illust, url, path,
            convert_ugoira,
            replace,
            check_exists,
            max_tries,
            fake_download,
            use_pil,
    ):
        # Download a single page to `path`, retrying up to max_tries.
        # Returns True when a file was written, False when skipped.
        if not replace and os.path.exists(path):
            return False
        if self._check_exist(path, check_exists):
            return False
        if fake_download:
            return False
        dir_name = os.path.dirname(path)
        frame_path = None
        for tries in count(start=1):
            try:
                # Buffer in memory first so a failed transfer never leaves
                # a truncated file on disk.
                buffer = io.BytesIO()
                self._download_illust_to_file(url, buffer)
                buffer.seek(0)
                if illust.type == IllustType.UGOIRA and convert_ugoira:
                    self.convert_zip_to_gif(
                        buffer, illust.frame_delays, path, use_pil,
                    )
                else:
                    if dir_name:
                        os.makedirs(dir_name, exist_ok=True)
                    with open(path, 'wb') as f:
                        copyfileobj(buffer, f)
                    if illust.type == IllustType.UGOIRA:
                        # Keep frame delays beside the zip for later
                        # conversion.
                        frame_path = os.path.splitext(path)[0] + '.txt'
                        with open(frame_path, 'wt') as f:
                            for frame_delay in illust.frame_delays:
                                print(frame_delay, file=f)
                return True
            except Exception as e:
                # Remove partial output before retrying or giving up.
                self._try_remove_file(path)
                self._try_remove_file(frame_path)
                if max_tries is None or tries < max_tries:
                    continue
                raise DownloadError(illust, e) from e

    def _download_multiple_urls(
            self, illust, target,
            convert_ugoira,
            replace,
            check_exists,
            max_tries,
            fake_download,
            use_pil,
    ):
        # Download each (url, path) pair; returns a list of
        # (url, path, downloaded) tuples.
        result = []
        for url, path in target:
            result.append((url, path, self._download_one_url(
                illust, url, path,
                convert_ugoira=convert_ugoira,
                replace=replace,
                check_exists=check_exists,
                max_tries=max_tries,
                fake_download=fake_download,
                use_pil=use_pil,
            )))
        return result

    @_need_auth
    def download(
            self, illust,
            directory=os.path.curdir,
            name=None,
            addition_naming_info=None,
            convert_ugoira=True,
            replace=False,
            check_exists=None,
            max_tries=5,
            fake_download=False,
            use_pil=False,
    ):
        """Download illust.

        Download illust.

        Args:
            illust: The illust or illust_id to be downloaded.
            directory: Directory.
            name: If set, the downloaded file would be renamed. Could
                contains format string syntax.
                e.g. name='{illust.user_id}_{original_name}'
                The following information is provided:
                    illust: The illust object.
                    page: 0-based page number.
                    original_name: The default filename.
                    root: The root part of original_name. e.g. 'foo' in
                        'foo.bar'.
                    ext: The extension part of original_name. e.g. '.bar'
                        in 'foo.bar'.
            addition_naming_info: Addition dict that will be used when
                formatting name.
            convert_ugoira: Whether to download ugoira as gif. If false, a
                zip file will be downloaded instead. And a txt file
                contains frame durations would be created.
            replace: If true, will replace already exist file(s).
            check_exists: Addition path(s) to check whether the illust
                exists (by name). Could be a path string, a list of path
                string or None.
            max_tries: Max try times when download failed. If
                max_tries=None, it will loop infinitely until finished.
            fake_download: If True, no file will be actually downloaded.
            use_pil: Whether to ues Pillow library to create GIF file.
                Refers to the doc of PixivPixie.convert_zip_to_gif().

        Returns:
            A list of download result of each page. Each result is a tuple
            of (url, path, downloaded).

        Raises:
            Any exceptions check_auth() will raise.
            DownloadError.
        """
        if isinstance(illust, int):
            illust = self.illust(illust)
        if check_exists is None:
            check_exists = []
        elif isinstance(check_exists, str):
            check_exists = [check_exists]
        download_target = []
        # Building the target list touches illust.image_urls, which may hit
        # the API; retry it like a download.
        for tries in count(start=1):
            try:
                download_target = [(
                    url,
                    self._get_file_path(
                        illust, page, url,
                        convert_ugoira,
                        directory, name,
                        addition_naming_info,
                    ),
                ) for page, url in enumerate(illust.image_urls)]
                break
            except Exception as e:
                if max_tries is None or tries < max_tries:
                    continue
                raise DownloadError(illust, e) from e
        return self._download_multiple_urls(
            illust, download_target,
            convert_ugoira=convert_ugoira,
            replace=replace,
            check_exists=check_exists,
            max_tries=max_tries,
            fake_download=fake_download,
            use_pil=use_pil,
        )
class Pixiv(object):
    """Fetches the logged-in account's Pixiv feed and mirrors image data
    into a local database dict and download directories.
    """

    def __init__(self, dbDict, config):
        """Store config, ensure the download directories exist, and log in.

        Args:
            dbDict: Persistent dict with a 'works' sub-dict keyed by
                work identifier.
            config: Mapping with PIXIV_USERNAME / PIXIV_PASSWORD /
                PIXIV_DOWNLOAD_DIRECTORY / PIXIV_AVATAR_DIRECTORY keys.
        """
        self.dbDict = dbDict
        self.username = config['PIXIV_USERNAME']
        self.password = config['PIXIV_PASSWORD']
        self.downloadDirectory = config['PIXIV_DOWNLOAD_DIRECTORY']
        self.avatarDirectory = config['PIXIV_AVATAR_DIRECTORY']
        os.makedirs(self.downloadDirectory, exist_ok=True)
        os.makedirs(self.avatarDirectory, exist_ok=True)
        self.api = PixivAPI()
        self.authorize()

    def authorize(self):
        """(Re-)login to Pixiv; the access token expires periodically."""
        self.api.login(self.username, self.password)

    def loadWorks(self):
        """Return image-data dicts for new works in the account's feed."""
        print('Retrieving Pixiv works')
        self.authorize()
        feeds = self.api.me_feeds()
        workIds = [r['ref_work']['id'] for r in feeds['response']
                   if r['type'] == 'add_illust']
        workDicts = [self._getWorkDict(workId) for workId in workIds]
        works = [w for workDict in workDicts
                 for w in self._getImageData(workDict)]
        return works

    def _getWorkDict(self, workId):
        """Fetch one work's JSON; on API failure return a failure dict."""
        url = ('https://public-api.secure.pixiv.net/v1/works/'
               + workId + '.json')
        result = self.api.auth_requests_call('GET', url)
        try:
            return self.api.parse_result(result)
        except PixivError as p:
            return {'status': 'failure', 'errors': str(p)}

    def _getImageData(self, workDict):
        """Convert a work JSON into imageData dicts, recording new ones.

        Returns the list of newly-seen imageData dicts (already-loaded
        identifiers are skipped) and stores each into dbDict['works'].

        Raises:
            RuntimeError: If workDict reports a failed API call.
        """
        # BUG FIX: the failure check now happens before touching
        # workDict['response'] — failure dicts from _getWorkDict have no
        # 'response' key, so the original raised KeyError instead of the
        # intended RuntimeError.
        if workDict['status'] != 'success':
            raise RuntimeError('Failed Pixiv API call: '
                               + str(workDict.get('errors')))
        imageList = []
        for imageDict in workDict['response']:
            imageData = {
                'identifier': '',
                'authorName': '',
                'authorHandle': '',
                'authorAvatarUrl': '',
                'profileUrl': '',
                'website': '',
                'imageTitle': '',
                'imageUrls': [],
                'imagePageUrl': '',
                'imageTimestamp': '',
                'imageType': '',
                'nsfw': False,
                'width': '500',
                'height': '500',
                'success': False,
                'error': 'Unknown error',
            }
            identifier = str(imageDict.get('id'))
            # Skip images we've already loaded.
            if identifier not in self.dbDict['works']:
                user = imageDict.get('user') or {}
                imageData['identifier'] = identifier
                imageData['authorName'] = str(user.get('name'))
                imageData['authorHandle'] = str(user.get('account'))
                imageData['authorAvatarUrl'] = self._getAvatarUrl(
                    str((user.get('profile_image_urls') or {})
                        .get('px_50x50')))
                imageData['profileUrl'] = (
                    'http://www.pixiv.net/member.php?id='
                    + str(user.get('id')))
                imageData['website'] = 'Pixiv'
                imageData['imageTitle'] = str(imageDict.get('title'))
                imageData['imageUrls'] = self._getImageUrls(imageDict)
                imageData['imagePageUrl'] = (
                    'http://www.pixiv.net/member_illust.php'
                    '?mode=medium&illust_id=' + str(imageDict.get('id')))
                # BUG FIX: created_time now defaults to '' so max() never
                # compares None against a str.
                # NOTE(review): 'reupoloaded_time' looks like a typo'd API
                # key — confirm against the Pixiv response schema.
                imageData['imageTimestamp'] = str(max(
                    imageDict.get('created_time', ''),
                    imageDict.get('reupoloaded_time', '')))
                imageData['imageType'] = str(imageDict.get('type'))
                imageData['nsfw'] = str(
                    imageDict.get('age_limit') != 'all-age')
                imageData['width'] = str(imageDict.get('width')) or '500'
                imageData['height'] = str(imageDict.get('height')) or '500'
                imageData['success'] = str(
                    imageDict.get('status') == 'success')
                imageData['error'] = str(imageDict.get('errors'))
                self.dbDict['works'][identifier] = imageData
                # BUG FIX: imageList was built but never appended to, so
                # loadWorks() always returned an empty list.
                imageList.append(imageData)
        return imageList

    def _parseTime(self, imageDict):
        """Parse the newer of the work's timestamps into ISO-8601 UTC."""
        s = max(imageDict.get('created_time', ''),
                imageDict.get('reupoloaded_time', ''))
        return datetime.datetime.strptime(
            s, '%Y-%m-%d %H:%M:%S').replace(tzinfo=pytz.UTC).isoformat()

    def _getAvatarUrl(self, remoteUrl):
        """Download the author avatar and return its local path."""
        return self._downloadImage(remoteUrl, self.avatarDirectory)

    def _getImageUrls(self, imageDict):
        """Download all of a work's images and return their local paths."""
        workType = imageDict.get('type')
        if workType == 'illustration':
            urlDict = imageDict.get('image_urls') or {}
            urls = [self._generateImageUrl(
                urlDict.get('small') or urlDict.get('medium')
                or urlDict.get('large'))]
        elif workType == 'manga':
            pages = (imageDict.get('metadata') or {}).get('pages') or []

            def getMangaUrl(d):
                urld = d.get('image_urls')
                return self._generateImageUrl(
                    urld.get('small') or urld.get('medium')
                    or urld.get('large'))
            urls = [getMangaUrl(item) for item in pages]
        # Ugoira handling falls through to default for now
        # elif workType == 'ugoira':
        #     pass
        else:
            # Default case; all response types seem to have at least
            # something in image_urls.
            urlDict = imageDict.get('image_urls') or {}
            urls = [urlDict.get('small') or urlDict.get('medium')
                    or urlDict.get('large')]
        urls = [self._downloadImage(url, self.downloadDirectory)
                for url in urls]
        return urls

    def _generateImageUrl(self, url):
        # Construct the URL for the full-res image. Super brittle; entirely
        # dependent on Pixiv never changing anything.
        leftSide = url[:url.find('pixiv.net') + 10]
        rightSide = url[url.find('/img/'):].replace('_master1200', '')
        return leftSide + 'img-original' + rightSide

    def _downloadImage(self, url, directory):
        """Download `url` into `directory`, trying png/jpg/gif suffixes.

        Returns the local path on success, or '<status> <url>' on failure.
        """
        print('Downloading ' + url)

        def attemptDownload(attemptUrl, suffix):
            attemptUrl = '.'.join((attemptUrl.rpartition('.')[0], suffix))
            return requests.get(
                attemptUrl,
                headers={'referer': attemptUrl[:attemptUrl.find('/img')]},
                stream=True)

        # The original extension is unknown; probe the common ones.
        r = attemptDownload(url, 'png')
        if r.status_code == 404:
            r = attemptDownload(url, 'jpg')
        if r.status_code == 404:
            r = attemptDownload(url, 'gif')
        if r.status_code == 200:
            filename = url.split('/')[-1]
            filepath = os.path.join(directory, filename)
            if os.path.isfile(filepath):
                print('File already downloaded; skipping')
            else:
                with open(filepath, 'wb') as f:
                    for chunk in r:
                        f.write(chunk)
            return '/'.join((directory, filename))
        else:
            # BUG FIX: status_code is an int; the original int + str
            # concatenation raised TypeError on every failed download.
            return str(r.status_code) + ' ' + url