class PixivHandler:
    """Fetches new Pixiv works for a feed ('followings' or 'bookmarks') and
    builds post payloads ({'content', 'files'}) with downloaded preview images.
    """

    def __init__(self, name, app_config=None):
        """Set up config/data files and log in to Pixiv with a refresh token.

        Args:
            name: instance name; used to derive the per-instance data file.
            app_config: optional dict with 'handlers_config_dir' and
                'data_dir' keys (both optional).
        """
        # NOTE(fix): the default used to be a shared mutable dict ({});
        # use the None-sentinel idiom instead.
        if app_config is None:
            app_config = {}
        config_path = Path(app_config.get('handlers_config_dir', '.')) / 'pixiv.toml'
        data_path = Path(app_config.get('data_dir', './data/')) / '{}.toml'.format(name)
        self.config = Config(config_path, write_defaults=True, defaults={
            'refresh': 'xxxx',
        })
        self.config.save()
        self.data = Config(data_path)
        # Optional content-rating filter: None (off), 'safe', or 'r18'.
        self.age_filter = None
        self.api = PixivAPI()
        if self.config.get('refresh'):
            print('logging in to Pixiv...')
            login_response = self.api.auth(refresh_token=self.config['refresh'])
            print('logged in into account {0.name} ({0.account}) [{0.id}]'.format(
                login_response['response']['user']))

    def set_age_filter(self, filter):
        """Set the rating filter ('safe', 'r18', or None to disable)."""
        self.age_filter = filter

    def handle(self, feed):
        """Fetch entries newer than the stored last_id for *feed* and return
        a list of {'content': str, 'files': [...]} payloads, oldest first.
        Returns [] for unknown feeds, API errors, or when nothing is new.
        """
        if feed == 'followings':
            data = self.api.me_following_works(
                image_sizes=['large', 'medium'], include_stats=False)
        elif feed == 'bookmarks':
            data = self.api.me_favorite_works()
        else:
            return []
        if data['status'] != 'success':
            print('invalid response')
            print('got:')
            print(data)
            return []
        results = data['response']
        save_data = self.data.get(feed, {'last_id': 0})
        print('latest id: {}'.format(save_data.get('last_id')))
        # Keep only works newer than the last one we handled.
        results = [x for x in results if x['id'] > save_data.get('last_id')]
        if not results:
            return []
        # Results come newest-first; remember the newest id we have seen.
        save_data['last_id'] = results[0]['id']
        self.data[feed] = save_data
        self.data.save()
        ret = []
        for entry in results:
            print('Handling pixiv entry {}'.format(entry['id']))
            # NOTE(fix): "is not None" instead of "!= None".
            if self.age_filter is not None:
                if entry['age_limit'] in ['r18', 'r18-g'] and self.age_filter == 'safe':
                    print('skipping because currently in safe mode')
                    continue
                if entry['age_limit'] == 'all-age' and self.age_filter == 'r18':
                    print('skipping because currently in r18 mode')
                    continue
            content = '<https://www.pixiv.net/artworks/{}>'.format(entry['id'])
            content += '\n{} by {} ({})'.format(
                entry['title'], entry['user']['name'], entry['user']['account'])
            content += '\nTags: {}'.format(' '.join(entry['tags']))
            if entry['is_manga']:
                print('it\'s a manga')
                # Manga pages require a second API call for per-page URLs.
                work = self.api.works(entry['id'])
                if work['status'] != 'success':
                    continue
                work = work['response']
                if len(work) == 0:
                    continue
                work = work[0]
                urls = [x['image_urls']['medium'] for x in work['metadata']['pages']]
                if len(urls) > 4:
                    content += '\n{} more pictures not shown here'.format(len(urls) - 4)
                    urls = urls[:4]
            else:
                if entry['width'] > 2000 or entry['height'] > 2000:
                    # Skip full resolution for very large images.
                    content += '\n(not displaying full resolution because it is too large)'
                    urls = [entry['image_urls']['medium']]
                else:
                    urls = [entry['image_urls']['large']]
            files = []
            # index counts *successful* downloads only, so page names stay
            # contiguous even when a download fails.
            index = 0
            for url in urls:
                print('downloading picture...')
                response = requests.get(url, headers={'referer': 'https://pixiv.net'})
                if response.status_code != 200:
                    continue
                ext = Path(url).suffix
                files.append({'data': response.content,
                              'name': 'page{}{}'.format(index, ext)})
                index += 1
            ret.append({'content': content, 'files': files})
        # Post oldest first.
        ret.reverse()
        return ret
class Pixiv(object):
    """Pixiv scraper: pulls followed users' works, records their metadata in
    the shared dbDict, and downloads images, avatars, and ugoira animations.
    """

    def __init__(self, dbDict, config):
        """
        Args:
            dbDict: shared persistent dict with a 'works' mapping.
            config: dict with PIXIV_USERNAME / PIXIV_PASSWORD /
                PIXIV_DOWNLOAD_DIRECTORY / MAX_WORKS_ON_PAGE keys.
        """
        self.config = config
        self.dbDict = dbDict
        self.username = config['PIXIV_USERNAME']
        self.password = config['PIXIV_PASSWORD']
        downloadRoot = config['PIXIV_DOWNLOAD_DIRECTORY']
        self.imageDirectory = os.path.join(downloadRoot, 'images')
        self.ugoiraDirectory = os.path.join(downloadRoot, 'ugoira')
        self.avatarDirectory = os.path.join(downloadRoot, 'avatars')
        os.makedirs(self.imageDirectory, exist_ok=True)
        os.makedirs(self.ugoiraDirectory, exist_ok=True)
        os.makedirs(self.avatarDirectory, exist_ok=True)
        self.api = PixivAPI()
        self.authorize()

    def authorize(self):
        """Log in to the Pixiv API with the stored credentials."""
        self.api.login(self.username, self.password)

    def loadWorks(self):
        """Fetch page 1 of followed users' works and record their metadata."""
        print('Retrieving Pixiv works')
        self.authorize()  # refresh the session before fetching
        apiWorks = self.api.me_following_works(
            1, self.config['MAX_WORKS_ON_PAGE'])
        # NOTE(fix): plain loop instead of a side-effect-only list
        # comprehension (and the pointless [w for w in ...] copy).
        for workDict in apiWorks['response']:
            self._getImageData(workDict)

    def loadExtraWorkInfo(self):
        """Download avatar and image files for recorded works that have no
        local files yet, then drop their cached raw API blobs."""
        worksToUpdate = [
            work for work in self.dbDict['works'].values()
            if work['website'] == 'Pixiv' and not work.get('imageUrls')
        ]
        if worksToUpdate:
            print("Found {} new Pixiv works".format(len(worksToUpdate)))
        updates = []
        for work in worksToUpdate:
            imageDict = work['pixivMeta']
            # NOTE(fix): guard the chained .get() calls — a missing 'user'
            # or 'profile_image_urls' used to raise AttributeError on None.
            user = imageDict.get('user') or {}
            profileImageUrls = user.get('profile_image_urls') or {}
            extraInfo = {
                'authorAvatarUrl':
                    self._getAvatarUrl(str(profileImageUrls.get('px_50x50'))),
                'imageUrls': self._getImageUrls(imageDict),
                'pixivMeta': '',  # raw API data no longer needed once downloaded
            }
            updates.append((work['identifier'], extraInfo))
        for identifier, extraInfo in updates:
            self.dbDict['works'][identifier].update(extraInfo)

    def _getImageData(self, imageDict):
        """Record metadata for one work unless it is already in the db."""
        identifier = str(imageDict.get('id'))
        if identifier in self.dbDict['works']:
            return  # skip images we've already loaded
        user = imageDict.get('user') or {}
        self.dbDict['works'][identifier] = {
            'identifier': identifier,
            'authorName': str(user.get('name')),
            'authorHandle': str(user.get('account')),
            'authorAvatarUrl': None,
            'profileUrl':
                'http://www.pixiv.net/member.php?id=' + str(user.get('id')),
            'website': 'Pixiv',
            'imageTitle': str(imageDict.get('title')),
            'imageUrls': None,
            'imagePageUrl':
                'http://www.pixiv.net/member_illust.php?mode=medium&illust_id='
                + str(imageDict.get('id')),
            'imageTimestamp': self._parseTime(imageDict),
            'imageType': str(imageDict.get('type')),
            'nsfw': str(imageDict.get('age_limit') != 'all-age'),
            # NOTE(fix): str(None) is 'None' (truthy), so the old
            # "str(...) or '500'" fallback could never trigger; apply the
            # default before stringifying.
            'width': str(imageDict.get('width') or 500),
            'height': str(imageDict.get('height') or 500),
            'success': str(imageDict.get('status') == 'success'),
            'error': str(imageDict.get('errors')),
            # stores the pixiv API info to facilitate late download of images
            'pixivMeta': imageDict,
        }

    def _parseTime(self, imageDict):
        """Convert the work's timestamp ('%Y-%m-%d %H:%M:%S', Asia/Tokyo) to
        a UTC ISO-8601 string, using the later of created/reuploaded time."""
        # NOTE(fix): also read the correctly-spelled 'reuploaded_time' key;
        # the original only read a misspelled 'reupoloaded_time' (kept too,
        # in case the API really uses that spelling — TODO confirm).
        s = max(imageDict.get('created_time', ''),
                imageDict.get('reuploaded_time', ''),
                imageDict.get('reupoloaded_time', ''))
        naive = datetime.datetime.strptime(s, '%Y-%m-%d %H:%M:%S')
        # NOTE(fix): pytz zones must be attached with localize();
        # replace(tzinfo=pytz.timezone(...)) silently uses the zone's raw
        # LMT offset (+09:19 for Asia/Tokyo) instead of JST (+09:00).
        jst = pytz.timezone("Asia/Tokyo")
        return jst.localize(naive).astimezone(pytz.utc).isoformat()

    def _getAvatarUrl(self, remoteUrl):
        """Download the author's avatar and return its local path."""
        return self._downloadImage(remoteUrl, self.avatarDirectory)

    def _getImageUrls(self, imageDict):
        """Download every image belonging to a work (illustration, manga
        pages, or a rendered ugoira) and return the local paths."""
        workType = imageDict.get('type')
        if imageDict.get('is_manga'):
            # Manga pages need a second API call for per-page URLs.
            response = self.api.works(imageDict['id'])
            response = response.get('response')[0] or {}
            metadata = response.get('metadata') or {}
            pages = metadata.get('pages') or []

            def getMangaUrl(d):
                urld = d.get('image_urls')
                return self._generateImageUrl(
                    urld.get('small') or urld.get('medium') or urld.get('large'))

            urls = [getMangaUrl(item) for item in pages]
        # Weird special case: "type" field in Pixiv JSON can be "manga" while
        # "is_manga" is False. In this case there is only a single image URL
        # and the JSON is formatted like an illustration
        elif workType == 'illustration' or (workType == 'manga'
                                            and not imageDict.get('is_manga')):
            urlDict = imageDict.get('image_urls') or {}
            urls = [
                self._generateImageUrl(
                    urlDict.get('small') or urlDict.get('medium')
                    or urlDict.get('large'))
            ]
        elif workType == 'ugoira':
            return self._constructUgoira(imageDict.get('id'))
        else:
            # Default case; all response types seem to have at least
            # something in image_urls
            urlDict = imageDict.get('image_urls') or {}
            urls = [
                urlDict.get('small') or urlDict.get('medium')
                or urlDict.get('large')
            ]
        return [self._downloadImage(url, self.imageDirectory) for url in urls]

    def _generateImageUrl(self, url):
        # Construct the URL for the full-res image. Super brittle; entirely
        # dependent on Pixiv never changing anything.
        leftSide = url[:url[8:].find('/') + 9]  # split on first slash after https://
        rightSide = url[url.find('/img/'):].replace('_master1200', '')
        return leftSide + 'img-original' + rightSide

    def _downloadImage(self, url, directory):
        """Download *url* into *directory*, probing png/jpg/gif extensions.
        Returns the local path, or a "<status> <url>" string on failure."""
        name = url[url.rfind('/') + 1:url.rfind('.')]
        extant = {
            existing.split('.')[0]: os.path.join(directory, existing)
            for existing in os.listdir(directory)
        }
        if extant.get(name):
            print('Already downloaded {}'.format(url))
            return extant.get(name)
        print('Downloading ' + url)

        def attemptDownload(attemptUrl, suffix):
            # Full-res URLs don't advertise their extension; try each one.
            attemptUrl = '.'.join((attemptUrl.rpartition('.')[0], suffix))
            return requests.get(
                attemptUrl,
                headers={'referer': attemptUrl[:attemptUrl.find('/img')]},
                stream=True)

        r = attemptDownload(url, 'png')
        if r.status_code == 404:
            r = attemptDownload(url, 'jpg')
            if r.status_code == 404:
                r = attemptDownload(url, 'gif')
        if r.status_code == 200:
            filename = url.split('/')[-1]
            filepath = os.path.join(directory, filename)
            with open(filepath, 'wb') as f:
                for chunk in r:
                    f.write(chunk)
            return '/'.join((directory, filename))
        # NOTE(fix): status_code is an int; the old "status + ' ' + url"
        # concatenation raised TypeError on every failed download.
        return '{} {}'.format(r.status_code, url)

    def _constructUgoira(self, identifier):
        """Fetch an ugoira's frame zip, extract it, and render a webm with
        ffmpeg. Returns a one-element list holding the local video path."""
        directory = os.path.join(self.ugoiraDirectory, str(identifier))
        os.makedirs(directory, exist_ok=True)
        response = self.api.works(identifier)
        response = response.get('response')[0] or {}
        metadata = response.get('metadata') or {}
        frameTimes = [
            'duration {}'.format(delay['delay_msec'] / 1000)
            for delay in metadata.get('frames')
        ]
        # I don't think zip_urls will ever be longer than 1 but ??
        zipUrl = sorted(metadata['zip_urls'].items())[-1][1]
        zipPath = self._downloadUgoiraZip(zipUrl, directory)
        with zipfile.ZipFile(zipPath, 'r') as zap:
            zap.extractall(directory)
        # NOTE(fix): sort the extracted frame names — os.listdir() order is
        # arbitrary, which could scramble the animation sequence.
        imagePaths = [
            "file '{}'".format(fileName)
            for fileName in sorted(os.listdir(directory))
            if not fileName.endswith('.zip')
        ]
        # Interleave "file" and "duration" lines for ffmpeg's concat demuxer.
        frameData = '\n'.join(itertools.chain(*zip(imagePaths, frameTimes)))
        concatFile = os.path.join(directory, 'concat.txt')
        print('Writing frame data to: {}'.format(concatFile))
        with open(concatFile, 'w') as f:
            f.write(frameData)
        concatFile = os.path.abspath(os.path.join(os.getcwd(), concatFile))
        workingDirectory = os.path.abspath(os.path.join(os.getcwd(), directory))
        outFile = os.path.join(directory, '{}.webm'.format(identifier))
        # NOTE(fix): argument list without shell=True — paths containing
        # spaces or shell metacharacters no longer break the command.
        ffmpeg = [
            'ffmpeg', '-n', '-f', 'concat', '-i', concatFile,
            '-c:v', 'libvpx', '-crf', '10', '-b:v', '2M',
            '{}.webm'.format(identifier),
        ]
        print('Rendering video to {}'.format(outFile))
        subprocess.run(ffmpeg, cwd=workingDirectory)
        print('Finished rendering')
        return [outFile]

    def _downloadUgoiraZip(self, url, directory):
        """Download the ugoira frame zip (skipping if already present) and
        return its local path."""
        print('Downloading ugoira zip: {}'.format(url))
        path = os.path.join(directory, url.split('/')[-1])
        if os.path.exists(path):
            print('Zip already downloaded; skipping')
            return path
        r = requests.get(url,
                         headers={'referer': url[:url.find('/img')]},
                         stream=True)
        with open(path, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:  # filter out keep-alive new chunks
                    f.write(chunk)
        return path