Example #1
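A feed handler that authenticates with a stored refresh token, remembers the newest work id seen per feed, and turns new 'followings' / 'bookmarks' entries into post payloads (caption text plus downloaded image files).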
# Imports required to run this snippet (not shown in the original listing).
# PixivAPI comes from the pixivpy3 package; Config is a project-local TOML
# config wrapper that is not part of this listing.
from pathlib import Path

import requests
from pixivpy3 import PixivAPI
class PixivHandler:
    def __init__(self, name, app_config=None):
        # Avoid a shared mutable default argument.
        app_config = app_config or {}
        config_path = Path(app_config.get('handlers_config_dir', '.')) / 'pixiv.toml'
        data_path = Path(app_config.get('data_dir', './data/')) / '{}.toml'.format(name)
        self.config = Config(config_path, write_defaults=True, defaults={
            'refresh': 'xxxx',
        })
        self.config.save()
        self.data = Config(data_path)
        self.age_filter = None
        self.api = PixivAPI()
        if self.config.get('refresh'):
            print('logging in to Pixiv...')
            login_response = self.api.auth(refresh_token=self.config['refresh'])
            print('logged in into account {0.name} ({0.account}) [{0.id}]'.format(login_response['response']['user']))

    def set_age_filter(self, age_filter):
        self.age_filter = age_filter

    def handle(self, feed):
        if feed == 'followings':
            data = self.api.me_following_works(image_sizes=['large', 'medium'], include_stats=False)
        elif feed == 'bookmarks':
            data = self.api.me_favorite_works()
        else:
            return []
        if data['status'] != 'success':
            print('invalid response')
            print('got:')
            print(data)
            return []
        results = data['response']
        save_data = self.data.get(feed, {'last_id': 0})
        print('latest id: {}'.format(save_data.get('last_id')))
        results = list(filter(lambda x: x['id'] > save_data.get('last_id'), results))
        if len(results) == 0:
            return []
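        # The feed is newest-first, so the first remaining entry carries the newest id.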
        save_data['last_id'] = results[0]['id']
        self.data[feed] = save_data
        self.data.save()
        ret = []
        for entry in results:
            print('Handling pixiv entry {}'.format(entry['id']))
            if self.age_filter is not None:
                if entry['age_limit'] in ['r18', 'r18-g'] and self.age_filter == 'safe':
                    print('skipping because currently in safe mode')
                    continue
                if entry['age_limit'] == 'all-age' and self.age_filter == 'r18':
                    print('skipping because currently in r18 mode')
                    continue
            content = '<https://www.pixiv.net/artworks/{}>'.format(entry['id'])
            content += '\n{} by {} ({})'.format(entry['title'], entry['user']['name'], entry['user']['account'])
            content += '\nTags: {}'.format(' '.join(entry['tags']))
            if entry['is_manga']:
                print('it\'s a manga')
                work = self.api.works(entry['id'])
                if work['status'] != 'success':
                    continue
                work = work['response']
                if len(work) == 0:
                    continue
                work = work[0]
                urls = [x['image_urls']['medium'] for x in work['metadata']['pages']]
                if len(urls) > 4:
                    content += '\n{} more pictures not shown here'.format(len(urls) - 4)
                    urls = urls[:4]
            else:
                if entry['width'] > 2000 or entry['height'] > 2000:
                    content += '\n(not displaying full resolution because it is too large)'
                    urls = [entry['image_urls']['medium']]
                else:
                    urls = [entry['image_urls']['large']]
            files = []
            index = 0
            for url in urls:
                print('downloading picture...')
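                # Pixiv's image CDN rejects requests that lack a pixiv.net referer.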
                response = requests.get(url, headers={'referer': 'https://pixiv.net'})
                if response.status_code != 200:
                    continue
                ext = Path(url).suffix
                files.append({'data': response.content, 'name': 'page{}{}'.format(index, ext)})
                index += 1
            ret.append({'content': content, 'files': files})
        ret.reverse()
        return ret
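
A minimal usage sketch (hypothetical driver code, not part of the original; it only exercises the methods shown above and assumes pixiv.toml already holds a valid refresh token):

handler = PixivHandler('mybot', app_config={
    'handlers_config_dir': './config',  # directory holding pixiv.toml
    'data_dir': './data',               # per-handler state (last seen ids)
})
handler.set_age_filter('safe')          # drop r18 / r18-g entries
for post in handler.handle('followings'):
    print(post['content'])
    print('{} file(s) attached'.format(len(post['files'])))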
Example #2
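A scraper class from ArtBot that mirrors new works by followed artists into a local database dict, downloading images and author avatars, rewriting master URLs to full-resolution originals, and rendering ugoira animations to webm via ffmpeg.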
File: pixiv.py  Project: AmazingThew/ArtBot

# Imports required to run this snippet (not shown in the original listing).
import datetime
import itertools
import os
import subprocess
import zipfile

import pytz
import requests
from pixivpy3 import PixivAPI
class Pixiv(object):
    def __init__(self, dbDict, config):
        self.config = config
        self.dbDict = dbDict
        self.username = config['PIXIV_USERNAME']
        self.password = config['PIXIV_PASSWORD']
        self.imageDirectory = os.path.join(config['PIXIV_DOWNLOAD_DIRECTORY'],
                                           'images')
        self.ugoiraDirectory = os.path.join(config['PIXIV_DOWNLOAD_DIRECTORY'],
                                            'ugoira')
        self.avatarDirectory = os.path.join(config['PIXIV_DOWNLOAD_DIRECTORY'],
                                            'avatars')
        os.makedirs(self.imageDirectory, exist_ok=True)
        os.makedirs(self.ugoiraDirectory, exist_ok=True)
        os.makedirs(self.avatarDirectory, exist_ok=True)
        self.api = PixivAPI()
        self.authorize()

    def authorize(self):
        self.api.login(self.username, self.password)

    def loadWorks(self):
        print('Retrieving Pixiv works')
        self.authorize()
        apiWorks = self.api.me_following_works(
            1, self.config['MAX_WORKS_ON_PAGE'])
        for workDict in apiWorks['response']:
            self._getImageData(workDict)

    def loadExtraWorkInfo(self):
        updates = []
        worksToUpdate = [
            work for work in self.dbDict['works'].values()
            if work['website'] == 'Pixiv' and not work.get('imageUrls')
        ]
        if worksToUpdate:
            print("Found {} new Pixiv works".format(len(worksToUpdate)))
        for work in worksToUpdate:
            imageDict = work['pixivMeta']
            extraInfo = {
                'authorAvatarUrl': self._getAvatarUrl(str(
                    imageDict.get('user', {}).get('profile_image_urls', {}).get('px_50x50'))),
                'imageUrls': self._getImageUrls(imageDict),
                'pixivMeta': '',  # clear the cached API payload once images are fetched
            }
            updates.append((work['identifier'], extraInfo))

        for identifier, extraInfo in updates:
            self.dbDict['works'][identifier].update(extraInfo)

    def _getImageData(self, imageDict):
        identifier = str(imageDict.get('id'))
        if identifier not in self.dbDict['works']:  # Skip images we've already loaded
            user = imageDict.get('user') or {}
            imageData = {
                'identifier': identifier,
                'authorName': str(user.get('name')),
                'authorHandle': str(user.get('account')),
                'authorAvatarUrl': None,
                'profileUrl': 'http://www.pixiv.net/member.php?id=' + str(user.get('id')),
                'website': 'Pixiv',
                'imageTitle': str(imageDict.get('title')),
                'imageUrls': None,
                'imagePageUrl': 'http://www.pixiv.net/member_illust.php?mode=medium&illust_id='
                                + str(imageDict.get('id')),
                'imageTimestamp': self._parseTime(imageDict),
                'imageType': str(imageDict.get('type')),
                'nsfw': str(imageDict.get('age_limit') != 'all-age'),
                # str(None) is truthy, so apply the fallback before stringifying.
                'width': str(imageDict.get('width') or 500),
                'height': str(imageDict.get('height') or 500),
                'success': str(imageDict.get('status') == 'success'),
                'error': str(imageDict.get('errors')),
                'pixivMeta': imageDict,  # stores the pixiv API info to facilitate late download of images
            }

            self.dbDict['works'][identifier] = imageData

    def _parseTime(self, imageDict):
        # Prefer the reupload time when present; string comparison is safe
        # for this fixed timestamp format.
        s = max(imageDict.get('created_time', ''),
                imageDict.get('reuploaded_time', ''))
        naive = datetime.datetime.strptime(s, '%Y-%m-%d %H:%M:%S')
        # localize() applies proper JST; replace(tzinfo=...) would pick up
        # pytz's raw LMT offset for Asia/Tokyo.
        return pytz.timezone('Asia/Tokyo').localize(naive).astimezone(
            pytz.utc).isoformat()

    def _getAvatarUrl(self, remoteUrl):
        return self._downloadImage(remoteUrl, self.avatarDirectory)

    def _getImageUrls(self, imageDict):
        workType = imageDict.get('type')

        if imageDict.get('is_manga'):
            response = self.api.works(imageDict['id'])
            # Guard against an empty response list, not just a falsy first element.
            response = (response.get('response') or [{}])[0]
            metadata = response.get('metadata') or {}
            pages = metadata.get('pages') or []

            def getMangaUrl(d):
                urld = d.get('image_urls')
                return self._generateImageUrl(
                    urld.get('small') or urld.get('medium')
                    or urld.get('large'))

            urls = [getMangaUrl(item) for item in pages]

        # Weird special case: "type" field in Pixiv JSON can be "manga" while "is_manga" is False
        # In this case there is only a single image URL and the JSON is formatted like an illustration
        elif workType == 'illustration' or (workType == 'manga'
                                            and not imageDict.get('is_manga')):
            urlDict = imageDict.get('image_urls') or {}
            urls = [
                self._generateImageUrl(
                    urlDict.get('small') or urlDict.get('medium')
                    or urlDict.get('large'))
            ]

        elif workType == 'ugoira':
            return self._constructUgoira(imageDict.get('id'))

        else:
            #Default case; all response types seem to have at least something in image_urls
            urlDict = imageDict.get('image_urls') or {}
            urls = [
                urlDict.get('small') or urlDict.get('medium')
                or urlDict.get('large')
            ]

        urls = [self._downloadImage(url, self.imageDirectory) for url in urls]
        return urls

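    # Example of the rewrite _generateImageUrl performs (hypothetical URL, for
    # illustration only):
    #   in:  https://i.pximg.net/c/600x600/img-master/img/2020/01/01/00/00/00/12345678_p0_master1200.jpg
    #   out: https://i.pximg.net/img-original/img/2020/01/01/00/00/00/12345678_p0.jpg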
    def _generateImageUrl(self, url):
        # Construct the URL for the full-res image. Super brittle; entirely
        # dependent on Pixiv never changing anything.
        leftSide = url[:url[8:].find('/') + 9]  # Split on the first slash after https://
        rightSide = url[url.find('/img/'):].replace('_master1200', '')
        return leftSide + 'img-original' + rightSide

    def _downloadImage(self, url, directory):
        name = url[url.rfind('/') + 1:url.rfind('.')]
        # Map already-downloaded basenames (sans extension) to their paths.
        extant = {
            fileName.split('.')[0]: os.path.join(directory, fileName)
            for fileName in os.listdir(directory)
        }
        if extant.get(name):
            print('Already downloaded {}'.format(url))
            return extant.get(name)

        print('Downloading ' + url)

        def attemptDownload(attemptUrl, suffix):
            # Swap the extension, and send a same-origin referer so the CDN
            # will serve the image.
            attemptUrl = '.'.join((attemptUrl.rpartition('.')[0], suffix))
            return requests.get(
                attemptUrl,
                headers={'referer': attemptUrl[:attemptUrl.find('/img')]},
                stream=True)

        # The original-resolution extension is unknown, so try each in turn.
        for suffix in ('png', 'jpg', 'gif'):
            r = attemptDownload(url, suffix)
            if r.status_code != 404:
                break

        if r.status_code == 200:
            # Save under the extension that actually succeeded, not the one
            # the master URL happened to carry.
            filename = '.'.join((url.split('/')[-1].rpartition('.')[0], suffix))
            filepath = os.path.join(directory, filename)
            with open(filepath, 'wb') as f:
                for chunk in r:
                    f.write(chunk)
            return filepath
        else:
            return '{} {}'.format(r.status_code, url)

    def _constructUgoira(self, identifier):
        directory = os.path.join(self.ugoiraDirectory, str(identifier))
        os.makedirs(directory, exist_ok=True)

        response = self.api.works(identifier)
        response = (response.get('response') or [{}])[0]
        metadata = response.get('metadata') or {}
        frameTimes = [
            'duration {}'.format(delay['delay_msec'] / 1000)
            for delay in metadata.get('frames') or []
        ]
        # zip_urls is unlikely to ever hold more than one entry; sort by key
        # and take the last (largest size) just in case.
        zipUrl = sorted(metadata['zip_urls'].items())[-1][1]

        zipPath = self._downloadUgoiraZip(zipUrl, directory)
        with zipfile.ZipFile(zipPath, 'r') as zap:
            zap.extractall(directory)

        imagePaths = [
            "file '{}'".format(fileName)
            for fileName in sorted(os.listdir(directory))
            if not fileName.endswith('.zip')
        ]
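        # Interleave "file ..." and "duration ..." lines into the script format
        # expected by ffmpeg's concat demuxer.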
        frameData = '\n'.join(itertools.chain(*zip(imagePaths, frameTimes)))

        concatFile = os.path.join(directory, 'concat.txt')
        print('Writing frame data to: {}'.format(concatFile))
        with open(concatFile, 'w') as f:
            f.write(frameData)

        concatFile = os.path.abspath(os.path.join(os.getcwd(), concatFile))
        workingDirectory = os.path.abspath(os.path.join(
            os.getcwd(), directory))
        outFile = os.path.join(directory, '{}.webm'.format(identifier))
        # Build the command as a list so paths never pass through a shell.
        ffmpeg = ['ffmpeg', '-n', '-f', 'concat', '-i', concatFile,
                  '-c:v', 'libvpx', '-crf', '10', '-b:v', '2M',
                  '{}.webm'.format(identifier)]
        print('Rendering video to {}'.format(outFile))
        subprocess.run(ffmpeg, cwd=workingDirectory)
        print('Finished rendering')

        return [outFile]

    def _downloadUgoiraZip(self, url, directory):
        print('Downloading ugoira zip: {}'.format(url))
        path = os.path.join(directory, url.split('/')[-1])
        if os.path.exists(path):
            print('Zip already downloaded; skipping')
        else:
            r = requests.get(url,
                             headers={'referer': url[:url.find('/img')]},
                             stream=True)
            with open(path, 'wb') as f:
                for chunk in r.iter_content(chunk_size=1024):
                    if chunk:  # filter out keep-alive new chunks
                        f.write(chunk)

        return path
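
A minimal driver sketch (hypothetical values, not part of ArtBot; the config keys and the shape of dbDict mirror what the class above reads):

config = {
    'PIXIV_USERNAME': 'user@example.com',
    'PIXIV_PASSWORD': 'correct horse battery staple',
    'PIXIV_DOWNLOAD_DIRECTORY': './pixiv',
    'MAX_WORKS_ON_PAGE': 50,
}
dbDict = {'works': {}}

pixiv = Pixiv(dbDict, config)
pixiv.loadWorks()          # cache metadata for new works by followed artists
pixiv.loadExtraWorkInfo()  # download images/avatars for works that lack URLs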