Beispiel #1
0
def pixiv(page_url, options):
    """Build a ``Work`` for the Pixiv illustration that *page_url* points to.

    The illustration id is the integer found in the last path segment of the
    URL. The lookup goes through an ``AppPixivAPI`` session logged in with
    the ``"pixiv"`` entry of ``h.get_secrets()``.

    Args:
        page_url: URL whose final path segment is the illustration id.
        options: Mapping read for the keys ``"album"`` and ``"index"`` (only
            for multi-page works) and ``"username"``.

    Returns:
        A ``Work`` built from the title, a pair of author names, ``None``,
        whether ``x_restrict`` is positive, the image URL (a list of URLs
        when ``options["album"]`` is set on a multi-page work) and
        *page_url*.
    """
    from pixivpy3 import AppPixivAPI

    illust_id = int(urlparse(page_url).path.split("/")[-1])

    client = AppPixivAPI()
    credentials = h.get_secrets()["pixiv"]
    client.login(credentials["username"], credentials["password"])

    illust = client.illust_detail(illust_id)["illust"]
    pages = illust["meta_pages"]

    if len(pages) == 0:
        # Single-page work: the original lives under "meta_single_page".
        image_url = illust["meta_single_page"]["original_image_url"]
    elif options["album"]:
        image_url = [page["image_urls"]["original"] for page in pages]
    else:
        selected_page = pages[options["index"]]
        image_url = selected_page["image_urls"]["original"]

    title = illust["title"]

    # options["username"] decides which of the two names comes first.
    author = illust["user"]
    if options["username"]:
        first_name = author["account"]
    else:
        first_name = author["name"]
    if options["username"]:
        second_name = author["name"]
    else:
        second_name = author["account"]

    restricted = illust["x_restrict"] > 0
    return Work(
        title,
        (first_name, second_name),
        None,
        restricted,
        image_url,
        page_url,
    )
Beispiel #2
0
class Pixiv(DummySite):
    """Site adapter that enumerates the illustrations of one Pixiv user.

    The user id is the integer in the last ``/``-separated segment of the
    URL given to the constructor. Constructing an instance logs in and
    fetches the user's profile right away.
    """

    def __init__(self, url, username, password, proxy=None):
        """Log in, remember the user id from *url* and load the profile."""
        proxy_settings = get_proxy(proxy)
        # Proxy settings are merged over the default timeout, so a key
        # supplied by get_proxy() wins over the default below.
        api_kwargs = {"timeout": (3, 10), **proxy_settings}
        self.api = AppPixivAPI(**api_kwargs)
        self._fetcher = PixivFetcher(**proxy_settings)
        self.api.login(username, password)
        self._user_id = int(url.split("/")[-1])
        self._dir_name = None
        self._total_illustrations = 0
        self._fetch_user_detail()

    @property
    def fetcher(self):
        """The ``PixivFetcher`` created in the constructor."""
        return self._fetcher

    @property
    def dir_name(self):
        """Directory name derived from the user's profile."""
        assert self._dir_name is not None
        return self._dir_name

    def _fetch_user_detail(self):
        """Load the profile, then set the directory name and work count.

        Returns:
            The normalized directory name, ``<name>-<account>-<id>``.
        """
        assert self._user_id is not None
        detail = self.api.user_detail(self._user_id)
        user = detail['user']
        raw_name = "-".join((user['name'], user['account'], str(user['id'])))
        self._dir_name = normalize_filename(raw_name)
        self._total_illustrations = detail['profile']['total_illusts']
        return self.dir_name

    def _fetch_image_list(self, ):
        """Yield what ``parse_image_urls`` produces for every illustration.

        Result pages are followed through ``next_url`` until it is ``None``.
        """
        page = self.api.user_illusts(self._user_id)
        while True:
            for illustration in page.illusts:
                yield from parse_image_urls(illustration)
            following = page.next_url
            if following is None:
                return
            page = self.api.user_illusts(**self.api.parse_qs(following))

    def _fetch_single_image_url(self, illustration_id):
        """Return the ``large`` image URL of a single illustration."""
        detail = self.api.illust_detail(illustration_id)
        return detail.illust.image_urls['large']

    @property
    def tasks(self):
        """Generator over everything ``_fetch_image_list`` yields."""
        yield from self._fetch_image_list()
Beispiel #3
0
def pixiv_render(item, base_path, debug=False):
    """Download every image of the illustration referenced by *item*.

    The module-level ``pixiv_api`` client is created and logged in on first
    use. Files go into ``base_path + "<user name>_<user id>"``.

    Args:
        item: Object whose ``get_remote()`` value is handed to
            ``get_illust_id``.
        base_path: String prefix that the per-user directory name is
            appended to.
        debug: When true, print a dot after each download.

    Returns:
        A ``(paths, detail)`` tuple: the downloaded file paths relative to
        *base_path*, and the raw ``illust_detail`` response.
    """
    global pixiv_api
    if pixiv_api is None:
        pixiv_api = AppPixivAPI()
        pixiv_api.login(pixiv_username, pixiv_password)

    illust_id = get_illust_id(item.get_remote())
    detail = pixiv_api.illust_detail(illust_id)
    illust = detail['illust']

    author = illust['user']
    path = f"{author['name']!s}_{author['id']!s}"
    cpath(base_path + path)

    urls = []
    if illust['page_count'] > 1:
        for page in illust['meta_pages']:
            try:
                candidate = page['image_urls']['original']
            except (NameError, KeyError):
                # No "original" entry: fall back to the last listed size.
                try:
                    candidate = list(page['image_urls'].values())[-1]
                except (NameError, KeyError):
                    continue
            if candidate is not None:
                urls.append(candidate)

    if not urls:
        # Single-page work, or no usable URL found on any page.
        try:
            urls.append(illust['meta_single_page']['original_image_url'])
        except (NameError, KeyError):
            try:
                urls.append(illust['image_urls']['large'])
            except (NameError, KeyError):
                pass

    downloaded = []
    for url in urls:
        name = f"{illust['title']!s}_{illust_id!s}" + os.path.basename(url)
        downloaded.append(path + '/' + name)
        pixiv_api.download(url,
                           name=name,
                           path=os.path.abspath(base_path + path))
        if debug:
            print('.', end='', flush=True)
    return downloaded, detail
Beispiel #4
0
import json
import os
import re

from pixivpy3 import AppPixivAPI

# Login credentials: a JSON object with "email" and "password" keys.
with open('credentials.json') as cf:
    credentials = json.load(cf)

# Keep the last path segment of every pixiv URL. Surrounding whitespace and
# a trailing slash are removed first so the segment is the part carrying
# the illustration id rather than a bare newline.
with open('urls.txt') as uf:
    urls = [
        u.strip().rstrip('/').split('/')[-1] for u in uf if 'pixiv.net' in u
    ]

# The first run of digits in a segment is taken as the illustration id;
# segments without any digits are skipped instead of raising IndexError.
ids = [
    match.group()
    for match in (re.search(r'\d+', segment) for segment in urls)
    if match
]

api = AppPixivAPI()
api.login(credentials['email'], credentials['password'])

for illust_id in ids:
    json_result = api.illust_detail(illust_id)
    illust = json_result.get('illust')
    if not illust:
        # Error responses carry no "illust" entry.
        print(f'Skipping {illust_id}: no illustration data returned')
        continue

    pages = illust.get('meta_pages')
    if pages:
        # Multi-page work: "meta_single_page" has no URL, so download each
        # page's original instead.
        for page in pages:
            api.download(page['image_urls']['original'])
    else:
        api.download(illust['meta_single_page']['original_image_url'])
Beispiel #5
0
class PixivDownloader:
    """Download Pixiv posts (images, collections, manga, ugoira) to disk.

    Wraps an ``AppPixivAPI`` client. All ``download*`` methods that take a
    post are generators: nothing is fetched until they are iterated, and
    each yields the absolute path of a file it has written.
    """

    def __init__(self,
                 client=None,
                 username=None,
                 password=None,
                 log_level=logging.WARNING):
        """Create the downloader.

        Args:
            client: Existing API client to use. When omitted a new
                ``AppPixivAPI`` is created.
            username: Login name, used only when no *client* is given.
            password: Login password, used only when no *client* is given.
            log_level: Level set on the ``PixivDownloader`` logger.

        Raises:
            AttributeError: No *client* was given and only one of
                *username* / *password* was supplied.
        """
        if not client and (bool(username) != bool(password)):
            raise AttributeError(
                'If no client is given both username and password must be given'
            )

        self.api = client if client else AppPixivAPI()

        if not client and username and password:
            self.api.login(username, password)

        self.logger = logging.getLogger('PixivDownloader')
        # The named logger is shared by every instance; attach the stream
        # handler only once so messages are not emitted once per instance.
        if not self.logger.handlers:
            self.logger.addHandler(logging.StreamHandler())
        self.logger.setLevel(log_level)

    def login(self, username=None, password=None, refresh_token=None):
        """Authenticate the API client and return the ``auth`` response."""
        if refresh_token:
            self.logger.info('Loging in with refresh_token')
        elif username:
            self.logger.info('Loging in with username %s', username)
        else:
            self.logger.info('Loging')

        return self.api.auth(username=username,
                             password=password,
                             refresh_token=refresh_token)

    def logout(self):
        """Drop the current session by replacing the API client."""
        self.logger.info('Logout')
        self.api = AppPixivAPI()

    def get_id_from_url(self, url):
        """Return the first run of digits in the path of *url* as a string.

        Raises:
            ValueError: The URL path contains no digits.
        """
        match = re.search(r'\d+', urlparse(url).path)
        if match is None:
            raise ValueError('Url does not contain post id')

        return match.group()

    def download_by_id(self, post_id, output_dir):
        """Look up *post_id* and return the download generator for it.

        Raises:
            PixivDownloaderError: The API response contains an error.
        """
        data = self.api.illust_detail(post_id)
        if data.get('error'):
            raise PixivDownloaderError(
                'Could not get post info or post doesn\'t exist.', data)

        return self.download(data.illust, output_dir)

    def download_by_url(self, url, output_dir):
        """Extract the post id from *url* and delegate to ``download_by_id``."""
        return self.download_by_id(self.get_id_from_url(url), output_dir)

    def download(self, post, output_dir):
        """Pick the downloader matching ``post.type`` and return its generator.

        *output_dir* is expanded, made absolute and created if missing.

        Raises:
            PixivDownloaderError: ``post.type`` is not one of ``illust``,
                ``ugoira`` or ``manga``.
        """
        output_dir = Path(output_dir).expanduser().absolute()
        if not output_dir.is_dir():
            # exist_ok guards against the directory appearing between the
            # check above and the creation.
            output_dir.mkdir(parents=True, exist_ok=True)
            self.logger.debug('Created dir "%s"', output_dir)

        if post.type == 'illust' and not post.meta_pages:
            downloader, kind = self.download_illust, 'Image'
        elif post.type == 'illust' and post.meta_pages:
            downloader, kind = (self.download_illust_collection,
                                'Image Collection')
        elif post.type == 'ugoira':
            downloader, kind = self.download_ugoira, 'Video'
        elif post.type == 'manga':
            downloader, kind = self.download_manga, 'Manga'
        else:
            raise PixivDownloaderError(
                f'Post type "{post.type}" not supported')

        self.logger.info('Initialize "%s" downloader for post %s', kind,
                         post.id)
        return downloader(post, output_dir)

    @staticmethod
    def _get_extension(image_url):
        """Return the file extension for *image_url*, without the dot.

        URLs containing ``_webp`` are treated as WebP regardless of the
        suffix they carry.
        """
        if '_webp' in image_url:
            return 'webp'
        return os.path.splitext(image_url)[1].lstrip('.')

    def download_illust(self, post, output_dir):
        """Download a single-image post and yield the written file's path."""
        image_url = post.meta_single_page.get('original_image_url',
                                              post.image_urls.large)
        filename = self.get_filename(post, self._get_extension(image_url))

        self.logger.info('Downloading "%s"', image_url)
        self.api.download(image_url,
                          path=output_dir,
                          name=filename,
                          replace=True)
        yield (Path(output_dir) / filename).absolute()

    def download_illust_collection(self, post, output_dir):
        """Download every page of a multi-image post into *output_dir*."""
        output_dir = Path(output_dir)
        yield from self._downloade_meta_pages(post, output_dir)

    def download_manga(self, post, output_dir):
        """Download every page into a ``<title>-<account>`` subdirectory."""
        output_dir = Path(output_dir) / f'{post.title}-{post.user.account}'
        if not output_dir.is_dir():
            output_dir.mkdir(parents=True, exist_ok=True)
            self.logger.debug('Created dir "%s"', output_dir)

        yield from self._downloade_meta_pages(post, output_dir)

    def _downloade_meta_pages(self, post, output_dir):
        """Download each entry of ``post.meta_pages``, yielding each path.

        Files get a two-digit, one-based page suffix (``-01``, ``-02``, ...).
        """
        for index, image in enumerate(post.meta_pages, 1):
            image_url = image.image_urls.get('original',
                                             image.image_urls.large)
            filename = self.get_filename(post,
                                         self._get_extension(image_url),
                                         suffix=f'-{index:0>2}')

            self.logger.info('Downloading "%s"', image_url)
            self.api.download(image_url,
                              path=str(output_dir),
                              name=filename,
                              replace=True)
            yield (output_dir / filename).absolute()

    def download_ugoira(self, post, output_dir):
        """Download an ugoira, convert its frames to MP4 and yield the path.

        The frame archive is downloaded and unpacked in a temporary
        directory; only the finished video is moved into *output_dir*.
        """
        ugoira_data = self.api.ugoira_metadata(post.id).ugoira_metadata
        zip_url = ugoira_data.zip_urls.get('large',
                                           ugoira_data.zip_urls.medium)

        with TemporaryDirectory() as tmp:
            temp_dir = Path(tmp)
            # This was a plain string before, so the archive was literally
            # named "{post.id}.zip".
            filename = f'{post.id}.zip'
            self.logger.info('Downloading "%s"', zip_url)
            self.api.download(zip_url, path=str(temp_dir), name=filename)

            frames_dir = temp_dir / 'frames'
            frames_dir.mkdir()

            self._extract_zip(temp_dir / filename, frames_dir)

            video_name = self.get_filename(post, 'mp4')
            video_file = temp_dir / video_name
            # Only the first frame's delay is used, giving the video a
            # constant frame rate.
            self._generate_mp4_from_frames(video_file, frames_dir,
                                           ugoira_data.frames[0].delay)

            final_path = (Path(output_dir) / video_name).absolute()
            shutil.move(str(video_file), str(final_path))
            yield final_path.absolute()

    def get_filename(
        self,
        post,
        extension,
        prefix=None,
        suffix=None,
    ):
        """Return ``<prefix><id>-<title><suffix>.<extension>``.

        Slashes and spaces in the result are replaced with underscores.
        """
        suffix = suffix or ''
        prefix = prefix or ''
        filename = f'{prefix}{post.id}-{post.title}{suffix}.{extension}'
        return filename.replace('/', '_').replace(' ', '_')

    def _extract_zip(self, zip_file, output_dir):
        """Extract every member of *zip_file* into *output_dir*."""
        self.logger.info('Extract "%s"', zip_file)
        with ZipFile(zip_file, 'r') as archive:
            archive.extractall(output_dir)

    def _generate_mp4_from_frames(self, output_file, frames_dir, delay):
        """Encode the images in *frames_dir* (sorted by path) as an MP4.

        Args:
            output_file: Path of the video to write.
            frames_dir: Directory holding the frame images.
            delay: Per-frame delay in milliseconds; the frame rate is
                ``1000 / delay``.
        """
        self.logger.info('Generate video to "%s"', output_file)
        frame_paths = sorted(
            os.path.join(str(frames_dir), name)
            for name in os.listdir(frames_dir))
        frames = [imread(frame_path) for frame_path in frame_paths]

        framerate = 1000 / delay

        height, width, _layers = frames[0].shape
        video = VideoWriter(str(output_file), VideoWriter_fourcc(*'mp4v'),
                            framerate, (width, height))
        try:
            for frame in frames:
                video.write(frame)
        finally:
            # Release the writer even if encoding a frame fails.
            video.release()

        destroyAllWindows()
Beispiel #6
0
class APIHandler:
    """Single entry point for all Pixiv API traffic (meant to be a singleton).

    Login runs on a background thread started by ``start``. Every public
    request method first blocks in ``_await_login`` until that thread has
    finished.
    """
    def __init__(self):
        self._api_thread = threading.Thread(target=self._login)
        self._login_started = False
        self._login_done = False

        # Client that performs the login and all later requests.
        self._api = AppPixivAPI()
        # Both are assigned later: credentials in start() (the instance is
        # created before the config is available), response by the login.
        self._credentials: 'dict[str, str]'
        self._response: 'Json'

    def start(self, credentials):
        """Store *credentials* and launch the login thread (first call only)."""
        if self._login_started:
            return
        self._credentials = credentials
        self._api_thread.start()
        self._login_started = True

    def _await_login(self):
        """Block until the login thread has finished (joined only once)."""
        if self._login_done:
            return
        self._api_thread.join()
        self._login_done = True

    def _login(self):
        """Thread target: perform the token login."""
        self._login_with_token()

    def _login_with_token(self):
        """Authenticate with the stored refresh token.

        A ``PixivError`` is not propagated; troubleshooting hints are
        printed instead.
        """
        # TODO: refresh the token if it expired
        try:
            self._response = self._api.auth(
                refresh_token=self._credentials['refresh_token'])
        except PixivError as e:
            report = (
                '',
                e,
                'If this is a cloudflare captcha issue, just quit and retry',
                'It is not a problem with koneko or pixivpy',
                'Otherwise, please report to '
                'https://github.com/akazukin5151/koneko/issues',
                "Press 'q' and enter to exit",
            )
            for line in report:
                print(line)

    # Public API requests for user id
    def get_user_id(self) -> 'Json':
        """Return the logged-in user's id from the login response."""
        self._await_login()
        user = self._response['user']
        return user['id']

    # Public API request functions for each mode
    @funcy.retry(tries=3, errors=(ConnectionError, PixivError))
    @utils.spinner('')
    def artist_gallery(self, artist_user_id, offset) -> 'Json':
        """Mode 1: illustrations of one artist, starting at *offset*."""
        self._await_login()
        return self._api.user_illusts(artist_user_id, offset=offset)

    @funcy.retry(tries=3, errors=(ConnectionError, PixivError))
    def protected_illust_detail(self, image_id) -> 'Json':
        """Mode 2: details of a single illustration."""
        self._await_login()
        return self._api.illust_detail(image_id)

    @funcy.retry(tries=3, errors=(ConnectionError, PixivError))
    def following_user_request(self, user_id, publicity, offset) -> 'Json':
        """Mode 3: users followed by *user_id*, filtered by *publicity*."""
        self._await_login()
        return self._api.user_following(
            user_id, restrict=publicity, offset=offset)

    @funcy.retry(tries=3, errors=(ConnectionError, PixivError))
    def search_user_request(self, searchstr, offset) -> 'Json':
        """Mode 4: search users matching *searchstr*."""
        self._await_login()
        return self._api.search_user(searchstr, offset=offset)

    @funcy.retry(tries=3, errors=(ConnectionError, PixivError))
    @utils.spinner('')
    def illust_follow_request(self, restrict, offset) -> 'Json':
        """Mode 5: illustrations from followed users."""
        self._await_login()
        return self._api.illust_follow(restrict=restrict, offset=offset)

    @funcy.retry(tries=3, errors=(ConnectionError, PixivError))
    @utils.spinner('')
    def illust_related_request(self, image_id, offset) -> 'Json':
        """Mode 15 (1.5 * 10 so it's an int): illustrations related to one."""
        self._await_login()
        return self._api.illust_related(illust_id=image_id, offset=offset)

    @funcy.retry(tries=3, errors=(ConnectionError, PixivError))
    @utils.spinner('')
    def illust_recommended_request(self, offset) -> 'Json':
        """Mode 6: recommended illustrations."""
        self._await_login()
        return self._api.illust_recommended(offset=offset)

    # Download
    @funcy.retry(tries=3, errors=(ConnectionError, PixivError))
    def protected_download(self, url, path, name) -> 'IO':
        """Download *url* to *path*/*name*; retried so one failure is not fatal."""
        self._await_login()
        self._api.download(url, path=path, name=name)
Beispiel #7
0
class Pixiv:
    """Config-driven wrapper around ``AppPixivAPI`` for lookups on Pixiv.

    All lookups return ``None`` while the ``Pixiv.enabled`` config option is
    false. When a response reports ``invalid_grant`` the session is
    re-authenticated with the saved refresh token and the call is retried
    once.
    """

    # Extracts the handle from a Twitter profile URL. Dots are escaped and
    # the handle stops at the first "/", "?" or "#", so a trailing slash or
    # query string does not end up in it.
    _RE_TWITTER = re.compile(
        r'^https?://(www\.)?twitter\.com/(?P<username>[^/?#]+)')

    def __init__(self):
        self.enabled = config.getboolean('Pixiv', 'enabled', fallback=False)
        self._refresh_token = config.get('Pixiv', 'refresh_token', fallback=None)
        self._log = logging.getLogger(__name__)

        self._pixiv = AppPixivAPI()
        self._pixiv.set_accept_language(config.get('Pixiv', 'language', fallback='en-US'))

        # Kept as an instance attribute for code that reads it directly.
        self._re_twitter = self._RE_TWITTER

        if self.enabled:
            self._login()

    def _login(self) -> None:
        """
        Authenticate to Pixiv with the configured refresh token.
        Failures are logged, not raised.
        Returns:
            None
        """
        # The token is a credential, so it is deliberately kept out of the log.
        self._log.debug('[PIXIV] Authenticating to Pixiv with the saved refresh token')
        try:
            self._pixiv.auth(refresh_token=self._refresh_token)
        except Exception as error:
            self._log.exception("[PIXIV] Failed to authenticate to Pixiv", exc_info=error)

    @staticmethod
    def _is_invalid_grant(response) -> bool:
        """
        Tell whether an API response reports an expired/invalid session.
        Tolerates responses whose ``error`` entry has no ``message``.
        """
        if not response or 'error' not in response:
            return False
        error = response['error']
        message = error.get('message') if isinstance(error, dict) else None
        return 'invalid_grant' in (message or '')

    def get_illust(self, illust_id: int) -> typing.Optional[dict]:
        """
        Look up the provided illustration ID on Pixiv
        Args:
            illust_id (int):

        Returns:
            typing.Optional[dict]: the ``illust`` entry of the response, or
            None when disabled or when the response has no such entry
        """
        if not self.enabled:
            return None

        illust = self._pixiv.illust_detail(illust_id)
        if self._is_invalid_grant(illust):
            self._log.warning('Pixiv login session is no longer valid; re-authenticating with saved token')
            self._login()
            illust = self._pixiv.illust_detail(illust_id)

        return illust['illust'] if illust and 'illust' in illust else None

    def get_author(self, author_id: int) -> typing.Optional[dict]:
        """
        Get the user detail response for the specified author
        Args:
            author_id (int):

        Returns:
            typing.Optional[dict]: the raw response, or None when disabled
        """
        if not self.enabled:
            return None

        user = self._pixiv.user_detail(author_id)
        if self._is_invalid_grant(user):
            self._log.info('Re-Authenticating to Pixiv with the saved refresh token')
            self._login()
            user = self._pixiv.user_detail(author_id)

        return user

    def get_author_twitter(self, author_id: int) -> typing.Optional[str]:
        """
        Get the Pixiv artists Twitter handle, if available
        Args:
            author_id (int):

        Returns:
            typing.Optional[str]: ``@handle``, or None when disabled, when
            the profile has no Twitter URL, or when the URL does not match
        """
        if not self.enabled:
            return None

        user = self.get_author(author_id)

        # Error responses have no profile, and a profile may lack the key.
        profile = user.get('profile') if user else None
        twitter_url = profile.get('twitter_url') if profile else None
        if not twitter_url:
            return None

        match = self._RE_TWITTER.match(twitter_url)
        if match and match.group('username'):
            return f"@{match.group('username')}"
        return None
Beispiel #8
0
# Patterns are compiled once at import time instead of once per URL, and
# written as raw strings so "\?" and "\d" are regex escapes rather than
# invalid string escapes.
_URL_PAT = re.compile(r"https?://[a-zA-Z0-9\-./?@&=:~_#]+")
_NICOSEIGA_PAT = re.compile(
    r'http://seiga\.nicovideo\.jp/seiga/[a-zA-Z]+([0-9]+)')
_PIXIV_PAT = re.compile(
    r'https://www\.pixiv\.net/member_illust\.php/?\?([a-zA-Z0-9\-./?@&=:~_#]+)')
_TWITTER_PAT = re.compile(
    r'https://twitter\.com/[a-zA-Z0-9_]+/status/\d+')
_IMAGE_FORMATS = ("jpg", "jpeg", "gif", "png")
# Seconds to wait for a posted page before giving up on it.
_REQUEST_TIMEOUT = 10


def _attach_image(log, response, image):
    """Store *response* as *image* and attach its thumbnail to *log*."""
    img = image_from_response(response, image)
    img.save()
    Log.objects.filter(id=log.id).update(attached_image=img.thumb)


def _save_twitter_images(log, url, soup, title):
    """Save every photo found in a tweet page; failures are skipped."""
    try:
        images = (soup.find("div", {"class": "permalink-tweet-container"})
                  .find("div", {"class": "AdaptiveMedia-container"})
                  .findAll("div", {"class": "AdaptiveMedia-photoContainer"}))
    except AttributeError:
        # One of the containers is missing: fall back to "media" blocks.
        images = soup.findAll("div", {"class": "media"})
    for image in images:
        try:
            image_url = image.find('img')['src']
            _attach_image(log, requests.Session().get(image_url),
                          Image(original_url=url, related_log=log, caption=title))
        except Exception:
            # Best effort per image; unlike a bare "except", this lets
            # KeyboardInterrupt and SystemExit through.
            continue


def _save_nicoseiga_image(log, url):
    """Log in to Nico Seiga and save the full-size image behind *url*."""
    seiga_login = '******'
    seiga_id = _NICOSEIGA_PAT.search(url).group(1)
    seiga_source = 'http://seiga.nicovideo.jp/image/source/%s' % seiga_id
    login_post = {'mail_tel': SECRET_KEYS['nicouser'],
                  'password': SECRET_KEYS['nicopass']}

    session = requests.Session()
    session.post(seiga_login, data=login_post)
    source_soup = BeautifulSoup(session.get(seiga_source).text, 'lxml')
    container = source_soup.find('div', {'class': 'illust_view_big'})
    if container is None:
        # Page layout differs from what is expected; nothing to save.
        return
    image_url = 'http://lohas.nicoseiga.jp%s' % container['data-src']
    _attach_image(log, requests.Session().get(image_url),
                  Image(original_url=url, related_log=log))


def _save_pixiv_images(log, url):
    """Save the image(s) of the Pixiv illustration referenced by *url*."""
    from pixivpy3 import AppPixivAPI
    from urllib.parse import parse_qs

    pixiv_dict = parse_qs(_PIXIV_PAT.search(url).group(1))
    illust_ids = pixiv_dict.get('illust_id')
    if not illust_ids:
        # The query string carries no illustration id.
        return
    # parse_qs maps every key to a list of values; use the first id rather
    # than passing the whole list on.
    pixiv_id = illust_ids[0]

    api = AppPixivAPI()
    api.login(SECRET_KEYS['pixiuser'], SECRET_KEYS['pixipass'])
    pixiv_illust = api.illust_detail(pixiv_id, req_auth=True).illust

    if 'meta_pages' in pixiv_illust and len(pixiv_illust.meta_pages) != 0:
        if 'page' in pixiv_dict:
            page_index = int(pixiv_dict['page'][0])
            image_urls = [pixiv_illust.meta_pages[page_index].image_urls.large]
        else:
            image_urls = [page.image_urls.large
                          for page in pixiv_illust.meta_pages]
    else:
        image_urls = [pixiv_illust.image_urls.large]

    for image_url in image_urls:
        response = api.requests_call('GET', image_url,
                                     headers={'Referer': 'https://app-api.pixiv.net/'},
                                     stream=True)
        _attach_image(log, response,
                      Image(original_url=url, related_log=log, caption=pixiv_illust.title))


def check_log(instance, **kwargs):
    """React to a saved log entry: announce page titles and archive images.

    Only ``PRIVMSG`` entries from nicks other than ``maobot`` and
    ``maobot_php`` are processed. For every URL in the message the page
    title (when there is one) is saved as a ``NOTICE`` log entry, and
    images are saved for Twitter status, Nico Seiga and Pixiv URLs as well
    as for URLs ending in a known image extension.

    Args:
        instance: The log entry; read for ``nick``, ``command``,
            ``message``, ``channel`` and ``id``.
        **kwargs: Accepted and ignored.
    """
    log = instance
    title = 'test'
    if log.nick == 'maobot' or log.command != 'PRIVMSG' or log.nick == 'maobot_php':
        return

    for url in _URL_PAT.findall(log.message):
        try:
            r = requests.get(url, timeout=_REQUEST_TIMEOUT)
        except requests.RequestException:
            # An unreachable or slow URL must not abort the remaining ones.
            continue
        content_type_encoding = r.encoding if r.encoding != 'ISO-8859-1' else None
        soup = BeautifulSoup(r.content, 'html.parser', from_encoding=content_type_encoding)
        try:
            title = soup.title.string
            Log(command='NOTICE', channel=log.channel,
                nick='maobot', message=title, is_irc=False).save()
        except (AttributeError, TypeError, HTTPError):
            pass

        # image dl
        if _TWITTER_PAT.match(url):
            _save_twitter_images(log, url, soup, title)
        elif _NICOSEIGA_PAT.match(url):
            _save_nicoseiga_image(log, url)
        elif _PIXIV_PAT.match(url):
            _save_pixiv_images(log, url)
        elif url.split(".")[-1] in _IMAGE_FORMATS:
            _attach_image(log, requests.Session().get(url),
                          Image(original_url=url, related_log=log))
Beispiel #9
0
class CustomPixivPy:
    """
    A wrapper around PixivAPI and AppPixivAPI to facilitate automatic re-authentication
     (for required methods) and custom result format
    """
    TOKEN_LIFESPAN = datetime.timedelta(seconds=3600)
    MAX_PIXIV_RESULTS = 3000
    RESULTS_PER_QUERY = 50
    MAX_RETRIES = 5

    def __init__(self, **kwargs):
        """
        Create both API clients.
        :param kwargs: forwarded unchanged to AppPixivAPI and PixivAPI
        """
        # The only base class is object, whose __init__ rejects keyword
        # arguments; kwargs are meant for the API clients only.
        super().__init__()
        # forces reauth() to trigger if any method is called:
        self.last_auth = datetime.datetime.fromtimestamp(0)
        self.refresh_token = ""
        self.aapi = AppPixivAPI(**kwargs)
        self.papi = PixivAPI(**kwargs)

    def login(self, refresh_token):
        """
        Authenticate both clients and record the time of the login.
        :param refresh_token: token passed to both clients' auth()
        :return: self, to allow chaining
        """
        self.refresh_token = refresh_token
        self.aapi.auth(refresh_token=refresh_token)
        self.papi.auth(refresh_token=refresh_token)
        self.last_auth = datetime.datetime.now()
        logger.debug('Pyxiv login done')
        return self  # allows chaining

    def _page_number(self, offset):
        """Convert an item offset into the 1-based page number papi expects"""
        return (offset or 0) // self.RESULTS_PER_QUERY + 1

    @retry
    def illust_ranking(self, mode='daily', offset=None):
        """
        Fetch one page of the illustration ranking.
        :param mode: ranking mode passed to papi.ranking
        :param offset: Optional. item offset, converted to a page number
        """
        self.reauth()
        return self.papi.ranking('illust',
                                 mode,
                                 self._page_number(offset),
                                 include_stats=False,
                                 image_sizes=['medium', 'large'])

    @retry
    def search_illust(self,
                      word,
                      search_target='text',
                      sort='date',
                      offset=None):
        """
        Search illustrations.
        :param word: search query
        :param search_target: passed to papi.search_works as mode
        :param sort: sort order
        :param offset: Optional. item offset, converted to a page number
        """
        self.reauth()
        return self.papi.search_works(word,
                                      self._page_number(offset),
                                      mode=search_target,
                                      types=['illustration'],
                                      sort=sort,
                                      include_stats=False,
                                      image_sizes=['medium', 'large'])

    @retry
    def illust_detail(self, illust_id, req_auth=True):
        """Fetch the details of one illustration through the app API"""
        self.reauth()
        return self.aapi.illust_detail(illust_id, req_auth)

    def reauth(self):
        """Re-authenticates with pixiv if the last login was more than TOKEN_LIFESPAN ago"""
        if datetime.datetime.now() - self.last_auth > self.TOKEN_LIFESPAN:
            # login() already authenticates both clients and updates
            # last_auth, so nothing needs repeating here.
            self.login(self.refresh_token)
            logger.debug("Reauth successful")

    def get_pixiv_results(self, offset=None, *, query="", nsfw=False):
        """
        Get results from Pixiv as a list of dicts
        If no parameters are given, SFW daily ranking is returned
        :param offset: Optional. page offset
        :param query: Optional. Specify a search query
        :param nsfw: Whether to allow NSFW illustrations, false by default
        :return: list of dicts containing illustration information
         (empty when the response reports an error)
        :raises PixivError: when the request still fails on the last of MAX_RETRIES attempts
        """
        json_result = None
        for attempt in range(1, self.MAX_RETRIES + 1):
            try:
                if query:
                    json_result = self.search_illust(query, offset=offset, sort='popular')
                else:
                    json_result = self.illust_ranking('daily_r18' if nsfw else 'daily',
                                                      offset=offset)
            except PixivError as e:
                if attempt == self.MAX_RETRIES:
                    logger.warning("Failed fetching Pixiv data: %s", e)
                    raise
            else:
                break

        results = []
        if json_result.get('has_error'):
            return results

        # Search responses list the works directly; ranking responses wrap
        # each one in a "work" entry.
        it = json_result.response if query else (
            x['work'] for x in json_result.response[0]['works'])
        for img in it:
            if not nsfw and img['sanity_level'] == 'black':
                continue  # white = SFW, semi_black = questionable, black = NSFW
            results.append({
                'url': img['image_urls']['large'],
                'thumb_url': img['image_urls']['medium'],
                'title': img['title'],
                'user_name': img['user']['name'],
                'user_link': f"https://www.pixiv.net/en/users/{img['user']['id']}",
            })
            logger.debug(results[-1])
        return results