Ejemplo n.º 1
0
class Pixiv(DummySite):
    """Site adapter that enumerates the illustrations of one Pixiv user.

    Construction logs in through ``AppPixivAPI``, extracts the numeric user
    id from ``url`` and immediately fetches the user's profile so that
    ``dir_name`` is available afterwards.
    """

    def __init__(self, url, username, password, proxy=None):
        """Log in and load the profile of the user referenced by ``url``.

        Args:
            url: Address of the user's page. The numeric id may be the last
                path segment (a trailing slash, query string or fragment is
                tolerated) or the value of an ``id`` query parameter.
            username: Account name passed to ``AppPixivAPI.login``.
            password: Password passed to ``AppPixivAPI.login``.
            proxy: Forwarded to ``get_proxy``; its result is passed as
                keyword arguments to both the API client and the fetcher.

        Raises:
            ValueError: If no integer user id can be extracted from ``url``.
        """
        proxies = get_proxy(proxy)
        requests_kwargs = {
            "timeout": (3, 10),
        }
        requests_kwargs.update(proxies)
        self.api = AppPixivAPI(**requests_kwargs)
        self._fetcher = PixivFetcher(**proxies)
        self.api.login(username, password)
        self._user_id = self._parse_user_id(url)
        self._dir_name = None
        self._total_illustrations = 0
        self._fetch_user_detail()

    @staticmethod
    def _parse_user_id(url):
        """Return the integer user id embedded in ``url``.

        The historical behaviour (integer value of the text after the last
        ``/``) is tried first so every URL that used to work yields the same
        id. Only when that fails are the ``id`` query parameter and the last
        non-empty path segment considered.
        """
        try:
            return int(url.split("/")[-1])
        except ValueError:
            pass

        # Function-scope import: this block cannot see the file's imports.
        from urllib.parse import parse_qs, urlsplit

        parts = urlsplit(url)
        id_values = parse_qs(parts.query).get("id")
        if id_values:
            candidate = id_values[0]
        else:
            # rstrip drops trailing slashes so ".../users/123/" still works.
            candidate = parts.path.rstrip("/").rsplit("/", 1)[-1]
        try:
            return int(candidate)
        except ValueError:
            raise ValueError(
                "cannot extract a Pixiv user id from URL: %r" % (url,)
            ) from None

    @property
    def fetcher(self):
        """The ``PixivFetcher`` instance created in ``__init__``."""
        return self._fetcher

    @property
    def dir_name(self):
        """Normalized directory name built from the user's profile."""
        # Internal invariant: _fetch_user_detail() runs during __init__.
        assert self._dir_name is not None
        return self._dir_name

    def _fetch_user_detail(self):
        """Fetch the user's profile and derive ``dir_name`` from it.

        Also stores the ``total_illusts`` value reported by the profile.

        Returns:
            The normalized directory name ("name-account-id").
        """
        assert self._user_id is not None
        profile = self.api.user_detail(self._user_id)
        user = profile['user']
        raw_name = "-".join([
            user['name'],
            user['account'],
            str(user['id']),
        ])
        self._dir_name = normalize_filename(raw_name)
        self._total_illustrations = profile['profile']['total_illusts']
        return self.dir_name

    def _fetch_image_list(self):
        """Yield whatever ``parse_image_urls`` produces for every illustration.

        Follows ``next_url`` page by page until the API stops returning one.
        """
        page = self.api.user_illusts(self._user_id)
        while True:
            for illustration in page.illusts:
                yield from parse_image_urls(illustration)
            # An empty next_url is treated like None: there is no next page.
            if not page.next_url:
                break
            page = self.api.user_illusts(**self.api.parse_qs(page.next_url))

    def _fetch_single_image_url(self, illustration_id):
        """Return the ``large`` image URL of a single illustration."""
        json_result = self.api.illust_detail(illustration_id)
        return json_result.illust.image_urls['large']

    @property
    def tasks(self):
        """Generator over all download tasks for this user."""
        yield from self._fetch_image_list()
Ejemplo n.º 2
0
class APIHandler:
    """Facade over one ``AppPixivAPI`` session whose login runs in a thread.

    Meant to be used as a single shared instance (nothing in this class
    enforces that). Call ``start()`` once with the credentials; every public
    request method then blocks until the login thread has finished before
    touching the API.
    """

    def __init__(self):
        self._api = AppPixivAPI()  # Session object used for login and requests
        self._login_done = False
        self._login_started = False
        self._api_thread = threading.Thread(target=self._login)

        # Both are assigned later: credentials in start(), response by the
        # login thread (the instance is created before the config is read).
        self._credentials: 'dict[str, str]'
        self._response: 'Json'

    def start(self, credentials):
        """Kick off the background login; the only public setup entry point.

        Calls after the first one are ignored.
        """
        if self._login_started:
            return
        self._credentials = credentials
        self._api_thread.start()
        self._login_started = True

    def _await_login(self):
        """Block until the login thread has finished (joined only once)."""
        if self._login_done:
            return
        self._api_thread.join()
        self._login_done = True

    def _login(self):
        """Thread target: perform the login."""
        self._login_with_token()

    def _login_with_token(self):
        """Authenticate with the stored refresh token.

        On ``PixivError`` the error and a short help text are printed; the
        exception is not re-raised.
        """
        # TODO: refresh the token if it expired
        try:
            self._response = self._api.auth(
                refresh_token=self._credentials['refresh_token'])
        except PixivError as e:
            messages = (
                '',
                e,
                'If this is a cloudflare captcha issue, just quit and retry',
                'It is not a problem with koneko or pixivpy',
                'Otherwise, please report to '
                'https://github.com/akazukin5151/koneko/issues',
                "Press 'q' and enter to exit",
            )
            for message in messages:
                print(message)

    # Public API requests for user id
    def get_user_id(self) -> 'Json':
        """Return the ``id`` of the ``user`` entry in the login response."""
        self._await_login()
        user = self._response['user']
        return user['id']

    # Public API request functions for each mode
    @funcy.retry(tries=3, errors=(ConnectionError, PixivError))
    @utils.spinner('')
    def artist_gallery(self, artist_user_id, offset) -> 'Json':
        """Mode 1: illustrations of one artist, starting at ``offset``."""
        self._await_login()
        response = self._api.user_illusts(artist_user_id, offset=offset)
        return response

    @funcy.retry(tries=3, errors=(ConnectionError, PixivError))
    def protected_illust_detail(self, image_id) -> 'Json':
        """Mode 2: details of a single illustration."""
        self._await_login()
        response = self._api.illust_detail(image_id)
        return response

    @funcy.retry(tries=3, errors=(ConnectionError, PixivError))
    def following_user_request(self, user_id, publicity, offset) -> 'Json':
        """Mode 3: users followed by ``user_id`` (``publicity`` -> restrict)."""
        self._await_login()
        response = self._api.user_following(
            user_id, restrict=publicity, offset=offset)
        return response

    @funcy.retry(tries=3, errors=(ConnectionError, PixivError))
    def search_user_request(self, searchstr, offset) -> 'Json':
        """Mode 4: search users matching ``searchstr``."""
        self._await_login()
        response = self._api.search_user(searchstr, offset=offset)
        return response

    @funcy.retry(tries=3, errors=(ConnectionError, PixivError))
    @utils.spinner('')
    def illust_follow_request(self, restrict, offset) -> 'Json':
        """Mode 5: ``illust_follow`` listing for the logged-in session."""
        self._await_login()
        response = self._api.illust_follow(restrict=restrict, offset=offset)
        return response

    @funcy.retry(tries=3, errors=(ConnectionError, PixivError))
    @utils.spinner('')
    def illust_related_request(self, image_id, offset) -> 'Json':
        """Mode 15 (1.5 * 10 so it's an int): illusts related to ``image_id``."""
        self._await_login()
        response = self._api.illust_related(illust_id=image_id, offset=offset)
        return response

    @funcy.retry(tries=3, errors=(ConnectionError, PixivError))
    @utils.spinner('')
    def illust_recommended_request(self, offset) -> 'Json':
        """Mode 6: recommended illustrations."""
        self._await_login()
        response = self._api.illust_recommended(offset=offset)
        return response

    # Download
    @funcy.retry(tries=3, errors=(ConnectionError, PixivError))
    def protected_download(self, url, path, name) -> 'IO':
        """Download ``url`` to ``path``/``name``; retried so it doesn't crash.

        Returns nothing; the API's own return value is discarded.
        """
        self._await_login()
        self._api.download(url, path=path, name=name)
Ejemplo n.º 3
0
class PixivSpider:
    """
    Crawl Pixiv illustrations through ``AppPixivAPI`` and save them locally.

    Files are written to ``self.directory``. One record per downloaded file
    is kept in ``self.data['illusts']`` and written to ``info.json`` by
    ``exit()``.
    """

    def __init__(self):
        """
        Init PixivSpider

        Loads the download history from ``info.json`` in the current working
        directory when the file exists, otherwise starts with an empty one.
        """
        self.api = AppPixivAPI()
        self.directory = 'download'
        if not os.path.exists('info.json'):
            self.data = {'illusts': []}
            self.count = 0
            print("Create new info.json file")
        else:
            with open('info.json', 'r') as f:
                self.data = json.load(f)
            self.count = len(self.data['illusts'])
            print("Load existing info.json file")
            print("Existed illusts count: %d" % self.count)
        # Builtin set instead of ``Set()``: the legacy ``sets.Set`` does not
        # exist on Python 3 and ``typing.Set`` cannot be instantiated.
        self.illusts_names = set(
            illust['name'] for illust in self.data['illusts'])

    def login(self):
        """
        Login pixiv.net

        Reads ``username`` and ``password`` from ``login.json`` in the
        current working directory.
        """
        with open('login.json') as f:
            login = json.load(f)
        self.api.login(login["username"], login["password"])
        # ``%`` interpolation: print() does not format like logging does.
        print("Login pixiv.net with user %s." % login["username"])

    def exit(self):
        """
        Stop spider and print logs

        Writes the download history to ``info.json``.
        """
        with open('info.json', 'w') as f:
            json.dump(self.data, f, indent=2)
        print("Finish! Total downloaded illusts number: %d" % self.count)

    def create_download_folder(self):
        """
        Setup image download directory
        """
        if not os.path.exists(self.directory):
            os.makedirs(self.directory)

    def download_illusts(self, illusts=None):
        """
        Download illusts

        An illust is added to the history (and gets the next sequential id)
        only after its download succeeded, so a failed download is neither
        counted nor marked as done and can be retried later.

        :param illusts: iterable of illust objects; ``None`` means nothing
            to download
        """
        for illust in illusts or []:
            image_url = illust.meta_single_page.get('original_image_url',
                                                    illust.image_urls.large)
            print(u"👀  Found illust: %s (%s)" % (illust.title, image_url))
            extension = os.path.splitext(os.path.basename(image_url))[1]
            name = "%d_%s%s" % (illust.id, illust.title, extension)
            # '/' would otherwise be read as a directory separator.
            name = name.replace('/', ':')
            if name in self.illusts_names:
                print(u"✨  Already download: %s: %s" %
                      (illust.title, image_url))
                continue

            next_id = self.count + 1
            try:
                # The file on disk is prefixed with the sequential id.
                self.api.download(image_url,
                                  path=self.directory,
                                  name="%d_" % next_id + name)
            except PixivError:
                print(u"😢  PixivError!!! Skip this illust")
                continue

            self.count = next_id
            self.data['illusts'].append({
                'id': next_id,
                'name': name,
                'illust_id': illust.id,
                'illustrator_id': illust.user.id,
                'source_url': image_url
            })
            self.illusts_names.add(name)
            print(u"✅  Download illust: %s (%s)" %
                  (illust.title, image_url))

    def get_user_ids_from_illusts(self, illusts):
        """
        Get user ids by illusts

        :return: list with one user id per illust, in input order
            (duplicates are kept)
        """
        return [illust.user.id for illust in illusts]

    def get_top_ranking_illusts(self,
                                count=DEFAULT_DOWNLOAD_TOP_RANKING_COUNT,
                                ranking_type=RankingType.DAY,
                                date=None,
                                download=False):
        """
        Get top ranking illusts

        :param count: the number of illusts that we want to download
        :param ranking_type: ranking type
        :param date: ``YYYY-MM-DD`` string; ``None`` means today, evaluated
            on every call rather than once when the class is defined
        :param download: download flag
        :return: at most ``count`` illusts
        """
        if date is None:
            date = datetime.today().strftime("%Y-%m-%d")
        json_result = self.api.illust_ranking(ranking_type, date=date)
        illusts = self.get_illusts_from_all_pages(
            json_result, json_result.illusts, count, download,
            fetch_next=self.api.illust_ranking)
        return illusts[:count]

    def get_recommended_illusts(self,
                                count=DEFAULT_DOWNLOAD_RECOMMENDED_COUNT,
                                content_type=ContentType.ILLUST,
                                download=False):
        """
        Get recommended illusts

        :param count: the number of illusts that we want to download
        :param content_type: content type
        :param download: download flag
        :return: at most ``count`` illusts
        """
        json_result = self.api.illust_recommended(content_type)
        illusts = self.get_illusts_from_all_pages(
            json_result, json_result.illusts, count, download,
            fetch_next=self.api.illust_recommended)
        return illusts[:count]

    def get_illusts_by_user_ids(self,
                                user_ids,
                                count=DEFAULT_DOWNLOAD_EACH_USER_COUNT,
                                content_type=ContentType.ILLUST,
                                download=False):
        """
        Get illusts by user id

        :param user_ids: iterable of user ids
        :param count: maximum number of illusts per user
        :param content_type: content type
        :param download: download flag
        :return: dict mapping each user id to at most ``count`` illusts
        """
        ret = {}
        for user_id in user_ids:
            json_result = self.api.user_illusts(user_id=user_id,
                                                type=content_type)
            illusts = self.get_illusts_from_all_pages(
                json_result, json_result.illusts, count, download,
                fetch_next=self.api.user_illusts)
            ret[user_id] = illusts[:count]
        return ret

    def get_illusts_from_all_pages(self,
                                   json_result,
                                   illusts,
                                   count,
                                   download=False,
                                   fetch_next=None):
        """
        Get illusts from all pages

        Follows ``next_url`` until at least ``count`` illusts are collected
        or no further page is available.

        :param json_result: response of the first page
        :param illusts: illusts collected so far; extended in place
        :param count: stop paging once this many illusts are collected
        :param download: when true, download the first ``count`` illusts
        :param fetch_next: API method called with the parsed ``next_url``
            query to get the following page. It must be the same endpoint
            that produced ``json_result``; defaults to
            ``self.api.illust_ranking`` for backward compatibility.
        :return: the ``illusts`` list (may hold more than ``count`` items)
        """
        if fetch_next is None:
            fetch_next = self.api.illust_ranking

        while json_result and len(illusts) < count:
            next_qs = self.api.parse_qs(json_result.next_url)
            if next_qs is None:
                break
            try:
                json_result = fetch_next(**next_qs)
            except TypeError:
                # next_url carried a parameter the endpoint does not accept.
                break
            page = getattr(json_result, 'illusts', None)
            if page is None:
                # Response without an illust list; stop with what we have.
                break
            illusts += page

        if download:
            self.download_illusts(illusts=illusts[:count])

        return illusts