Beispiel #1
0
 def __init__(self, **kwargs):
     """Set up both pixivpy clients; **kwargs are forwarded to them and to super()."""
     super().__init__(**kwargs)
     # Epoch timestamp forces reauth() to trigger on the first API call:
     self.last_auth = datetime.datetime.fromtimestamp(0)
     # Populated later by login(); empty string means "never logged in".
     self.refresh_token = ""
     self.aapi = AppPixivAPI(**kwargs)
     self.papi = PixivAPI(**kwargs)
Beispiel #2
0
class SetuService():
    """Search Pixiv illustrations, never returning the same work twice.

    Already-served illustration IDs (the "fgl" list) are persisted to a
    text file, one ID per line, so the filter survives restarts.
    """

    def __init__(self, username, password,
                 fgl_path='src/main/resources/fgl.txt'):
        """Load the already-served-ID list and log in to Pixiv.

        :param username: Pixiv account name.
        :param password: Pixiv account password.
        :param fgl_path: path of the persistent ID list (new optional
            parameter; defaults to the previously hard-coded path).
        """
        self._username = username
        self._password = password
        self._fgl_path = fgl_path
        # "with" closes the handle even if reading raises; the original
        # leaked the file object on error.
        with open(fgl_path, 'r') as fglfile:
            self._fgl = [line.strip() for line in fglfile]
        self._api = PixivAPI(**_REQUESTS_KWARGS)
        self._api.login(self._username, self._password)

    def search(self, keyword, mode):
        """Return "<image-url> <id>" for the first unseen result, else None.

        Each returned ID is appended to the persistent fgl file so it is
        never served again.
        """
        _res = self._api.search_works(keyword,
                                      types=["illustration"],
                                      per_page=500,
                                      mode=mode,
                                      sort="popular")
        for illust in _res.response:
            if str(illust.id) in self._fgl:
                continue
            self._fgl.append(str(illust.id))
            # Persist the ID immediately so a later crash cannot cause
            # the same work to be served twice.
            with open(self._fgl_path, 'a') as f:
                f.write(str(illust.id) + '\n')
            # i.pixiv.cat mirrors i.pximg.net without the referer check.
            return illust.image_urls.large.replace(
                "i.pximg.net", "i.pixiv.cat") + " " + str(illust.id)
Beispiel #3
0
 def __init__(
         self,
         auth,
         work_path=os.path.abspath('../pixiv/'),
 ):
     """Log in to Pixiv and remember the working directory.

     :param auth: (username, password) pair, unpacked into PixivAPI.login().
     :param work_path: download/work directory; note the default is
         resolved once, at function-definition time.
     """
     self._api = PixivAPI()
     self._api.login(*auth)
     self._wd = work_path
Beispiel #4
0
 def __connect(self):
     """Lazily create and log in the Pixiv client.

     Returns True once a client exists.  Login failures propagate to the
     caller: the original immediately re-raised inside its except block,
     which made the following "return False" unreachable — that dead
     try/except has been removed without changing behavior.
     """
     if self.client is None:
         # cfg keys are "account" / "password" (Chinese).
         self.client = PixivAPI()
         self.client.login(cfg['pixiv']['帳號'], cfg['pixiv']['密碼'])
     return True
Beispiel #5
0
class Pixiv(Thread):
    def __init__(self):
        # No per-instance state to set up; just initialize the Thread base.
        Thread.__init__(self)
        
    def run(self):
        self.client = PixivAPI()
        self.client.login(cfg['pixiv']['帳號'], cfg['pixiv']['密碼'])

        while True:
Beispiel #6
0
	def __init__(self, _user, _pass, default_path=r'F:/PIXIV'):
		"""Store credentials and set up the API client, helper, queue and paths.

		:param _user: Pixiv username (actual login happens later).
		:param _pass: Pixiv password.
		:param default_path: directory scanned for already-downloaded works.
		"""
		self._user, self._pass = (_user, _pass)

		self.api = PixivAPI()
		self.pixiv_utils = PixivUtils()

		# Illust IDs captured from the clipboard, pending processing.
		self.pastes = Queue() 
		self.default_path = default_path

		# IDs of images already present under default_path.
		self.local_pixiv_ids = []
Beispiel #7
0
 def __init__(self, username, password):
     """Load the persisted already-served-ID list and log in to Pixiv.

     :param username: Pixiv account name.
     :param password: Pixiv account password.
     """
     self._username = username
     self._password = password
     # "with" guarantees the file is closed even if reading raises;
     # the original left the handle open on error.
     with open('src/main/resources/fgl.txt', 'r') as fglfile:
         self._fgl = [line.strip() for line in fglfile]
     self._api = PixivAPI(**_REQUESTS_KWARGS)
     self._api.login(self._username, self._password)
Beispiel #8
0
    def __init__(self, auto_re_login=True, **requests_kwargs):
        """Create both pixivpy clients; authentication happens lazily later.

        :param auto_re_login: re-authenticate automatically on token expiry.
        :param requests_kwargs: extra keyword arguments forwarded to requests.
        """
        # Configuration.
        self.auto_re_login = auto_re_login
        self._requests_kwargs = requests_kwargs

        # Credentials are unknown until the first explicit login.
        self._username = None
        self._password = None

        # Authentication state, guarded by a lock.
        self._has_auth = False
        self._last_login = None
        self._check_auth_lock = Lock()

        # Public-API and app-API clients share the same requests options.
        self._papi = PixivAPI(**requests_kwargs)
        self._aapi = AppPixivAPI(**requests_kwargs)
Beispiel #9
0
 def __init__(self, name, app_config=None):
     """Load handler config/data files and authenticate with Pixiv.

     :param name: handler instance name, used for the per-instance data file.
     :param app_config: optional mapping with 'handlers_config_dir' and
         'data_dir' keys.  (None replaces the original mutable default
         `{}`, which is shared across calls.)
     """
     if app_config is None:
         app_config = {}
     config_path = Path(app_config.get('handlers_config_dir', '.')) / 'pixiv.toml'
     data_path = Path(app_config.get('data_dir', './data/')) / '{}.toml'.format(name)
     self.config = Config(config_path, write_defaults=True, defaults={
         'refresh': 'xxxx',
     })
     self.config.save()
     self.data = Config(data_path)
     self.age_filter = None
     self.api = PixivAPI()
     # Only attempt a login when a refresh token is configured.
     if self.config.get('refresh'):
         print('logging in to Pixiv...')
         login_response = self.api.auth(refresh_token=self.config['refresh'])
         print('logged in into account {0.name} ({0.account}) [{0.id}]'.format(login_response['response']['user']))
Beispiel #10
0
 def __init__(self, dbDict, config):
     """Set up download directories and an authenticated Pixiv client.

     :param dbDict: shared dict-like store of known works.
     :param config: mapping with PIXIV_USERNAME / PIXIV_PASSWORD /
         PIXIV_DOWNLOAD_DIRECTORY keys.
     """
     self.config = config
     self.dbDict = dbDict
     self.username = config['PIXIV_USERNAME']
     self.password = config['PIXIV_PASSWORD']
     # Images, ugoira archives and avatars get separate subdirectories.
     self.imageDirectory = os.path.join(config['PIXIV_DOWNLOAD_DIRECTORY'],
                                        'images')
     self.ugoiraDirectory = os.path.join(config['PIXIV_DOWNLOAD_DIRECTORY'],
                                         'ugoira')
     self.avatarDirectory = os.path.join(config['PIXIV_DOWNLOAD_DIRECTORY'],
                                         'avatars')
     # exist_ok makes repeated start-ups idempotent.
     os.makedirs(self.imageDirectory, exist_ok=True)
     os.makedirs(self.ugoiraDirectory, exist_ok=True)
     os.makedirs(self.avatarDirectory, exist_ok=True)
     self.api = PixivAPI()
     self.authorize()
Beispiel #11
0
 def __init__(self, dbDict, config):
     """Prepare download/avatar directories and log in to Pixiv.

     :param dbDict: shared dict-like store of known works.
     :param config: mapping with PIXIV_* credential and directory keys.
     """
     self.dbDict = dbDict
     self.username = config['PIXIV_USERNAME']
     self.password = config['PIXIV_PASSWORD']
     self.downloadDirectory = config['PIXIV_DOWNLOAD_DIRECTORY']
     self.avatarDirectory = config['PIXIV_AVATAR_DIRECTORY']
     # Create target directories up front; exist_ok makes this idempotent.
     os.makedirs(self.downloadDirectory, exist_ok=True)
     os.makedirs(self.avatarDirectory, exist_ok=True)
     self.api = PixivAPI()
     self.authorize()
 def importIllustJSON(self):
     """Fetch this work's metadata from the Pixiv API and cache it on self.

     Reads self.ID and fills self.JSON plus convenience attributes
     (author, title, tags, page count and large-size image URLs).
     Assumes self.image_URLs already exists — it is appended to, not created.
     """
     # Login to Pixiv API
     self.api = PixivAPI()
     self.api.login(pixivLogin["pixivusername"],
                    pixivLogin["pixivpassword"])
     userURL = "https://www.pixiv.net/member_id="
     self.JSON = self.api.works(self.ID)['response'][0]
     self.manga = self.JSON['is_manga']
     self.account = self.JSON['user']['account']
     self.name = self.JSON['user']['name']
     self.user_ID = self.JSON['user']['id']
     self.user_URL = userURL + str(self.user_ID)
     self.title = self.JSON['title']
     self.tags = self.JSON['tags']
     self.pages = self.JSON['page_count']
     if self.pages > 1:
         # NOTE(review): range(self.pages - 1) visits indices 0..pages-2,
         # so the last page's URL is never collected — looks off-by-one;
         # confirm against the API's metadata.pages length.
         for page in range(self.pages - 1):
             self.image_URLs.append(self.JSON['metadata']["pages"][page]
                                    ["image_urls"]['large'])
     else:
         self.image_URLs.append(self.JSON['image_urls']['large'])
Beispiel #13
0
class CustomPixivPy:
    """
    A wrapper around PixivAPI and AppPixivAPI to facilitate automatic re-authentication
     (for required methods) and custom result format
    """
    # How long an access token is assumed valid before reauth() refreshes it.
    TOKEN_LIFESPAN = datetime.timedelta(seconds=3600)
    # Upper bound on results Pixiv will ever return for one query.
    MAX_PIXIV_RESULTS = 3000
    # Page size used when translating an item offset into a page number.
    RESULTS_PER_QUERY = 50
    # Attempts made by get_pixiv_results() before giving up.
    MAX_RETRIES = 5

    def __init__(self, **kwargs):
        """Create both pixivpy clients; **kwargs are forwarded to them."""
        super().__init__(**kwargs)
        # forces reauth() to trigger if any method is called:
        self.last_auth = datetime.datetime.fromtimestamp(0)
        self.refresh_token = ""
        self.aapi = AppPixivAPI(**kwargs)
        self.papi = PixivAPI(**kwargs)

    def login(self, refresh_token):
        """Authenticate both clients with *refresh_token* and record the time."""
        self.refresh_token = refresh_token
        self.aapi.auth(refresh_token=refresh_token)
        self.papi.auth(refresh_token=refresh_token)
        self.last_auth = datetime.datetime.now()
        logger.debug('Pyxiv login done')
        return self  # allows chaining

    @retry
    def illust_ranking(self, mode='daily', offset=None):
        """Return the raw ranking response; *offset* is an item offset, converted to a page number."""
        self.reauth()
        offset = (offset or 0) // self.RESULTS_PER_QUERY + 1
        return self.papi.ranking('illust',
                                 mode,
                                 offset,
                                 include_stats=False,
                                 image_sizes=['medium', 'large'])

    @retry
    def search_illust(self,
                      word,
                      search_target='text',
                      sort='date',
                      offset=None):
        """Search illustrations for *word*; *offset* is an item offset, converted to a page number."""
        self.reauth()
        offset = (offset or 0) // self.RESULTS_PER_QUERY + 1
        return self.papi.search_works(word,
                                      offset,
                                      mode=search_target,
                                      types=['illustration'],
                                      sort=sort,
                                      include_stats=False,
                                      image_sizes=['medium', 'large'])

    @retry
    def illust_detail(self, illust_id, req_auth=True):
        """Fetch details for a single illustration via the app API."""
        self.reauth()
        return self.aapi.illust_detail(illust_id, req_auth)

    def reauth(self):
        """Re-authenticates with pixiv if the last login was more than TOKEN_LIFESPAN ago"""
        if datetime.datetime.now() - self.last_auth > self.TOKEN_LIFESPAN:
            # NOTE(review): login() already auths papi and updates last_auth,
            # so the extra papi.auth/last_auth lines below look redundant —
            # confirm before removing.
            self.login(self.refresh_token)
            self.papi.auth(refresh_token=self.refresh_token)
            logger.debug("Reauth successful")
            self.last_auth = datetime.datetime.now()

    def get_pixiv_results(self, offset=None, *, query="", nsfw=False):
        """
        Get results from Pixiv as a dict
        If no parameters are given, SFW daily ranking is returned
        :param offset: Optional. page offset
        :param query: Optional. Specify a search query
        :param nsfw: Whether to allow NSFW illustrations, false by default
        :return: list of dicts containing illustration information
        """
        json_result, last_error = None, None
        for attempt in range(1, self.MAX_RETRIES + 1):
            try:
                json_result = self.search_illust(query, offset=offset, sort='popular') \
                    if query else self.illust_ranking('daily_r18' if nsfw else 'daily', offset=offset)
            except PixivError as e:
                # Only give up (and propagate) on the final attempt.
                if attempt == self.MAX_RETRIES:
                    logger.warning("Failed fetching Pixiv data: %s", e)
                    raise e from None
            else:
                break

        results = []
        if json_result.get('has_error'):
            return results

        # Search responses are flat; ranking responses nest each work
        # one level deeper under response[0]['works'].
        it = json_result.response if query else (
            x['work'] for x in json_result.response[0]['works'])
        for img in it:
            if not nsfw and img['sanity_level'] == 'black':
                continue  # white = SFW, semi_black = questionable, black = NSFW
            results.append({
                'url':
                img['image_urls']['large'],
                'thumb_url':
                img['image_urls']['medium'],
                'title':
                img['title'],
                'user_name':
                img['user']['name'],
                'user_link':
                f"https://www.pixiv.net/en/users/{img['user']['id']}"
            })
            logger.debug(results[-1])
        return results
Beispiel #14
0
    def run(self):
        self.client = PixivAPI()
        self.client.login(cfg['pixiv']['帳號'], cfg['pixiv']['密碼'])

        while True:
Beispiel #15
0
class PixivClip:
	"""Watches the clipboard for Pixiv illustration URLs and queues their IDs."""

	def __init__(self, _user, _pass, default_path=r'F:/PIXIV'):
		"""Store credentials and set up the API client, helper and work queue."""
		self._user, self._pass = (_user, _pass)

		self.api = PixivAPI()
		self.pixiv_utils = PixivUtils()

		self.pastes = Queue() 
		self.default_path = default_path

		self.local_pixiv_ids = []

	def login(self):
		"""Authenticate the Pixiv client with the stored credentials."""
		self.api.login(self._user, self._pass)

	def get_illust(self, illust_id):
		"""Fetch a work's metadata, logging in (and retrying) on auth errors.

		Raises AttributeError carrying the raw result when it has no usable
		response attribute.
		"""
		if not self.api.access_token:
			self.login()
		try:
			json_result = self.api.works(illust_id)
			json_result = json_result.response
		except AttributeError:
			# NOTE(review): if api.works() itself raised AttributeError,
			# json_result is unbound here and this becomes a NameError — confirm.
			raise AttributeError(json_result)
		except utils.PixivError as e:
			# Token probably expired: log in again and retry recursively.
			self.login()
			print(e, self.api.access_token, json_result)
			return self.get_illust(illust_id)
		else:			
			json_result = self.pixiv_utils.is_single_array(json_result)
			return json_result

	def work(self, illust_id):
		"""Fetch one illustration and print it; metadata errors are logged and swallowed."""
		try:
			illust = self.get_illust(illust_id)
		except AttributeError as json_result:
			# print(json_result.has_error)
			# print(json_result.status)
			print(json_result)
			print(dir(json_result))
			return
		else:
			pass
		illust_type = illust.type
		print(illust)


	def refresh_local_pixiv_ids(self):
		"""Re-scan default_path for the Pixiv IDs of already-downloaded images."""
		for i in self.pixiv_utils.list_imgs_pixiv_ids_in_dir(self.default_path):
			self.local_pixiv_ids.append(i)

	def callback(self, url):
		"""Clipboard hook: queue the illust ID parsed from *url*, skipping duplicates."""
		illust_id = self.pixiv_utils.parse_url_for_id(url)
		if illust_id in self.pastes.queue:
			print(illust_id, "Already In Queue!")
			return
		self.pastes.put(illust_id)

	def watch(self):
		"""Run the clipboard watcher until interrupted; returns the (stopped) watcher."""
		watcher = ClipWatcher(self.pixiv_utils.is_pixiv_illust_url, self.callback)
		try:
			for i in watcher.start():
				print('PIXIV LINKS:', self.pastes.qsize(), i, flush=True, end='\r')
		except (KeyboardInterrupt, Exception) as e:
			watcher.stop()
		return watcher

	def print_pastes_queue(self):
		"""Drain the queue, printing each queued illust ID, then pause for input."""
		while not self.pastes.empty():
			illust_id = self.pastes.get()
			print(illust_id)

		input("PAUSED!!!")

	def begin(self):
		"""Watch the clipboard, then print everything that was collected."""
		watcher = self.watch()
		self.print_pastes_queue()
Beispiel #16
0
from threading import Thread 
from api import cfg
from pixivpy3 import PixivAPI

try:
    # Module-level client shared by pixiv_search(); None signals init failure.
    pixiv_client = PixivAPI()
    pixiv_client.login(cfg['pixiv']['帳號'], cfg['pixiv']['密碼'])
except Exception as e:
    # Message: "<pixiv module init failed> ...".
    print('<pixiv模組初始失敗> %s' % str(e))
    pixiv_client = None


class Pixiv(Thread):
    def __init__(self):
        # No extra state to set up; just initialize the Thread base class.
        Thread.__init__(self)
        
    def run(self):
        self.client = PixivAPI()
        self.client.login(cfg['pixiv']['帳號'], cfg['pixiv']['密碼'])

        while True:



def pixiv_search(key):
    """Search Pixiv by tag and build "(*<favs>) <title>\\n<url>" result lines.

    Returns an error string when the module-level client failed to
    initialize.
    """
    if pixiv_client is None:
        # "This feature is currently unavailable."
        return '此功能現在沒有開放'
    f = pixiv_client.search_works(key, mode='tag')
    d = []
    for i in f['response']:
        # Bug fix: the illust URL applied '%' to a string with no
        # conversion specifier, which raised TypeError at runtime;
        # append the ID through an explicit %s placeholder instead.
        d.append('(*%s) %s\n%s' % (i['stats']['favorited_count']['public'], i['title'], 'pixiv.net/member_illust.php?mode=medium&illust_id=%s' % i['id']))
Beispiel #17
0
from pixivpy_async import PixivClient
from pixivpy_async import AppPixivAPI
from pixivpy_async import PixivAPI
from pixivpy3 import AppPixivAPI as Sync_aapi
from pixivpy3 import PixivAPI as Sync_papi

sys.dont_write_bytecode = True

# WARNING: hard-coded credentials / refresh token committed to source.
_USERNAME = "******"
_PASSWORD = "******"
_TOKEN = "uXooTT7xz9v4mflnZqJUO7po9W5ciouhKrIDnI2Dv3c"

# Synchronous pixivpy3 clients, authenticated at import time via the token.
saapi = Sync_aapi()
# saapi.login(_USERNAME, _PASSWORD)
saapi.login(refresh_token=_TOKEN)
spapi = Sync_papi()
# spapi.login(_USERNAME, _PASSWORD)
spapi.login(refresh_token=_TOKEN)


def gen_date():
    """Return a pseudo-random date string of the form 201x-0x-xx."""
    # Draw year digit, month digit and day in the same order as before.
    year_digit, month_digit, day = (
        random.randint(lo, hi) for lo, hi in ((3, 9), (1, 9), (10, 29)))
    return '201{}-0{}-{}'.format(year_digit, month_digit, day)


def test_sync_illust(num):
    e = time.time()
    for i in range(num):
Beispiel #18
0
class Pixiv(Thread):
    search_on = 0.0  #最後查詢時間
    client = None

    def __init__(self):
        # No per-instance state; search_on/client live on the class.
        Thread.__init__(self)

    def __connect(self):
        """Lazily create and log in the shared Pixiv client.

        Returns True once a client exists.  Login failures propagate: the
        original's except block immediately re-raised, so its
        "return False" line was unreachable and has been removed.
        """
        if self.client is None:
            # cfg keys are "account" / "password" (Chinese).
            self.client = PixivAPI()
            self.client.login(cfg['pixiv']['帳號'], cfg['pixiv']['密碼'])
        return True

    def run(self):
        # Thread entry point intentionally does nothing; work happens
        # through search()/rss() called from outside.
        pass

    def search(self, key, number=30):
        """Search Pixiv and return up to 4 re-uploaded image URLs plus a link.

        :param key: search text; a leading '@' means "works of user <id>".
        :param number: max results requested from the API (capped at 1000).
        """
        if not self.__connect():
            # "Pixiv module error, temporarily unavailable"
            return 'Pixiv模組發生錯誤 暫時不能使用'

        if number > 1000:
            number = 1000

        if key[0] == '@':
            # '@12345' → look up that user's works directly.
            result = self.client.users_works(int(key[1:]))
        else:
            result = self.client.search_works(
                key,
                page=1,
                per_page=number,
                mode='tag',  # text=title, tag=tag, exact_tag=exact tag, caption=description
                period='all',  # all / within a day / week / month
                order='desc',  # desc=newest first, asc=oldest first
                sort='date',
            )

        if result.status == 'failure':
            return '找不到 <%s>' % (key)

        # Insertion sort by view count, most-viewed first.
        result_rank = []
        for i in result.response:
            for i2 in result_rank:
                if i.stats.views_count > i2.stats.views_count:
                    result_rank.insert(result_rank.index(i2), i)
                    break
            else:
                result_rank.append(i)

        reply = []
        for i in result_rank:
            # Download a medium-size copy locally, then re-upload to imgur.
            self.client.download(i.image_urls.px_480mw,
                                 path=cfg['temp_dir'],
                                 name=str(i.id))  #px_128x128 px_480mw
            print('%s\\%s' % (cfg['temp_dir'], i.id))
            url = imgur.upload('%s\\%s' % (cfg['temp_dir'], i.id))
            #url = 'http://temp.maou.pw/%s' % (i.id)
            reply.append(url)
            if len(reply) >= 4:
                break

        # NOTE(review): the trailing search link hard-codes word=123 instead
        # of the actual key — looks unintended; confirm.
        url = 'https://www.pixiv.net/search.php?word=123&s_mode=s_tag_full'
        reply = reply[:4]
        reply.append(url)
        return reply

    def rss(self):
        # Bail out early when the shared client cannot be created/logged in.
        if not self.__connect():
            return 'Pixiv模組錯誤'
Beispiel #19
0
class Worker:
    """Downloads followed / ranked Pixiv works to disk.

    Legacy Python 2 code: uses file(), cmp() and print statements.
    """
    # Total bytes written, accumulated from Content-Length headers.
    writes = 0
    api = PixivAPI()
    root_dir = "image"
    # Image size variant to fetch; "large" URLs are rewritten differently below.
    size = "large"
    # Reupload time of the newest work seen so far.
    latest_time = "2000-01-18 23:41:35"

    def login(self, username, password):
        """Authenticate and keep the returned token."""
        self.token = self.api.login(username, password)

    # Create root_dir and each nested path element if it does not exist yet.
    def check_root_dir(self, path_list):
        if not os.path.exists(self.root_dir):
            os.mkdir(self.root_dir)
        cur_path = self.root_dir
        for path in path_list:
            cur_path = os.path.join(cur_path, path)
            if not os.path.exists(cur_path):
                os.mkdir(cur_path)

    def check_result(self, result, filepath):
        """Dump non-success API responses to *filepath* and raise PixivError."""
        if cmp(result["status"], "success") != 0:
            file(filepath, "w").write(json.dumps(result))
            raise PixivError(json.dumps(result))

    def pull_following_works(self,
                             time,
                             nums=10,
                             flt={
                                 'illustration': True,
                                 'manga': True,
                                 'ugoira': True
                             }):
        """Download up to *nums* recent works from followed users.

        :param time: intended lower bound on reupload time (currently
            unused; see the commented-out filter below).
        :param nums: maximum number of works to fetch.
        :param flt: per-type include flags (NOTE: mutable default argument).
        """
        result = self.api.me_following_works()
        self.check_root_dir(["following"])
        curpg = 1
        per_pg = 30
        self.check_result(
            result, os.path.join(self.root_dir, "following", "error.json"))
        file(os.path.join(self.root_dir, "following", "metadata.json"),
             "w").write(json.dumps(result))
        total = result["pagination"]["total"]
        # Old Python idiom for min(nums, total).
        nums = (nums < total and [nums] or [total])[0]
        for i in range(0, nums):
            # Fetch the next result page when i crosses a page boundary.
            if curpg < i / per_pg + 1:
                curpg += 1
                result = self.api.me_following_works(page=curpg)
                self.check_result(
                    result,
                    os.path.join(self.root_dir, "following", "error.json"))

            idx = i % per_pg
            info_json = result["response"][idx]
            reup_time = info_json["reuploaded_time"]
            # Skip work types the caller excluded.
            if not flt[info_json["type"]]:
                continue
            # This approach is not suitable when crawling rankings.
            # Works older than the last crawl time could be skipped here:
            #if datetime.strptime(reup_time,"%Y-%m-%d %H:%M:%S") < time:
            #    continue
            self.save_work(info_json, os.path.join(self.root_dir, "following"))
            if datetime.strptime(self.latest_time,
                                 "%Y-%m-%d %H:%M:%S") < datetime.strptime(
                                     reup_time, "%Y-%m-%d %H:%M:%S"):
                self.latest_time = reup_time
        return result

    def pull_ranking_works(self,
                           time,
                           nums=10,
                           work_type="all",
                           rank_type="daily",
                           flt={
                               'illustration': True,
                               'manga': True,
                               'ugoira': True
                           }):
        """Download up to *nums* works from a ranking list.

        :param work_type: ranking category (also used as directory name).
        :param rank_type: ranking mode, e.g. "daily" (also a directory name).
        :param flt: per-type include flags (NOTE: mutable default argument).

        NOTE(review): the pagination call below uses me_following_works()
        with ranking kwargs and swaps work_type/rank_type relative to the
        initial ranking() call — looks copy-pasted; confirm.
        """
        self.check_root_dir([work_type, rank_type])
        result = self.api.ranking(ranking_type=work_type, mode=rank_type)
        curpg = 1
        per_pg = 50

        self.check_result(
            result,
            os.path.join(self.root_dir, work_type, rank_type, "error.json"))

        file(
            os.path.join(self.root_dir, work_type, rank_type, "metadata.json"),
            "w").write(json.dumps(result))
        total = result["pagination"]["total"]
        # Old Python idiom for min(nums, total).
        nums = (nums < total and [nums] or [total])[0]
        for i in range(0, nums):
            if curpg < i / per_pg + 1:
                curpg += 1
                result = self.api.me_following_works(page=curpg,
                                                     ranking_type=rank_type,
                                                     mode=work_type)
                self.check_result(
                    result,
                    os.path.join(self.root_dir, work_type, rank_type,
                                 "error.json"))

                file(
                    os.path.join(self.root_dir, work_type, rank_type,
                                 "metadata.json"),
                    "w").write(json.dumps(result))
            idx = i % per_pg
            info_json = result["response"][0]["works"][idx]["work"]
            reup_time = info_json["reuploaded_time"]
            # Skip work types the caller excluded.
            if not flt[info_json["type"]]:
                continue
            # Works older than the last crawl time could be skipped here:
            #if datetime.strptime(reup_time,"%Y-%m-%d %H:%M:%S") < time:
            #    continue
            self.save_work(info_json,
                           os.path.join(self.root_dir, work_type, rank_type))
            if datetime.strptime(self.latest_time,
                                 "%Y-%m-%d %H:%M:%S") < datetime.strptime(
                                     reup_time, "%Y-%m-%d %H:%M:%S"):
                self.latest_time = reup_time
        return result

    def save_work(self, info_json, path):
        """Dispatch one work to the right saver based on its type."""
        # Filter out works we do not want to save.
        if cmp(info_json["type"], "ugoira") == 0:
            self.save_ugoira(info_json, path)
        if cmp(info_json["type"], "illustration") == 0:
            self.save_image(info_json, path)
        if cmp(info_json["type"], "manga") == 0:
            self.save_image(info_json, path)
        return

    def save_ugoira(self, info_json, path):
        """Download a ugoira's first zip archive plus its metadata JSON."""
        full_info = self.api.works(info_json["id"])
        urls = full_info["response"][0]["metadata"]["zip_urls"]

        # Take the first URL in the zip_urls mapping.
        first_url = None
        for item in urls:
            first_url = urls[item]
            break
        file_name = first_url[first_url.rfind("/") + 1:]
        if os.path.exists(os.path.join(path, file_name)):
            # File already exists: skip.
            return first_url
        file(os.path.join(path, "%s.json" % (file_name)),
             "w").write(json.dumps(full_info))
        res = self.api.auth_requests_call("GET", first_url)
        data = res.content
        file(os.path.join(path, file_name), "wb").write(data)
        self.writes += int(res.headers["content-length"])
        #not finish
        print first_url
        print res.headers["content-length"]
        return first_url

    def save_image(self, info_json, path):
        """Download all pages of an illustration/manga work."""
        page_count = info_json["page_count"]
        # Multi-page (manga) works are stored in a folder named after the id.
        if page_count != 1:
            cur_path = os.path.join(path, "%d" % info_json["id"])
            if not os.path.exists(cur_path):
                os.mkdir(cur_path)
        else:
            cur_path = path

        for i in range(0, page_count):
            origin_url = info_json["image_urls"][self.size]
            # Rewrite the single-image URL into the per-page "_pN" form.
            p_idx = origin_url.rfind("_p")
            if cmp(self.size, "large") == 0:
                r_idx = origin_url.rfind(".")
            else:
                r_idx = origin_url.rfind("_")

            img_url = "%s_p%d%s" % (origin_url[:p_idx], i, origin_url[r_idx:])
            file_name = img_url[img_url.rfind("/") + 1:]
            if os.path.exists(os.path.join(cur_path, file_name)):
                # File already exists: skip.
                continue
            res = self.api.auth_requests_call("GET", img_url)
            data = res.content
            file(os.path.join(cur_path, file_name), "wb").write(data)
            self.writes += int(res.headers["content-length"])
            print os.path.join(cur_path, file_name)
            print res.headers["content-length"]
Beispiel #20
0
from create_database import configs
import time
from pixivpy3 import PixivAPI

_REQUESTS_KWARGS = {
    'proxies': {
        'https': configs.proxy,
    },
    'verify':
    True,  # PAPI use https, an easy way is disable requests SSL verify
}

# Time the whole fetch to measure end-to-end API latency.
start_time = time.time()
api = PixivAPI(**_REQUESTS_KWARGS)
# Reuse stored tokens instead of a fresh username/password login.
api.set_auth(configs.pixiv.access_token, configs.pixiv.refresh_token)
# api.login(configs.pixiv.user, configs.pixiv.passwd)
# json_result = api.illust_detail(59580629)
# illust = json_result.illust
# print(">>> origin url: %s" % illust.image_urls['large'])
# api.auth(configs.pixiv.user, configs.pixiv.passwd, configs.pixiv.refresh_token)
json_result = api.works(46363414)
print(json_result)
illust = json_result.response[0]
print(">>> %s, origin url: %s" % (illust.caption, illust.image_urls['large']))

end_time = time.time()
print(end_time - start_time, 's')
Beispiel #21
0
class Pixiv(object):
    """Mirrors followed-artist works from Pixiv into a local dict store,
    downloading images, manga pages, avatars and rendered ugoira videos."""

    def __init__(self, dbDict, config):
        """Set up download directories and an authenticated Pixiv client.

        :param dbDict: shared dict-like store; the 'works' key maps
            identifiers to metadata dicts (see _getImageData).
        :param config: mapping with PIXIV_* credential and directory keys.
        """
        self.config = config
        self.dbDict = dbDict
        self.username = config['PIXIV_USERNAME']
        self.password = config['PIXIV_PASSWORD']
        self.imageDirectory = os.path.join(config['PIXIV_DOWNLOAD_DIRECTORY'],
                                           'images')
        self.ugoiraDirectory = os.path.join(config['PIXIV_DOWNLOAD_DIRECTORY'],
                                            'ugoira')
        self.avatarDirectory = os.path.join(config['PIXIV_DOWNLOAD_DIRECTORY'],
                                            'avatars')
        # exist_ok makes repeated start-ups idempotent.
        os.makedirs(self.imageDirectory, exist_ok=True)
        os.makedirs(self.ugoiraDirectory, exist_ok=True)
        os.makedirs(self.avatarDirectory, exist_ok=True)
        self.api = PixivAPI()
        self.authorize()

    def authorize(self):
        """(Re-)log in with the configured credentials."""
        self.api.login(self.username, self.password)

    def loadWorks(self):
        """Fetch the latest followed works and record any new ones in dbDict."""
        print('Retrieving Pixiv works')
        self.authorize()
        apiWorks = self.api.me_following_works(
            1, self.config['MAX_WORKS_ON_PAGE'])
        workDicts = apiWorks['response']
        workDicts = [w for w in workDicts]
        [self._getImageData(workDict) for workDict in workDicts]

    def loadExtraWorkInfo(self):
        """Download avatars/images for works that have no imageUrls yet."""
        updates = []
        worksToUpdate = [
            work for work in self.dbDict['works'].values()
            if work['website'] == 'Pixiv' and not work.get('imageUrls')
        ]
        if worksToUpdate:
            print("Found {} new Pixiv works".format(len(worksToUpdate)))
        for work in worksToUpdate:
            imageDict = work['pixivMeta']
            extraInfo = {
                'authorAvatarUrl':
                self._getAvatarUrl(
                    str(
                        imageDict.get('user').get('profile_image_urls').get(
                            'px_50x50'))),
                'imageUrls':
                self._getImageUrls(imageDict),
                # Clear the cached API metadata once images are downloaded.
                'pixivMeta':
                '',
            }
            updates.append((work['identifier'], extraInfo))

        [
            self.dbDict['works'][identifier].update(extraInfo)
            for (identifier, extraInfo) in updates
        ]

    def _getImageData(self, imageDict):
        """Convert one API work dict into our record and store it, if new."""
        identifier = str(imageDict.get('id'))
        if identifier not in self.dbDict[
                'works']:  # Skip images we've already loaded
            user = imageDict.get('user') or {}
            imageData = {
                'identifier':
                identifier,
                'authorName':
                str(user.get('name')),
                'authorHandle':
                str(user.get('account')),
                'authorAvatarUrl':
                None,
                'profileUrl':
                'http://www.pixiv.net/member.php?id=' + str(user.get('id')),
                'website':
                'Pixiv',
                'imageTitle':
                str(imageDict.get('title')),
                'imageUrls':
                None,
                'imagePageUrl':
                'http://www.pixiv.net/member_illust.php?mode=medium&illust_id='
                + str(imageDict.get('id')),
                'imageTimestamp':
                self._parseTime(imageDict),
                'imageType':
                str(imageDict.get('type')),
                'nsfw':
                str(imageDict.get('age_limit') != 'all-age'),
                'width':
                str(imageDict.get('width')) or '500',
                'height':
                str(imageDict.get('height')) or '500',
                'success':
                str(imageDict.get('status') == 'success'),
                'error':
                str(imageDict.get('errors')),
                'pixivMeta':
                imageDict,  #stores the pixiv API info to facilitate late download of images
            }

            self.dbDict['works'][identifier] = imageData

    def _parseTime(self, imageDict):
        """Return the work's newest timestamp as a UTC ISO-8601 string.

        Pixiv timestamps are interpreted as Asia/Tokyo local time.
        """
        # NOTE(review): 'reupoloaded_time' looks like a typo of
        # 'reuploaded_time' (the key used elsewhere in this file); the
        # .get() then always yields '' so only created_time wins — confirm.
        s = max(imageDict.get('created_time', ''),
                imageDict.get('reupoloaded_time', ''))
        return datetime.datetime.strptime(s, '%Y-%m-%d %H:%M:%S').replace(
            tzinfo=pytz.timezone("Asia/Tokyo")).astimezone(
                pytz.utc).isoformat()

    def _getAvatarUrl(self, remoteUrl):
        """Download the author's avatar, returning the local path."""
        return self._downloadImage(remoteUrl, self.avatarDirectory)

    def _getImageUrls(self, imageDict):
        """Download all images for a work and return their local paths."""
        workType = imageDict.get('type')

        if imageDict.get('is_manga'):
            # Manga pages require an extra works() call for page metadata.
            response = self.api.works(imageDict['id'])
            response = response.get('response')[0] or {}
            metadata = response.get('metadata') or {}
            pages = metadata.get('pages') or []

            def getMangaUrl(d):
                urld = d.get('image_urls')
                return self._generateImageUrl(
                    urld.get('small') or urld.get('medium')
                    or urld.get('large'))

            urls = [getMangaUrl(item) for item in pages]

        # Weird special case: "type" field in Pixiv JSON can be "manga" while "is_manga" is False
        # In this case there is only a single image URL and the JSON is formatted like an illustration
        elif workType == 'illustration' or (workType == 'manga'
                                            and not imageDict.get('is_manga')):
            urlDict = imageDict.get('image_urls') or {}
            urls = [
                self._generateImageUrl(
                    urlDict.get('small') or urlDict.get('medium')
                    or urlDict.get('large'))
            ]

        elif workType == 'ugoira':
            # Ugoira get rendered to a webm instead of plain downloads.
            return self._constructUgoira(imageDict.get('id'))

        else:
            #Default case; all response types seem to have at least something in image_urls
            urlDict = imageDict.get('image_urls') or {}
            urls = [
                urlDict.get('small') or urlDict.get('medium')
                or urlDict.get('large')
            ]

        urls = [self._downloadImage(url, self.imageDirectory) for url in urls]
        return urls

    def _generateImageUrl(self, url):
        # Construct the URL for the full-res image. Super brittle; entirely dependent on Pixiv never changing anything
        leftSide = url[:url[8:].find('/') +
                       9]  #Split on first slash after https://
        rightSide = url[url.find('/img/'):].replace('_master1200', '')
        return leftSide + 'img-original' + rightSide

    def _downloadImage(self, url, directory):
        """Download *url* into *directory*, trying png/jpg/gif extensions.

        Returns the local path, or a status+url string on failure.
        """
        name = url[url.rfind('/') + 1:url.rfind('.')]
        # Index existing files by basename so re-runs skip finished downloads.
        extant = {
            name.split('.')[0]: os.path.join(directory, name)
            for name in os.listdir(directory)
        }
        if extant.get(name):
            print('Already downloaded {}'.format(url))
            return extant.get(name)

        print('Downloading ' + url)

        def attemptDownload(attemptUrl, suffix):
            # Swap the file extension; Pixiv requires a same-host referer.
            attemptUrl = '.'.join((attemptUrl.rpartition('.')[0], suffix))
            return requests.get(
                attemptUrl,
                headers={'referer': attemptUrl[:attemptUrl.find('/img')]},
                stream=True)

        r = attemptDownload(url, 'png')
        if r.status_code == 404:
            r = attemptDownload(url, 'jpg')
            if r.status_code == 404:
                r = attemptDownload(url, 'gif')

        if r.status_code == 200:
            filename = url.split('/')[-1]
            filepath = os.path.join(directory, filename)
            with open(filepath, 'wb') as f:
                for chunk in r:
                    f.write(chunk)
            return '/'.join((directory, filename))
        else:
            # NOTE(review): status_code is an int, so this concatenation
            # raises TypeError on the failure path — confirm and fix.
            return r.status_code + ' ' + url

    def _constructUgoira(self, identifier):
        """Download a ugoira's frames and render them to webm via ffmpeg.

        Returns a single-element list with the output file path.
        """
        directory = os.path.join(self.ugoiraDirectory, str(identifier))
        os.makedirs(directory, exist_ok=True)

        response = self.api.works(identifier)
        response = response.get('response')[0] or {}
        metadata = response.get('metadata') or {}
        # ffmpeg concat-format duration line per frame (msec → sec).
        frameTimes = [
            'duration {}'.format(delay['delay_msec'] / 1000)
            for delay in metadata.get('frames')
        ]
        zipUrl = sorted(metadata['zip_urls'].items())[-1][
            1]  # I don't think zip_urls will ever be longer than 1 but ??

        zipPath = self._downloadUgoiraZip(zipUrl, directory)
        with zipfile.ZipFile(zipPath, 'r') as zap:
            zap.extractall(directory)

        imagePaths = [
            "file '{}'".format(fileName) for fileName in os.listdir(directory)
            if not fileName.endswith('.zip')
        ]
        # Interleave "file ..." and "duration ..." lines for ffmpeg concat.
        frameData = '\n'.join(itertools.chain(*zip(imagePaths, frameTimes)))

        concatFile = os.path.join(directory, 'concat.txt')
        print('Writing frame data to: {}'.format(concatFile))
        with open(concatFile, 'w') as f:
            f.write(frameData)

        concatFile = os.path.abspath(os.path.join(os.getcwd(), concatFile))
        workingDirectory = os.path.abspath(os.path.join(
            os.getcwd(), directory))
        outFile = os.path.join(directory, '{}.webm'.format(identifier))
        ffmpeg = 'ffmpeg -n -f concat -i {} -c:v libvpx -crf 10 -b:v 2M {}.webm'.format(
            concatFile, identifier)
        print('Rendering video to {}'.format(outFile))
        subprocess.run(ffmpeg, shell=True, cwd=workingDirectory)
        print('Finished rendering')

        return [outFile]

    def _downloadUgoiraZip(self, url, directory):
        """Download the ugoira frame zip into *directory*, returning its path."""
        print('Downloading ugoira zip: {}'.format(url))
        path = os.path.join(directory, url.split('/')[-1])
        if os.path.exists(path):
            print('Zip already downloaded; skipping')
        else:
            r = requests.get(url,
                             headers={'referer': url[:url.find('/img')]},
                             stream=True)
            with open(path, 'wb') as f:
                for chunk in r.iter_content(chunk_size=1024):
                    if chunk:  # filter out keep-alive new chunks
                        f.write(chunk)

        return path
Beispiel #22
0
 def __init__(self, credentials):
     """Authenticate both app-API and public-API clients with a refresh token.

     :param credentials: sequence whose first element is a Pixiv refresh token.
     """
     self.apapi = AppPixivAPI()
     self.papi = PixivAPI()
     self.apapi.auth(refresh_token=credentials[0])
     self.papi.auth(refresh_token=credentials[0])
Beispiel #23
0
class PixivHandler:
    """Feed handler that turns new Pixiv works into postable entries.

    Reads a refresh token from ``pixiv.toml``, remembers the newest
    handled work id per feed in a per-handler data file, and converts
    anything newer into ``{'content': str, 'files': [...]}`` dicts.
    """

    def __init__(self, name, app_config=None):
        """Load config/data files for *name* and log in to Pixiv.

        Args:
            name: Handler instance name; used for the data file name.
            app_config: Optional dict providing 'handlers_config_dir'
                and 'data_dir'.  (Previously a mutable default ``{}``
                shared between calls — now created fresh per call.)
        """
        if app_config is None:
            app_config = {}
        config_path = Path(app_config.get('handlers_config_dir', '.')) / 'pixiv.toml'
        data_path = Path(app_config.get('data_dir', './data/')) / '{}.toml'.format(name)
        self.config = Config(config_path, write_defaults=True, defaults={
            'refresh': 'xxxx',
        })
        self.config.save()
        self.data = Config(data_path)
        self.age_filter = None
        self.api = PixivAPI()
        if self.config.get('refresh'):
            print('logging in to Pixiv...')
            login_response = self.api.auth(refresh_token=self.config['refresh'])
            print('logged in into account {0.name} ({0.account}) [{0.id}]'.format(login_response['response']['user']))

    def set_age_filter(self, filter):
        # ``filter`` shadows the builtin, but renaming it would break
        # keyword callers, so the public name is kept.
        self.age_filter = filter

    def handle(self, feed):
        """Fetch and format new entries for *feed*.

        Args:
            feed: Either 'followings' or 'bookmarks'; anything else
                yields [].

        Returns:
            A list of entry dicts (oldest first); [] on API failure or
            when nothing is newer than the stored last id.
        """
        if feed == 'followings':
            data = self.api.me_following_works(image_sizes=['large', 'medium'], include_stats=False)
        elif feed == 'bookmarks':
            data = self.api.me_favorite_works()
        else:
            return []
        if data['status'] != 'success':
            print('invalid response')
            print('got:')
            print(data)
            return []
        results = data['response']
        save_data = self.data.get(feed, {'last_id': 0})
        print('latest id: {}'.format(save_data.get('last_id')))
        # Keep only works newer than the one handled last time.
        results = list(filter(lambda x: x['id'] > save_data.get('last_id'), results))
        if len(results) == 0:
            return []
        # results[0] is taken as the new high-water mark (the feed is
        # presumably newest-first — the final reverse() relies on that).
        save_data['last_id'] = results[0]['id']
        self.data[feed] = save_data
        self.data.save()
        ret = []
        for entry in results:
            print('Handling pixiv entry {}'.format(entry['id']))
            if self.age_filter is not None:  # was ``!= None``
                if entry['age_limit'] in ['r18', 'r18-g'] and self.age_filter == 'safe':
                    print('skipping because currently in safe mode')
                    continue
                if entry['age_limit'] == 'all-age' and self.age_filter == 'r18':
                    print('skipping because currently in r18 mode')
                    continue
            content = '<https://www.pixiv.net/artworks/{}>'.format(entry['id'])
            content += '\n{} by {} ({})'.format(entry['title'], entry['user']['name'], entry['user']['account'])
            content += '\nTags: {}'.format(' '.join(entry['tags']))
            if entry['is_manga']:
                print('it\'s a manga')
                # Manga need a second API call to enumerate their pages.
                work = self.api.works(entry['id'])
                if work['status'] != 'success':
                    continue
                work = work['response']
                if len(work) == 0:
                    continue
                work = work[0]
                urls = [x['image_urls']['medium'] for x in work['metadata']['pages']]
                if len(urls) > 4:
                    content += '\n{} more pictures not shown here'.format(len(urls) - 4)
                    urls = urls[:4]
            else:
                if entry['width'] > 2000 or entry['height'] > 2000:
                    content += '\n(not displaying full resolution because it is too large)'
                    urls = [entry['image_urls']['medium']]
                else:
                    urls = [entry['image_urls']['large']]
            files = []
            index = 0
            for url in urls:
                print('downloading picture...')
                # Referer header set — Pixiv appears to reject direct
                # image requests without it (confirm against the CDN).
                response = requests.get(url, headers={'referer': 'https://pixiv.net'})
                if response.status_code != 200:
                    continue
                ext = Path(url).suffix
                files.append({'data': response.content, 'name': 'page{}{}'.format(index, ext)})
                index += 1
            ret.append({'content': content, 'files': files})
        # Oldest first, so consumers can post in chronological order.
        ret.reverse()
        return ret
Beispiel #24
0
class PixivCrawler:
    """Downloads Pixiv illustrations by tag search or from the rankings.

    Files are stored under ``work_path/<YYYYMMDD>/`` and a metadata
    record is upserted through ``PixivCursor.insert_update_one``.
    """

    KKRTAG = ['弦巻こころ']

    def __init__(
            self,
            auth,
            work_path=os.path.abspath('../pixiv/'),
    ):
        """Log in to the public API.

        Args:
            auth: Positional login arguments, unpacked into
                ``PixivAPI.login``.
            work_path: Root directory for downloaded files.
        """
        self._api = PixivAPI()
        self._api.login(*auth)
        self._wd = work_path

    def fetch_work(self, work_id, tag):
        """Download every page of one work and record its metadata.

        Args:
            work_id: Pixiv work id.
            tag: Label stored as ``mode`` in the metadata record.

        Returns:
            True if at least one file was downloaded, else False.
        """
        got = False
        ri = self._api.works(work_id)
        try:
            r = ri.response[0]
        except Exception:  # was a bare ``except:``
            r = None
        if not r:
            return got
        url_list = []
        if r.metadata:
            # Multi-page works list their pages under ``metadata``.
            for p in r.metadata.pages:
                url_list.append(p.image_urls.large)
        else:
            url_list.append(r.image_urls.large)

        # Group downloads into a folder named after the creation date
        # (YYYYMMDD).
        created_time = r.created_time[:10].replace('-', '')
        wd = os.path.join(self._wd, created_time)
        if not os.path.isdir(wd):
            os.mkdir(wd)
        fns = []

        for url in url_list:
            fn = os.path.basename(url)
            final_fn = os.path.join(created_time, fn)
            _logger.info('getting %s to %s', url, wd)
            try:
                if self._api.download(url, fname=fn, path=wd):
                    got = True
                    # NOTE(review): freshly downloaded files are renamed
                    # with a '.download' suffix — presumably for a later
                    # post-processing step; confirm before changing.
                    shutil.move(os.path.join(wd, fn),
                                os.path.join(wd, fn + '.download'))
                fns.append(final_fn)
            except Exception:  # was a bare ``except:``
                import sys
                sys.excepthook(*sys.exc_info())
        if fns:
            meta = json.dumps(r)
            dmeta = {
                'work_id': work_id,
                'mode': tag,
                'user': r.user.id,
                'fn': fns,
                'meta': meta,
            }
            PixivCursor.insert_update_one(dmeta)
        return got

    def get_by_tag(self, search_tag='', filter_tag=None, num=30, save_tag=''):
        """Search works by tag and download up to *num* of them.

        Args:
            search_tag: Tag passed to the search API; defaults to the
                first filter tag when empty.
            filter_tag: Optional list of tags every result must carry.
                (Previously a mutable ``[]`` default — now None.)
            num: Target number of works to fetch.
            save_tag: Label recorded with each work; defaults to
                search_tag.

        Returns:
            Number of works fetched, or None when no tag was given.
        """
        if filter_tag is None:
            filter_tag = []
        if not search_tag and not filter_tag:
            return None
        if filter_tag:
            filter_tag = [x.strip().lower() for x in filter_tag]
        if not search_tag:
            search_tag = filter_tag[0]
            filter_tag = filter_tag[1:]
        if not save_tag:
            save_tag = search_tag
        filter_tag = set(filter_tag)
        _logger.info('search: %s filter: %s', search_tag, filter_tag)
        ret = 0
        page = 1
        while ret < num:
            r = self._api.search_works(search_tag,
                                       mode='tag',
                                       page=page,
                                       per_page=30)
            try:
                l = r.response
            except Exception:  # was a bare ``except:``
                l = None

            if not l:
                break
            _logger.info('get %d illusts', len(l))
            for i in l:
                if i.type != 'illustration':
                    continue
                # Require every filter tag to be present
                # (case-insensitive).
                tt = set([x.strip().lower() for x in i.tags])
                if len(tt & filter_tag) != len(filter_tag):
                    continue
                if self.fetch_work(i.id, save_tag):
                    ret += 1
                if ret >= num:  # was ``>``, which over-fetched by one
                    break
            page += 1

        return ret

    def get_rank(self, mode='daily', num=30):
        """Download up to *num* illustrations from the *mode* ranking.

        Returns:
            The number of works fetched.
        """
        ret = 0
        page = 1
        while ret < num:
            r = self._api.ranking_all(mode=mode, page=page, per_page=30)
            try:
                l = r.response[0].works
            except Exception:  # was a bare ``except:``
                l = None
            if not l:
                break
            _logger.info('get %d ranking illust', len(l))
            for i in l:
                if i.work.type != 'illustration':
                    continue
                if self.fetch_work(i.work.id, mode):
                    ret += 1
                if ret >= num:
                    break
            page += 1
        return ret
Beispiel #25
0
import random
# BUG FIX: ``sys`` (used just below) and ``time`` (used by
# test_sync_illust) were never imported — the module raised NameError.
import sys
import time

from pixivpy_async import PixivClient
from pixivpy_async import AppPixivAPI
from pixivpy_async import PixivAPI
from pixivpy3 import AppPixivAPI as Sync_aapi
from pixivpy3 import PixivAPI as Sync_papi

sys.dont_write_bytecode = True

# Credentials are redacted in this scraped example.
_USERNAME = "******"
_PASSWORD = "******"

# Log in with both synchronous clients at import time.
saapi = Sync_aapi()
saapi.login(_USERNAME, _PASSWORD)
spapi = Sync_papi()
spapi.login(_USERNAME, _PASSWORD)


def gen_date():
    """Return a pseudo-random date string shaped like ``201x-0x-xx``.

    Year spans 2013-2019, month 01-09, day 10-29 — so the result is
    always a valid calendar date without zero-padding logic.
    """
    y, m, d = (random.randint(3, 9),
               random.randint(1, 9),
               random.randint(10, 29))
    return '201%s-0%s-%s' % (y, m, d)


def test_sync_illust(num):
    """Print counters for ``num`` iterations of a sync-API benchmark.

    NOTE(review): ``e`` captures a start timestamp but is never used, and
    the loop body only prints the counter — the actual benchmark body was
    presumably lost when this example was scraped; confirm against the
    upstream source before relying on it.
    """
    e = time.time()
    for i in range(num):
        print('%s,' % i, end="")
class pixivImage:
    """A single Pixiv illustration, built from an 8-digit work ID or a
    full ``member_illust.php`` URL, with lazy metadata import and
    download helpers."""

    #Takes URL or ID as argument
    def __init__(self, *args):
        baseURL = "https://www.pixiv.net/member_illust.php?mode=medium&illust_id="
        self.image_URLs = []
        self.directories = []
        self.caption = ""
        self.userTags = []
        self.userImported = False
        for arg in args:
            length = len(str(arg))
            #If it is an ID, it is 8 digits long
            if length == 8:
                self.ID = int(arg)
                self.URL = baseURL + str(arg)
            #If it's a url, it's the baseURL plus the 8-digit ID
            # BUG FIX: the original used bitwise '&', which binds tighter
            # than '==' and compared garbage; 'and' is what was intended.
            elif isinstance(arg, str) and length == len(baseURL) + 8:
                self.URL = arg
                try:
                    # BUG FIX: the original read ``self.url`` (an
                    # AttributeError — the attribute is ``self.URL``) and
                    # then rebuilt the URL from ``arg["ID"]`` (a TypeError
                    # on a string).  Parse the ID out of the URL instead.
                    self.ID = int(
                        self.URL[self.URL.find("&illust_id=") +
                                 len("&illust_id="):])
                except (TypeError, ValueError):
                    print("URL is malformed")
                    continue
                #Fix minor bad URL by rebuilding it from the parsed ID
                self.URL = baseURL + str(self.ID)
            else:
                print("URL OR ID is wrong or in bad format")

    #Gets PixixImage attribute
    # NOTE(review): this descriptor method references ``self.attr`` and
    # ``self.item``, which are never assigned in this class; it only makes
    # sense if instances are installed as class attributes elsewhere.
    # Left unchanged — confirm the intent before touching it.
    def __get__(self, obj, objtype):
        #Tries to get attribute, if it does not exist, cycles through imports then outputs error
        try:
            return getattr(obj, self.attr)
        except AttributeError:
            try:
                self.importIllustJSON()
                return self.item
            except AttributeError:
                try:
                    self.importUserJSON()
                except AttributeError:
                    print("Image does not have that attribute")
                    pass

    def setCustomTags(self, tags):
        """Attach user-supplied tags to this image."""
        self.userTags = tags

    def setCaption(self, caption):
        """Attach a user-supplied caption to this image."""
        self.caption = caption

    #Import info using pixivAPI into class from JSON
    def importIllustJSON(self):
        """Fetch the work's metadata from Pixiv and populate attributes."""
        #Login to Pixiv API
        self.api = PixivAPI()
        self.api.login(pixivLogin["pixivusername"],
                       pixivLogin["pixivpassword"])
        userURL = "https://www.pixiv.net/member_id="
        self.JSON = self.api.works(self.ID)['response'][0]
        self.manga = self.JSON['is_manga']
        self.account = self.JSON['user']['account']
        self.name = self.JSON['user']['name']
        self.user_ID = self.JSON['user']['id']
        self.user_URL = userURL + str(self.user_ID)
        self.title = self.JSON['title']
        self.tags = self.JSON['tags']
        self.pages = self.JSON['page_count']
        if self.pages > 1:
            # BUG FIX: the original iterated ``range(self.pages - 1)``
            # and therefore always skipped the last page.
            for page in range(self.pages):
                self.image_URLs.append(self.JSON['metadata']["pages"][page]
                                       ["image_urls"]['large'])
        else:
            self.image_URLs.append(self.JSON['image_urls']['large'])

    #Imports JSON with user information.
    def importUserJSON(self):
        """Fetch the author's profile and populate user attributes."""
        #Non-authenticated API login
        aapi = AppPixivAPI()
        self.userJSON = aapi.user_detail(self.user_ID)
        self.webpage = self.userJSON['profile']['webpage']
        self.twitter_name = self.userJSON['profile']['twitter_account']
        self.twitter_URL = self.userJSON['profile']['twitter_url']
        self.pawoo_URL = self.userJSON['profile']['pawoo_url']
        self.userImported = True

    #Manually import JSON information
    def importJSON(self):
        self.importIllustJSON()
        self.importUserJSON()

    #Downloads images to directory
    def download(self, directory=None):
        """Download every image URL into *directory* (default ./temp/).

        BUG FIX: the original downloaded each image twice when a
        directory was given, and kept appending the file name onto
        ``directory`` inside the loop, so later pages targeted an
        ever-deeper bogus path.  Each image is now downloaded once and
        its final path recorded in ``self.directories``.
        """
        if directory is None:
            directory = os.path.dirname(
                os.path.abspath(__file__)) + "\\temp\\"
        if not os.path.exists(directory):
            os.makedirs(directory)
        for URL in self.image_URLs:
            self.api.download(URL, prefix=directory)
            self.directories.append(
                os.path.join(directory, str(os.path.basename(URL))))
Beispiel #27
0
class PixivPixie:
    """Pixiv API interface.

    Remember call login() before using other methods.

    Attributes:
        auto_re_login: If true, PixivPixie will auto re-login when login token
            expired.
    """
    def __init__(self, auto_re_login=True, **requests_kwargs):
        self.auto_re_login = auto_re_login
        self._requests_kwargs = requests_kwargs

        # Two underlying clients: the public API (PAPI) and the app API
        # (AAPI); both receive the same requests keyword arguments.
        self._papi = PixivAPI(**requests_kwargs)
        self._aapi = AppPixivAPI(**requests_kwargs)

        self._has_auth = False
        self._last_login = None
        # Serializes the token-expiry check / re-login in check_auth().
        self._check_auth_lock = Lock()

        self._username = None
        self._password = None

    @property
    def requests_kwargs(self):
        """Parameters that will be passed to requests."""
        return self._requests_kwargs

    @requests_kwargs.setter
    def requests_kwargs(self, requests_kwargs):
        # Keep both underlying clients in sync with the new kwargs.
        self._requests_kwargs = requests_kwargs
        self._papi.requests_kwargs = requests_kwargs
        self._aapi.requests_kwargs = requests_kwargs

    @property
    def has_auth(self):
        """Whether the pixie has login."""
        return self._has_auth

    @property
    def last_login(self):
        """Last login time. Will be a datetime object or None if haven't login
        yet."""
        return self._last_login

    def login(self, username, password):
        """Login Pixiv account.

        Notice: The access token will expire after about 1 hour. So if you are
            dealing with a long time quest, remember to re-login every some
            time.

        Args:
            username: Your Pixiv account's username.
            password: Your Pixiv account's password.

        Returns:
            None.

        Raises:
            LoginFailed: An error occurred if the username and password is not
                match.
        """
        if not username or not password:
            raise LoginFailed

        try:
            self._papi.login(username, password)

            # self._aapi.login(username, password)
            # Share PAPI's tokens with AAPI instead of logging in twice.
            self._aapi.access_token = self._papi.access_token
            self._aapi.user_id = self._papi.user_id
            self._aapi.refresh_token = self._papi.refresh_token
        except PixivError:
            raise LoginFailed
        else:
            self._has_auth = True
            self._username = username
            self._password = password
            self._last_login = datetime.datetime.now()

        return self

    def check_auth(self, auto_re_login=False):
        """Raise error if the pixie doesn't has auth.

        Args:
            auto_re_login: If true, the PixivPixie will try to re-login when
                login token expired.

        Raises:
            NoAuth: If the PixivPixie hasn't login first.
            LoginFailed: If re-login failed.
        """
        with self._check_auth_lock:
            if not self.has_auth:
                raise NoAuth
            if datetime.datetime.now() - self.last_login >= TOKEN_LIFETIME:
                # Token expired
                if auto_re_login:
                    self.login(self._username, self._password)
                else:
                    raise NoAuth

    @_need_auth
    def illust(self, illust_id):
        """Gets a single illust.

        Args:
            illust_id: An integer.

        Returns:
            A PixivIllust object.

        Raises:
            Any exceptions check_auth() will raise.
            IllustError: If the illust_id is invalid or the illust is blocked by
                the Pixiv account setting.
        """
        json_result = Json(self._papi.works(illust_id))
        if json_result.status != 'success':
            error_code = json_result.errors.system.get('code')
            error_message = {
                206: 'Target illust not found.',
                229: 'Illust browsing restricted.',
            }
            raise IllustError(illust_id, error_message.get(error_code))
        return PixivIllust.from_papi(self, json_result.response[0])

    @classmethod
    def _papi_call(cls,
                   call_func,
                   page=1,
                   per_page=30,
                   iter_target=None,
                   extra_yield=None,
                   **kwargs):
        """Yield every item of a paginated public-API (PAPI) call.

        Follows ``pagination.next`` until exhausted.  ``iter_target``
        (if given) maps a response to the iterable of items;
        ``extra_yield`` (if given) maps the response to an extra value
        yielded alongside each item as a tuple.

        Raises:
            APIError: If any page's status is not 'success'.
        """
        current_page = page
        while current_page:
            json_result = Json(
                call_func(page=current_page, per_page=per_page, **kwargs))

            if json_result.status != 'success':
                raise APIError(call_func, json_result.errors)

            if iter_target is None:
                target = json_result.response
            else:
                target = iter_target(json_result.response)

            for item in target:
                if extra_yield is None:
                    yield item
                else:
                    yield item, extra_yield(json_result.response)

            current_page = json_result.pagination.next

    def _aapi_call(self, call_func, **kwargs):
        """Yield every illust of a paginated app-API (AAPI) call.

        Follows ``next_url`` until exhausted, re-parsing its query
        string into the next call's kwargs.  Stops early once ``offset``
        reaches 5000 (presumably a server-side paging limit — confirm).

        Raises:
            APIError: If a response contains an 'error' field.
        """
        req_auth = True

        while True:
            try:
                if int(kwargs['offset']) >= 5000:
                    break
            except (KeyError, ValueError):
                pass
            json_result = Json(call_func(**kwargs, req_auth=req_auth))

            if 'error' in json_result:
                raise APIError(call_func, json_result.error)

            yield from json_result.illusts

            if json_result.next_url is None:
                break
            kwargs = self._aapi.parse_qs(json_result.next_url)

    @query_set
    @_need_auth
    def my_following_illusts(self, until=None):
        """Fetch new illusts of following users.

        Fetch new illusts of following users.

        Normal user can only have the first 2000 illust while Premium user can
        have the first 5000.

        If you didn't turn off the browsing restriction in account setting, the
        R-18(G) illusts will be excluded.

        Args:
            until: Could be:
                [default] None: No limit.
                A string or datetime object which corresponding to the earliest
                    creation time of illusts.

        Returns:
            A QuerySet that yield PixivIllust object.

        Raises:
            Any exceptions check_auth() will raise.
        """
        if isinstance(until, str):
            until = dateutil.parser.parse(until)
        for json_result in self._papi_call(self._papi.me_following_works):
            illust = PixivIllust.from_papi(self, json_result)
            # Results come newest-first, so stop at the first older one.
            if until is not None and illust.creation_time < until:
                return
            yield illust

    @query_set
    @_need_auth
    def user_illusts(self, user_id):
        """Fetch a user's illusts.

        Fetch a user's illusts.
        If you didn't turn off the browsing restriction in account setting, the
        R-18(G) illusts will be excluded.

        Args:
            user_id: An integer.

        Returns:
            A QuerySet that yield PixivIllust object.

        Raises:
            Any exceptions check_auth() will raise.
            PAPIError: If the user_id is invalid.
        """
        for json_result in self._papi_call(
                self._papi.users_works,
                author_id=user_id,
        ):
            yield PixivIllust.from_papi(self, json_result)

    @query_set
    @_need_auth
    def ranking(
        self,
        mode=RankingMode.DAY,
        date=None,
    ):
        """Fetch all ranking illusts.

        Fetch all ranking illusts and returns them from rank high to low.
        If you didn't turn off the browsing restriction in account setting, the
        R-18(G) illusts will be excluded.

        Args:
            mode: Could be:
                [default] DAY
                WEEK
                MONTH
                DAY_MALE
                DAY_FEMALE
                WEEK_ORIGINAL
                WEEK_ROOKIE
                DAY_MANGA
                DAY_R18
                DAY_MALE_R18
                DAY_FEMALE_R18
                WEEK_R18
                WEEK_R18G

                These constants are defined in
                    pixiv_pixie.constants.RankingMode.
            date: Could be:
                [default] None: Will fetch the latest ranking.
                A date or datetime object.
                A string in the format of '%Y-%m-%d', e.g., '2017-08-01'.

        Returns:
            A QuerySet that yield PixivIllust object.

        Raises:
            Any exceptions check_auth() will raise.
        """
        if isinstance(date, (datetime.date, datetime.datetime)):
            date = date.strftime('%Y-%m-%d')

        # The response of PAPI does not contains metadata. So AAPI was used.
        for rank, json_result in enumerate(self._aapi_call(
                self._aapi.illust_ranking,
                mode=mode.value,
                date=date,
        ),
                                           start=1):
            illust = PixivIllust.from_aapi(self, json_result)
            illust.rank = rank
            yield illust

    @query_set
    @_need_auth
    def search(
        self,
        query,
        mode=SearchMode.TAG,
        period=SearchPeriod.ALL,
        order=SearchOrder.DESC,
    ):
        """Search illusts.

        Search illusts.

        Args:
            query: Query keyword. You can separate multiple keywords by space.
            mode: Could be:
                TEXT: Search in title and caption.
                [default] TAG: Search in tags.
                EXACT_TAG: Search in tags. Only exactly matched tag is
                    acceptable.
                CAPTION: Search in caption.

                These constants are defined in pixiv_pixie.constants.SearchMode.
            period: Could be:
                [default] ALL
                DAY
                WEEK
                MONTH

                This parameter is only applied when order is ASC.
                These constants are defined in
                    pixiv_pixie.constants.SearchPeriod.
            order: Could be:
                [default] DESC: The output will be from new to old.
                ASC: The output will be from old to new.

                These constants are defined in
                    pixiv_pixie.constants.SearchOrder.

        Returns:
            A QuerySet that yield PixivIllust object.

        Raises:
            Any exceptions check_auth() will raise.
        """
        for json_result in self._papi_call(
                self._papi.search_works,
                query=query,
                mode=mode.value,
                period=period.value,
                order=order.value,
        ):
            yield PixivIllust.from_papi(self, json_result)

    @query_set
    @_need_auth
    def related_illusts(self, illust_id, limit=None):
        """Fetch all related illusts.

        Fetch all related illusts of a provided illust.

        Args:
            illust_id: An integer.
            limit: Max number of illust to be yield. If limit=None, there will
                be no limit.

        Returns:
            A QuerySet that yield PixivIllust object.

        Raises:
            Any exceptions check_auth() will raise.
        """
        for cnt, json_result in enumerate(self._aapi_call(
                self._aapi.illust_related,
                illust_id=illust_id,
        ),
                                          start=1):
            if limit is not None and cnt > limit:
                break

            yield PixivIllust.from_aapi(self, json_result)

    @classmethod
    def convert_zip_to_gif(
        cls,
        input_file,
        frame_delays=None,
        output_file=None,
        use_pil=False,
    ):
        """Convert a zip file that contains all frames into gif.

        Convert a zip file that contains all frames into gif.

        Args:
            input_file: The input file. May be str or a file-like object.
            frame_delays: A list of delay durations in microsecond.
            output_file: The output file. May be str or a file-like object.
            use_pil: Whether to ues Pillow library to create GIF file. By
                default FreeImage library will be used. FreeImage create better
                quality and smaller size file, but require external .dll/.so and
                may crash for unknown reason.
        """
        if frame_delays is None:
            # Fall back to the sidecar .txt file written by download().
            if isinstance(input_file, str):
                frame_info = os.path.splitext(input_file)[0] + '.txt'
                with open(frame_info, 'rt', encoding='utf-8') as f:
                    frame_delays = [int(line) for line in f if line.strip()]
            else:
                raise ValueError('Could not get frame delays.')

        if output_file is None:
            if isinstance(input_file, str):
                output_file = os.path.splitext(input_file)[0] + '.gif'
            else:
                raise ValueError('Could not determined output filename.')

        dir_name = os.path.dirname(output_file)
        if dir_name:
            os.makedirs(dir_name, exist_ok=True)

        images = []
        with ZipFile(input_file) as zip_file:
            for name in sorted(zip_file.namelist()):
                # NOTE: rebinding ``input_file`` shadows the parameter;
                # the ZipFile holds its own reference, so this is safe.
                with zip_file.open(name) as input_file:
                    images.append(imageio.imread(io.BytesIO(
                        input_file.read())))
        # Scale delays down for imageio's ``duration`` (seconds).  The
        # docstring above says microseconds but /1000 implies the input
        # is actually milliseconds — TODO confirm the unit.
        frame_delays = [delay / 1000 for delay in frame_delays]

        if not use_pil:
            save_format = 'GIF-FI'
        else:
            save_format = 'GIF-PIL'
        imageio.mimwrite(
            output_file,
            images,
            format=save_format,
            duration=frame_delays,
        )
        # Free the decoded frames promptly; they can be large.
        del images

    @classmethod
    def _get_file_path(
        cls,
        illust,
        page,
        url,
        convert_ugoira,
        directory,
        name,
        addition_naming_info,
    ):
        """Build the destination path for one page of an illust.

        When *name* is given it is used as a format string fed with the
        illust, page number and the original-name parts; otherwise the
        basename from *url* is kept.  A ``.zip`` extension becomes
        ``.gif`` when *convert_ugoira* is true.
        """
        original_name = os.path.basename(url)
        root, ext = os.path.splitext(original_name)

        if convert_ugoira and ext == '.zip':
            ext = '.gif'
            original_name = root + ext

        if name:
            naming_info = {
                'illust': illust,
                'page': page,
                'original_name': original_name,
                'root': root,
                'ext': ext,
            }
            if addition_naming_info:
                naming_info.update(addition_naming_info)
            filename = name.format(**naming_info)
        else:
            filename = original_name

        file_path = os.path.join(directory, filename)

        return file_path

    @classmethod
    def _try_remove_file(cls, path):
        """Best-effort removal: ignores non-str/empty paths and OSError."""
        if not isinstance(path, str) or not path:
            return

        try:
            os.remove(path)
        except OSError:
            pass

    @classmethod
    def _check_exist(cls, path, checklist):
        """Return True if a file with *path*'s basename exists in any
        folder of *checklist*."""
        basename = os.path.basename(path)

        for folder in checklist:
            if os.path.exists(os.path.join(folder, basename)):
                return True

        return False

    def _download_illust_to_file(self, url, file):
        """Stream *url* into the writable *file* object.

        Raises:
            APIError: On an HTTP error or a short/interrupted transfer.
        """
        requests_kwargs = self.requests_kwargs.copy()
        requests_kwargs['stream'] = True
        requests_kwargs['headers'] = ILLUST_DOWNLOAD_HEADERS

        try:
            wrote_size = 0
            total_size = None

            # download() yields progress tuples; we only need the final one.
            for wrote_size, total_size in download(
                    file,
                    url,
                    **requests_kwargs,
            ):
                pass

            if total_size is not None and wrote_size < total_size:
                raise APIError(
                    self.download,
                    'Unexpected connection interruption.',
                )

        except requests.HTTPError as e:
            raise APIError(self.download, e.response.text) from e

    def _download_one_url(
        self,
        illust,
        url,
        path,
        convert_ugoira,
        replace,
        check_exists,
        max_tries,
        fake_download,
        use_pil,
    ):
        """Download a single page of *illust* to *path*.

        Returns True if a file was written, False when skipped (already
        exists, found in *check_exists*, or *fake_download* is set).
        Retries until *max_tries* is exhausted (forever when it is None).

        Raises:
            DownloadError: After the last failed try.
        """
        if not replace and os.path.exists(path):
            return False

        if self._check_exist(path, check_exists):
            return False

        if fake_download:
            return False

        dir_name = os.path.dirname(path)

        frame_path = None

        for tries in count(start=1):
            try:
                # Buffer the whole page in memory before touching disk,
                # so a failed transfer leaves no partial file.
                buffer = io.BytesIO()
                self._download_illust_to_file(url, buffer)
                buffer.seek(0)

                if illust.type == IllustType.UGOIRA and convert_ugoira:
                    self.convert_zip_to_gif(
                        buffer,
                        illust.frame_delays,
                        path,
                        use_pil,
                    )
                else:
                    if dir_name:
                        os.makedirs(dir_name, exist_ok=True)

                    with open(path, 'wb') as f:
                        copyfileobj(buffer, f)

                    # Keep the raw zip but write frame delays alongside
                    # it so it can be converted later.
                    if illust.type == IllustType.UGOIRA:
                        frame_path = os.path.splitext(path)[0] + '.txt'
                        with open(frame_path, 'wt') as f:
                            for frame_delay in illust.frame_delays:
                                print(frame_delay, file=f)

                return True
            except Exception as e:
                # Clean up partial output before retrying or giving up.
                self._try_remove_file(path)
                self._try_remove_file(frame_path)

                if max_tries is None or tries < max_tries:
                    continue

                raise DownloadError(illust, e) from e

    def _download_multiple_urls(
        self,
        illust,
        target,
        convert_ugoira,
        replace,
        check_exists,
        max_tries,
        fake_download,
        use_pil,
    ):
        """Download every ``(url, path)`` pair in *target*.

        Returns a list of ``(url, path, downloaded)`` tuples, where
        ``downloaded`` is the boolean result of _download_one_url().
        """
        result = []

        for url, path in target:
            result.append((url, path,
                           self._download_one_url(
                               illust,
                               url,
                               path,
                               convert_ugoira=convert_ugoira,
                               replace=replace,
                               check_exists=check_exists,
                               max_tries=max_tries,
                               fake_download=fake_download,
                               use_pil=use_pil,
                           )))

        return result

    @_need_auth
    def download(
        self,
        illust,
        directory=os.path.curdir,
        name=None,
        addition_naming_info=None,
        convert_ugoira=True,
        replace=False,
        check_exists=None,
        max_tries=5,
        fake_download=False,
        use_pil=False,
    ):
        """Download illust.

        Download illust.

        Args:
            illust: The illust or illust_id to be downloaded.
            directory: Directory.
            name: If set, the downloaded file would be renamed. Could contains
                format string syntax.
                e.g. name='{illust.user_id}_{original_name}'
                The following information is provided:
                    illust: The illust object.
                    page: 0-based page number.
                    original_name: The default filename.
                    root: The root part of original_name. e.g. 'foo' in
                        'foo.bar'.
                    ext: The extension part of original_name. e.g. '.bar' in
                        'foo.bar'.
            addition_naming_info: Addition dict that will be used when
                formatting name.
            convert_ugoira: Whether to download ugoira as gif. If false, a zip
                file will be downloaded instead. And a txt file contains frame
                durations would be created.
            replace: If true, will replace already exist file(s).
            check_exists: Addition path(s) to check whether the illust exists
                (by name). Could be a path string, a list of path string or
                None.
            max_tries: Max try times when download failed. If max_tries=None, it
                will loop infinitely until finished.
            fake_download: If True, no file will be actually downloaded.
            use_pil: Whether to ues Pillow library to create GIF file. Refers to
                the doc of PixivPixie.convert_zip_to_gif().

        Returns:
            A list of download result of each page. Each result is a tuple of
                (url, path, downloaded).

        Raises:
            Any exceptions check_auth() will raise.
            DownloadError.
        """
        if isinstance(illust, int):
            illust = self.illust(illust)

        if check_exists is None:
            check_exists = []
        elif isinstance(check_exists, str):
            check_exists = [check_exists]

        download_target = []
        # Path construction is retried too — illust.image_urls may lazily
        # trigger API calls that can fail transiently.
        for tries in count(start=1):
            try:
                download_target = [(
                    url,
                    self._get_file_path(
                        illust,
                        page,
                        url,
                        convert_ugoira,
                        directory,
                        name,
                        addition_naming_info,
                    ),
                ) for page, url in enumerate(illust.image_urls)]
                break
            except Exception as e:
                if max_tries is None or tries < max_tries:
                    continue

                raise DownloadError(illust, e) from e

        return self._download_multiple_urls(
            illust,
            download_target,
            convert_ugoira=convert_ugoira,
            replace=replace,
            check_exists=check_exists,
            max_tries=max_tries,
            fake_download=fake_download,
            use_pil=use_pil,
        )
Beispiel #28
0
class Pixiv(object):
    def __init__(self, dbDict, config):
        """Set up download directories and an authenticated Pixiv client.

        Args:
            dbDict: Persistent dict-like store; must contain a 'works' map
                of already-processed work ids.
            config: Mapping with PIXIV_USERNAME, PIXIV_PASSWORD,
                PIXIV_DOWNLOAD_DIRECTORY and PIXIV_AVATAR_DIRECTORY keys.
        """
        self.dbDict = dbDict
        self.username = config['PIXIV_USERNAME']
        self.password = config['PIXIV_PASSWORD']
        self.downloadDirectory = config['PIXIV_DOWNLOAD_DIRECTORY']
        self.avatarDirectory = config['PIXIV_AVATAR_DIRECTORY']
        # Make sure both target directories exist before any download runs.
        for directory in (self.downloadDirectory, self.avatarDirectory):
            os.makedirs(directory, exist_ok=True)
        self.api = PixivAPI()
        self.authorize()

    def authorize(self):
        """(Re-)login to the Pixiv public API with the stored credentials."""
        self.api.login(self.username, self.password)


    def loadWorks(self):
        print('Retrieving Pixiv works')
        self.authorize()
        feeds = self.api.me_feeds()
        workIds = [r['ref_work']['id'] for r in feeds['response'] if r['type'] == 'add_illust']
        workDicts = [self._getWorkDict(workId) for workId in workIds]
        works = [w for workDict in workDicts for w in self._getImageData(workDict)]
        return works


    def _getWorkDict(self, workId):
        url = 'https://public-api.secure.pixiv.net/v1/works/' + workId + '.json'
        result = self.api.auth_requests_call('GET', url)
        try:
            return self.api.parse_result(result)
        except PixivError as p:
            return {'status' : 'failure', 'errors' : str(p)}

    def _getImageData(self, workDict):
        imageList = []
        for imageDict in workDict['response']:
            imageData = {
                'identifier'      : '',
                'authorName'      : '',
                'authorHandle'    : '',
                'authorAvatarUrl' : '',
                'profileUrl'      : '',
                'website'         : '',
                'imageTitle'      : '',
                'imageUrls'       : [],
                'imagePageUrl'    : '',
                'imageTimestamp'  : '',
                'imageType'       : '',
                'nsfw'            : False,
                'width'           : '500',
                'height'          : '500',
                'success'         : False,
                'error'           : 'Unknown error',
            }

            if workDict['status'] == 'success':
                identifier = str(imageDict.get('id'))
                if identifier not in self.dbDict['works']: # Skip images we've already loaded
                    user = imageDict.get('user') or {}
                    imageData['identifier']      = identifier
                    imageData['authorName']      = str(user.get('name'))
                    imageData['authorHandle']    = str(user.get('account'))
                    imageData['authorAvatarUrl'] = self._getAvatarUrl(str((user.get('profile_image_urls') or {}).get('px_50x50')))
                    imageData['profileUrl']      = 'http://www.pixiv.net/member.php?id=' + str(user.get('id'))
                    imageData['website']         = 'Pixiv'
                    imageData['imageTitle']      = str(imageDict.get('title'))
                    imageData['imageUrls']       = self._getImageUrls(imageDict)
                    imageData['imagePageUrl']    = 'http://www.pixiv.net/member_illust.php?mode=medium&illust_id=' + str(imageDict.get('id'))
                    imageData['imageTimestamp']  = str(max(imageDict.get('created_time'), imageDict.get('reupoloaded_time', '')))
                    imageData['imageType']       = str(imageDict.get('type'))
                    imageData['nsfw']            = str(imageDict.get('age_limit') != 'all-age')
                    imageData['width']           = str(imageDict.get('width')) or '500'
                    imageData['height']          = str(imageDict.get('height')) or '500'
                    imageData['success']         = str(imageDict.get('status') == 'success')
                    imageData['error']           = str(imageDict.get('errors'))

                    self.dbDict['works'][identifier] = imageData
            else:
                raise RuntimeError('Failed Pixiv API call: ' + workDict.get('errors'))

        return imageList

    def _parseTime(self, imageDict):
        s = max(imageDict.get('created_time', ''), imageDict.get('reupoloaded_time', ''))
        return datetime.datetime.strptime(s, '%Y-%m-%d %H:%M:%S').replace(tzinfo=pytz.UTC).isoformat()

    def _getAvatarUrl(self, remoteUrl):
        """Download a user's avatar and return its local path.

        Delegates to _downloadImage() with the avatar directory as the
        target; returns whatever that produces (path or error string).
        """
        return self._downloadImage(remoteUrl, self.avatarDirectory)

    def _getImageUrls(self, imageDict):
        workType = imageDict.get('type')
        if workType == 'illustration':
            urlDict = imageDict.get('image_urls') or {}
            urls = [self._generateImageUrl(urlDict.get('small') or urlDict.get('medium') or urlDict.get('large'))]

        elif workType == 'manga':
            pages = (imageDict.get('metadata') or {}).get('pages') or []
            def getMangaUrl(d):
                urld = d.get('image_urls')
                return self._generateImageUrl(urld.get('small') or urld.get('medium') or urld.get('large'))
            urls = [getMangaUrl(item) for item in pages]

        # Ugoira handling falls through to default for now
        # elif workType == 'ugoira':
        #     pass

        else:
            #Default case; all response types seem to have at least something in image_urls
            urlDict = imageDict.get('image_urls') or {}
            urls = [urlDict.get('small') or urlDict.get('medium') or urlDict.get('large')]

        urls = [self._downloadImage(url, self.downloadDirectory) for url in urls]
        return urls

    def _generateImageUrl(self, url):
        # Construct the URL for the full-res image. Super brittle; entirely dependent on Pixiv never changing anything
        leftSide  = url[:url.find('pixiv.net')+10]
        rightSide = url[url.find('/img/'):].replace('_master1200', '')
        return leftSide + 'img-original' + rightSide


    def _downloadImage(self, url, directory):
        """Download an image, trying png/jpg/gif extensions in turn.

        Pixiv originals may use a different extension than the thumbnail
        URL suggests, so each candidate extension is attempted until one
        is not a 404.

        Args:
            url: Remote image URL.
            directory: Local directory to store the file in.

        Returns:
            'directory/filename' on success (skipping the write if the
            file already exists), or a '<status code> <url>' string on
            failure.
        """
        print('Downloading ' + url)

        def attemptDownload(attemptUrl, suffix):
            # Swap the extension, then fetch with a referer header —
            # Pixiv's image servers reject referer-less requests.
            attemptUrl = '.'.join((attemptUrl.rpartition('.')[0], suffix))
            return requests.get(attemptUrl, headers={'referer': attemptUrl[:attemptUrl.find('/img')]}, stream=True)

        r = attemptDownload(url, 'png')
        if r.status_code == 404:
            r = attemptDownload(url, 'jpg')
            if r.status_code == 404:
                r = attemptDownload(url, 'gif')

        if r.status_code == 200:
            # NOTE(review): the saved filename keeps the original URL's
            # extension even when a different extension succeeded — confirm
            # whether that is intended.
            filename = url.split('/')[-1]
            filepath = os.path.join(directory, filename)
            if os.path.isfile(filepath):
                print('File already downloaded; skipping')
            else:
                with open(filepath, 'wb') as f:
                    for chunk in r:
                        f.write(chunk)
            return '/'.join((directory, filename))
        else:
            # BUG FIX: status_code is an int; concatenating it to a str
            # raised TypeError on every non-200 outcome.
            return str(r.status_code) + ' ' + url