class PixivCrawler:
    """Crawl Pixiv illustrations through the (project-provided) PixivAPI
    client, download image files into date-bucketed directories, and
    persist per-work metadata via PixivCursor.
    """

    # Preset tag list used by callers (character: 弦巻こころ).
    KKRTAG = ['弦巻こころ']

    def __init__(
        self,
        auth,
        work_path=os.path.abspath('../pixiv/'),
    ):
        """Log in and remember the download root.

        auth: (username, password) pair unpacked into PixivAPI.login.
        work_path: root directory all downloads are stored under.
        """
        self._api = PixivAPI()
        self._api.login(*auth)
        self._wd = work_path

    def fetch_work(self, work_id, tag):
        """Download every page of one work and record its metadata.

        work_id: Pixiv illustration id.
        tag: label stored in the metadata record's 'mode' field.
        Returns True if at least one image was downloaded.
        """
        got = False
        ri = self._api.works(work_id)
        try:
            r = ri.response[0]
        except (AttributeError, IndexError, TypeError):
            # was a bare except: — narrow to the lookup failures we expect
            r = None
        if not r:
            return got

        # Multi-page works list pages under metadata; single-page works
        # expose image_urls directly.
        url_list = []
        if r.metadata:
            for p in r.metadata.pages:
                url_list.append(p.image_urls.large)
        else:
            url_list.append(r.image_urls.large)

        created_time = r.created_time[:10].replace('-', '')  # YYYYMMDD bucket
        wd = os.path.join(self._wd, created_time)
        # os.mkdir raced when the directory already existed.
        os.makedirs(wd, exist_ok=True)

        fns = []
        for url in url_list:
            fn = os.path.basename(url)
            final_fn = os.path.join(created_time, fn)
            _logger.info('getting %s to %s', url, wd)
            try:
                if self._api.download(url, fname=fn, path=wd):
                    got = True
                    # Park the file under a .download suffix until
                    # post-processing picks it up.
                    shutil.move(os.path.join(wd, fn),
                                os.path.join(wd, fn + '.download'))
                    fns.append(final_fn)
            except Exception:
                # Best-effort per URL: log the traceback and keep going
                # (replaces the ad-hoc sys.excepthook call).
                _logger.exception('failed to download %s', url)

        if fns:
            meta = json.dumps(r)
            dmeta = {
                'work_id': work_id,
                'mode': tag,
                'user': r.user.id,
                'fn': fns,
                'meta': meta,
            }
            PixivCursor.insert_update_one(dmeta)
        return got

    def get_by_tag(self, search_tag='', filter_tag=None, num=30, save_tag=''):
        """Search works by tag, keep those matching all filter tags,
        and fetch up to `num` of them.

        search_tag: tag passed to the search API; defaults to the first
            filter tag when empty.
        filter_tag: extra tags every result must carry (case-insensitive).
            (Was a mutable default [] — now None-sentinel.)
        num: maximum number of works to fetch.
        save_tag: label recorded with each fetched work; defaults to
            search_tag.
        Returns the number of works fetched, or None if no tags given.
        """
        filter_tag = list(filter_tag) if filter_tag else []
        if not search_tag and not filter_tag:
            return None
        if filter_tag:
            filter_tag = [x.strip().lower() for x in filter_tag]
            if not search_tag:
                search_tag = filter_tag[0]
                filter_tag = filter_tag[1:]
        if not save_tag:
            save_tag = search_tag
        filter_tag = set(filter_tag)
        _logger.info('search: %s filter: %s', search_tag, filter_tag)

        ret = 0
        page = 1
        while ret < num:
            r = self._api.search_works(search_tag, mode='tag',
                                       page=page, per_page=30)
            try:
                l = r.response
            except AttributeError:
                l = None
            if not l:
                break
            _logger.info('get %d illusts', len(l))
            for i in l:
                if i.type != 'illustration':
                    continue
                tt = set([x.strip().lower() for x in i.tags])
                # Require every filter tag to be present on the work.
                if len(tt & filter_tag) != len(filter_tag):
                    continue
                if self.fetch_work(i.id, save_tag):
                    ret += 1
                # Was `ret > num` — overshot the requested count by one.
                if ret >= num:
                    break
            page += 1
        return ret

    def get_rank(self, mode='daily', num=30):
        """Fetch up to `num` illustrations from the ranking list.

        mode: ranking mode (e.g. 'daily'), also recorded as the save tag.
        Returns the number of works fetched.
        """
        ret = 0
        page = 1
        while ret < num:
            r = self._api.ranking_all(mode=mode, page=page, per_page=30)
            try:
                l = r.response[0].works
            except (AttributeError, IndexError, TypeError):
                l = None
            if not l:
                break
            _logger.info('get %d ranking illust', len(l))
            for i in l:
                if i.work.type != 'illustration':
                    continue
                if self.fetch_work(i.work.id, mode):
                    ret += 1
                if ret >= num:
                    break
            page += 1
        return ret
class Pixiv(Thread): search_on = 0.0 #最後查詢時間 client = None def __init__(self): Thread.__init__(self) def __connect(self): if self.client is None: try: self.client = PixivAPI() self.client.login(cfg['pixiv']['帳號'], cfg['pixiv']['密碼']) except Exception as e: raise e return False return True def run(self): pass def search(self, key, number=30): if not self.__connect(): return 'Pixiv模組發生錯誤 暫時不能使用' if number > 1000: number = 1000 if key[0] == '@': result = self.client.users_works(int(key[1:])) else: result = self.client.search_works( key, page=1, per_page=number, mode='tag', # text標題 tag標籤 exact_tag精準標籤 caption描述 period='all', # all所有 day一天內 week一週內 month一月內 order='desc', # desc新順序 asc舊順序 sort='date', ) if result.status == 'failure': return '找不到 <%s>' % (key) result_rank = [] for i in result.response: for i2 in result_rank: if i.stats.views_count > i2.stats.views_count: result_rank.insert(result_rank.index(i2), i) break else: result_rank.append(i) reply = [] for i in result_rank: self.client.download(i.image_urls.px_480mw, path=cfg['temp_dir'], name=str(i.id)) #px_128x128 px_480mw print('%s\\%s' % (cfg['temp_dir'], i.id)) url = imgur.upload('%s\\%s' % (cfg['temp_dir'], i.id)) #url = 'http://temp.maou.pw/%s' % (i.id) reply.append(url) if len(reply) >= 4: break url = 'https://www.pixiv.net/search.php?word=123&s_mode=s_tag_full' reply = reply[:4] reply.append(url) return reply def rss(self): if not self.__connect(): return 'Pixiv模組錯誤'
class pixivImage:
    """Wrapper around one Pixiv illustration: resolves an ID or URL,
    pulls work/user metadata via the Pixiv APIs, and downloads images.
    Takes URLs or IDs as constructor arguments.
    """

    def __init__(self, *args):
        baseURL = "https://www.pixiv.net/member_illust.php?mode=medium&illust_id="
        self.image_URLs = []
        self.directories = []
        self.caption = ""
        self.userTags = []
        self.userImported = False
        for arg in args:
            length = len(str(arg))
            # An ID is 8 digits; a URL is the base URL plus those digits.
            if length == 8:
                self.ID = int(arg)
                self.URL = baseURL + str(arg)
            elif isinstance(arg, str) and length == len(baseURL) + 8:
                # BUG FIX: original used bitwise `&` (precedence bug:
                # `isinstance(...) & length == ...` compared the wrong
                # things) and then read the non-existent `self.url`
                # (attribute is `self.URL`), so this branch always raised.
                self.URL = arg
                marker = "&illust_id="
                start = arg.find(marker) + len(marker)
                self.ID = int(arg[start:])
            else:
                print("URL OR ID is wrong or in bad format")

    def __get__(self, obj, objtype):
        # NOTE(review): descriptor-protocol misuse kept as found —
        # `self.attr` is never assigned anywhere in this class and
        # `self.item` only by implication; confirm intent before relying
        # on this. Tries the attribute, then the illust import, then the
        # user import, then gives up.
        try:
            return getattr(obj, self.attr)
        except AttributeError:
            try:
                self.importIllustJSON()
                return self.item
            except AttributeError:
                try:
                    self.importUserJSON()
                except AttributeError:
                    print("Image does not have that attribute")
                pass

    def setCustomTags(self, tags):
        """Attach caller-defined tags (does not touch Pixiv's tags)."""
        self.userTags = tags

    def setCaption(self, caption):
        """Set a caller-defined caption."""
        self.caption = caption

    def importIllustJSON(self):
        """Fetch the work's JSON via PixivAPI and populate attributes.

        Requires self.ID; logs in with the module-level pixivLogin
        credentials and keeps the client on self.api for download().
        """
        self.api = PixivAPI()
        self.api.login(pixivLogin["pixivusername"],
                       pixivLogin["pixivpassword"])
        userURL = "https://www.pixiv.net/member_id="
        self.JSON = self.api.works(self.ID)['response'][0]
        self.manga = self.JSON['is_manga']
        self.account = self.JSON['user']['account']
        self.name = self.JSON['user']['name']
        self.user_ID = self.JSON['user']['id']
        self.user_URL = userURL + str(self.user_ID)
        self.title = self.JSON['title']
        self.tags = self.JSON['tags']
        self.pages = self.JSON['page_count']
        if self.pages > 1:
            # BUG FIX: range(self.pages - 1) dropped the final page.
            for page in range(self.pages):
                self.image_URLs.append(
                    self.JSON['metadata']["pages"][page]
                    ["image_urls"]['large'])
        else:
            self.image_URLs.append(self.JSON['image_urls']['large'])

    def importUserJSON(self):
        """Fetch the author's profile via the (unauthenticated) app API.

        Requires self.user_ID, i.e. importIllustJSON() must run first.
        """
        aapi = AppPixivAPI()
        self.userJSON = aapi.user_detail(self.user_ID)
        self.webpage = self.userJSON['profile']['webpage']
        self.twitter_name = self.userJSON['profile']['twitter_account']
        self.twitter_URL = self.userJSON['profile']['twitter_url']
        self.pawoo_URL = self.userJSON['profile']['pawoo_url']
        self.userImported = True

    def importJSON(self):
        """Import both the work's and the author's metadata."""
        self.importIllustJSON()
        self.importUserJSON()

    def download(self, directory=None):
        """Download every page image into `directory` (default:
        <module dir>\\temp\\) and record each file's local path.

        BUG FIX: the original appended the basename onto `directory`
        inside the loop (nesting the target path deeper on every
        iteration) and issued an extra bare api.download() into the CWD
        after the loop.
        """
        if directory is None:
            directory = os.path.dirname(
                os.path.abspath(__file__)) + "\\temp\\"
        if not os.path.exists(directory):
            os.makedirs(directory)
        for URL in self.image_URLs:
            self.api.download(URL, prefix=directory)
            self.directories.append(
                directory + "\\" + str(os.path.basename(URL)))