def search(self, word, page=1, search_type='illust', download_threshold=DOWNLOAD_THRESHOLD):
    """Search pixiv for illustrations matching *word*.

    :param word: search keyword; must be non-empty
    :param page: result page number (1-based)
    :param search_type: pixiv search category, e.g. 'illust'
    :param download_threshold: minimum bookmark ("mark") count an entry
        must reach to be kept in the result
    :return: list of illustration entries (popular introductions first,
        then regular results), each guaranteed to carry an 'id' key;
        empty list when the page could not be fetched
    :raises PixivError: if *word* is empty
    """
    if not word:
        raise PixivError('search word can not be null')
    url = PIXIV_SEARCH_URL % (word, search_type, int(page))
    print(url)
    html = self.request_page(url)
    if not html:
        print("Get Page is None!URL:" + url)
        return []
    search_result = PixivHtmlParser.parse_search_result(html)
    pop_result = PixivHtmlParser.parse_popular_introduction(html)
    if not pop_result:
        pop_result = []
    if search_result:
        pop_result.extend(search_result)
    # Filter out entries that are incomplete or whose bookmark count is
    # below the threshold.  list(...) is required because filter() is a
    # lazy iterator on Python 3 (the original returned the iterator and
    # had already consumed it in the loop below); "k in d" replaces the
    # dict.has_key() method removed in Python 3.
    if len(pop_result) > 0:
        pop_result = list(filter(
            lambda data: ("url" in data and "title" in data
                          and "mark_count" in data
                          and int(data.mark_count) >= download_threshold),
            pop_result))
        for result in pop_result:
            if 'id' not in result:
                result['id'] = CommonUtils.get_url_param(
                    result['url'], "illust_id")
    return pop_result
def download_illustration(self, illu, path, p_limit=0):
    """Download the illustration described by a parsed summary entry.

    :param illu: illustration summary; must contain 'url' and 'title'
    :param path: directory (or path prefix) to save into
    :param p_limit: maximum page count for multi-page works; 0 = no limit
    :return: the save path on success, PAGE_LIMIT_CONTINUE when the work
        has more pages than *p_limit*, or None when *illu* lacks the
        required keys, the detail lookup fails, or an error was logged
    """
    # "k in d" replaces dict.has_key(), which was removed in Python 3.
    if "url" not in illu or "title" not in illu:
        return
    illust_id = CommonUtils.get_url_param(illu.url, "illust_id")
    detail = self.api.illust_detail(illust_id)
    if not detail:
        print(illu.title + " can't get detail id :" + illust_id)
        return
    try:
        detail = detail.illust
        if detail.page_count == 1:
            # Single-page work: prefer the original-resolution URL,
            # fall back to the "large" rendition.
            try:
                url = detail.meta_single_page.original_image_url
            except Exception:
                url = detail.image_urls.large
            path = self.download(illust_id, path, url)
        else:
            if 0 < p_limit < detail.page_count:
                # Work exceeds the caller's page limit; skip download.
                print("Pixiv id:%s, P>limit,Skip download" % (illust_id,))
                return PAGE_LIMIT_CONTINUE
            urls = detail.meta_pages
            if len(urls) > 1:
                # Multi-page work: save all pages into one sub-directory.
                path += "/p_%s" % illust_id
                if not os.path.exists(path):
                    os.mkdir(path)
                for index, page in enumerate(urls):
                    try:
                        url = (page.image_urls.original
                               if "original" in page.image_urls
                               else page.image_urls.large)
                        extension = os.path.splitext(url)[1]
                        save_path = path + "/p_%s_%d%s" % (
                            illust_id, index, extension)
                        print(save_path)
                        self.api.download(url, path=save_path)
                    except Exception:
                        # Best-effort: one failed page must not abort the rest.
                        continue
                path = path + "/"
            else:
                # meta_pages unavailable; fall back to the large image.
                url = detail.image_urls.large
                path = self.download(illust_id, path, url)
        return path
    except Exception as e:
        error_log("Download fail:")
        error_log(e)
def download_illustration(illu, path, auth_api):
    """Download one illustration using an authenticated API client.

    :param illu: illustration summary; must contain 'url' and 'title'
    :param path: directory to save into
    :param auth_api: authenticated client exposing download(url, path=...)
    """
    # "k in d" replaces dict.has_key(), which was removed in Python 3.
    if "url" not in illu or "title" not in illu:
        return
    illust_id = CommonUtils.get_url_param(illu.url, "illust_id")
    detail = PixivApi.illust_detail(illust_id)
    if not detail:
        print(illu.title + " can't get detail id :" + illust_id)
        return
    try:
        detail = detail.illust
        if detail.page_count == 1:
            # Single-page work: prefer the original-resolution URL,
            # fall back to the "large" rendition.
            try:
                url = detail.meta_single_page.original_image_url
            except Exception:
                url = detail.image_urls.large
            download(illust_id, illu.title, path, url, auth_api)
        else:
            if detail.page_count > P_LIMIT:
                # Work exceeds the configured page limit; skip download.
                print("Pixiv id:%s,name:%s P>limit,Skip download" % (
                    illust_id, illu.title))
                return
            urls = detail.meta_pages
            if len(urls) > 1:
                # Multi-page work: save all pages into one sub-directory.
                path += "/p_%s" % illust_id
                if not os.path.exists(path):
                    os.mkdir(path)
                for index, page in enumerate(urls):
                    try:
                        url = (page.image_urls.original
                               if "original" in page.image_urls
                               else page.image_urls.large)
                        extension = os.path.splitext(url)[1]
                        if IMAGE_USE_ORG_NAME:
                            save_path = path + "/p_%s_%s_%d%s" % (
                                illust_id,
                                CommonUtils.filter_dir_name(illu.title),
                                index, extension)
                        else:
                            save_path = path + "/p_%s_%d%s" % (
                                illust_id, index, extension)
                        print(save_path)
                        auth_api.download(url, path=save_path)
                    except Exception:
                        # Best-effort: one failed page must not abort the rest.
                        continue
            else:
                # meta_pages unavailable; fall back to the large image.
                url = detail.image_urls.large
                download(illust_id, illu.title, path, url, auth_api)
    except Exception as e:
        # "except Exception, e" is a syntax error on Python 3; the "as"
        # form is valid on both Python 2.6+ and 3.
        error_log("Download fail:")
        error_log(e)
def download_topics(cls, url, path, quality=1):
    """Download every illustration of a pixivision topic page.

    :param url: topic page URL
    :param path: directory to save images (and topic.txt) into
    :param quality: 1 = resolve the original image via the pixiv API;
        any other value = download the pixivision preview image directly
    """
    html = HtmlDownloader.download(url)
    illu_list = HtmlDownloader.parse_illustration(html)
    title_des = HtmlDownloader.get_title(html)
    if title_des and illu_list:
        title_des["size"] = len(illu_list)
        CommonUtils.write_topic_des(path + "/topic.txt", title_des)
    if not illu_list:
        return
    for illu in illu_list:
        try:
            filename = CommonUtils.filter_dir_name(illu.title)
            extension = os.path.splitext(illu.image)[1]
            # Renamed from "id" to avoid shadowing the builtin.
            illust_id = CommonUtils.get_url_param(illu.image_page, "illust_id")
            if quality == 1:
                # Resolve the original image URL through the API.
                detail = PixivApi.illust_detail(illust_id)
                if detail:
                    download_url = ImageDownload.get_image_url(illu, detail)
                    if IMAGE_USE_ORG_NAME:
                        save_path = path + "/p_%s_%s%s" % (
                            illust_id, filename, extension)
                    else:
                        save_path = path + "/p_%s%s" % (illust_id, extension)
                    print(save_path)
                    PixivApi.download(download_url, path=save_path)
                else:
                    print(illu.title + " can't get detail id :" + illust_id)
            else:
                # Download the pixivision preview image directly.
                save_path = path + "/p_%s_%s%s" % (illust_id, filename, extension)
                print(save_path)
                PixivApi.download(illu.image, path=save_path)
        except Exception as e:
            # "except Exception, e" is a syntax error on Python 3; the
            # "as" form works on both Python 2.6+ and 3.
            error_log("Download Illu Fail:" + " Illustration :" + str(illu))
            error_log(e)
            continue
def download_topics(cls, url, path, create_path=False, downloader=None):
    """Download all illustrations of a pixivision topic by illust id.

    :param url: topic page URL
    :param path: base save directory
    :param create_path: when True, create a sub-directory named after the
        parsed topic title and save into it
    :param downloader: optional object exposing download_all_by_id();
        PixivImageDownloader is used when None
    :return: the directory the topic was saved into, or None when the
        page yielded no illustrations
    """
    html = PixivisionHtmlParser.download(url)
    illu_list = PixivisionHtmlParser.parse_illustration(html)
    title_des = PixivisionHtmlParser.get_title(html)
    # Optionally create a per-topic folder from the parsed title.
    # "k in d" replaces dict.has_key(), which was removed in Python 3.
    if create_path and title_des and 'title' in title_des:
        path = path + "/" + title_des['title']
        if not os.path.exists(path):
            os.makedirs(path)
    if title_des and illu_list:
        title_des["size"] = len(illu_list)
        title_des["url"] = url
        CommonUtils.write_topic_des(path + "/topic.txt", title_des)
    if not illu_list:
        return
    for illu in illu_list:
        # Renamed from "id" to avoid shadowing the builtin.
        illust_id = CommonUtils.get_url_param(illu.image_page, "illust_id")
        if downloader:
            downloader.download_all_by_id(illust_id, path + '/')
        else:
            PixivImageDownloader.download_all_by_id(illust_id, path + '/')
    print('*' * 10)
    print(url + " Download End!")
    return path
def download_all_by_url(url, prefix):
    """Download a work given its URL.

    When the URL carries an illust_id parameter, delegate to the id-based
    bulk download; otherwise fetch the URL itself.

    :param url: pixiv work URL (may or may not contain illust_id)
    :param prefix: save-path prefix passed through to the downloader
    :return: whatever the delegated downloader returns
    """
    illust_id = CommonUtils.get_url_param(url, "illust_id")
    if not illust_id:
        # No id in the URL — download the resource directly.
        return PixivApi.download(url.strip(), prefix=prefix)
    return download_all_by_id(illust_id, prefix)
def download_byurl(cls, url):
    """Download a work given its URL.

    Prefer the id-based download when an illust_id parameter is embedded
    in the URL; otherwise fetch the URL itself.

    :param url: pixiv work URL (may or may not contain illust_id)
    """
    illust_id = CommonUtils.get_url_param(url, "illust_id")
    if not illust_id:
        # No id in the URL — download the resource directly.
        PixivApi.download(url.strip())
        return
    ImageDownload.download_image_byid(illust_id)