コード例 #1
0
 def run(self):
     """Download the topic at ``self.url`` into ``self.path`` and notify callbacks.

     The target directory is created when missing; if that fails the error
     is logged and the method returns without calling any callback.

     The download itself is delegated to ``ImageDownload.download_topics``.
     When ``self.success`` is set it is called with the message built by
     ``CommonUtils.build_callback_msg``.  If ``self.callback_params`` carries
     both ``current_count`` and ``all_count``, the counter is advanced and a
     final "all tasks complete" message is sent once the last task finished.

     Any exception raised while downloading or notifying is printed and
     reported through ``self.fail`` (when set).
     """
     if not os.path.exists(self.path):
         try:
             os.makedirs(self.path)
         except Exception as e:
             error_log("make dir Fail:" + self.path)
             error_log(e)
             return
     try:
         path = ImageDownload.download_topics(self.url,
                                              self.path,
                                              create_path=self.create_path,
                                              downloader=self.downloader)
         if self.success:
             self.success(CommonUtils.build_callback_msg(path,
                                                         url=self.url))
             params = self.callback_params
             # ``in`` replaces dict.has_key(), which no longer exists on
             # Python 3 and behaves the same on Python 2.
             if params and 'current_count' in params and 'all_count' in params:
                 # NOTE(review): getAndInc() presumably returns the value
                 # before the increment, hence the "+ 1" below -- confirm.
                 current_count = params['current_count'].getAndInc()
                 if params['all_count'] == (current_count + 1):
                     self.success("Download from Pixivision:\n" +
                                  params['url'] +
                                  "\nAll tasks are complete!\n\n")
     except Exception as e:
         print("Download topics fail")
         print(e)
         if self.fail:
             self.fail()
コード例 #2
0
 def download_illustration(self, illu, path, p_limit=0):
     """
     Download an illustration described by a program-built record.

     :param illu:  illustration record; nothing happens unless it contains
                   both "url" and "title"
     :param path:  download directory
     :param p_limit: maximum number of pages allowed, 0 means no limit
     :return: the value returned by ``self.download`` for a single image,
              the created folder path (with a trailing "/") for a
              multi-page work, ``PAGE_LIMIT_CONTINUE`` when the work has
              more pages than ``p_limit``, otherwise ``None``
     """
     # ``in`` instead of dict.has_key(): has_key() was removed in Python 3.
     if "url" not in illu or "title" not in illu:
         return
     illust_id = CommonUtils.get_url_param(illu.url, "illust_id")
     detail = self.api.illust_detail(illust_id)
     if not detail:
         print(illu.title + " can't get detail id :" + illust_id)
         return
     try:
         detail = detail.illust
         # Single-image illustration
         if detail.page_count == 1:
             try:
                 url = detail.meta_single_page.original_image_url
             except Exception:
                 # No original URL available: fall back to the large image.
                 url = detail.image_urls.large
             return self.download(illust_id, path, url)
         # Multi-page illustration
         if 0 < p_limit < detail.page_count:
             # More pages than the configured limit: skip the download.
             print("Pixiv id:%s, P>limit,Skip download" %
                   (illust_id, ))
             return PAGE_LIMIT_CONTINUE
         urls = detail.meta_pages
         if len(urls) <= 1:
             # Page list unavailable: download the large image instead.
             url = detail.image_urls.large
             return self.download(illust_id, path, url)
         # All pages of one work go into a dedicated folder.
         path += "/p_%s" % illust_id
         if not os.path.exists(path):
             os.mkdir(path)
         for index, page in enumerate(urls):
             try:
                 image_urls = page.image_urls
                 url = image_urls.original if "original" in image_urls \
                     else image_urls.large
                 extension = os.path.splitext(url)[1]
                 save_path = path + "/p_%s_%d%s" % (
                     illust_id, index, extension)
                 print(save_path)
                 self.api.download(url, path=save_path)
             except Exception as e:
                 # Best effort: one broken page must not abort the others,
                 # but the failure is logged instead of silently dropped.
                 error_log("Download page %d fail, pixiv id:%s" %
                           (index, illust_id))
                 error_log(e)
                 continue
         return path + "/"
     except Exception as e:
         error_log("Download fail:")
         error_log(e)
コード例 #3
0
 def run(self):
     """Create ``self.path`` when it is missing.

     A failure to create the directory is logged through ``error_log`` and
     the method returns instead of raising.
     """
     if not os.path.exists(self.path):
         try:
             os.makedirs(self.path)
         # "except X as e" is valid on Python 2.6+ and Python 3, whereas the
         # old "except X, e" form is a SyntaxError on Python 3.
         except Exception as e:
             error_log("make dir Fail:" + self.path)
             error_log(e)
             return
コード例 #4
0
 def download_all_by_id(self, illust_id, path, p_limit=0):
     """
     Download an illustration by its pixiv id.

     :param illust_id: pixiv illustration id
     :param path:  download directory
     :param p_limit: maximum number of pages allowed, 0 means no limit
     :return: the value returned by ``self.api.download`` for a single
              image, the created folder path (with a trailing "/") for a
              multi-page work, ``PAGE_LIMIT_CONTINUE`` when the work has
              more pages than ``p_limit``, otherwise ``None``
     """
     detail = self.api.illust_detail(illust_id)
     if not detail:
         print(" can't get detail id :" + str(illust_id))
         return
     try:
         detail = detail.illust
         # Single-image illustration
         if detail.page_count == 1:
             try:
                 url = detail.meta_single_page.original_image_url
             except Exception:
                 # No original URL available: fall back to the large image.
                 url = detail.image_urls.large
             extension = os.path.splitext(url)[1]
             save_path = path + "/p_%s%s" % (illust_id, extension)
             print("Downloading:" + save_path)
             return self.api.download(url, path=save_path)
         # Multi-page illustration
         if 0 < p_limit < detail.page_count:
             # More pages than the configured limit: skip the download.
             print("Pixiv id:%s P>limit,Skip download" %
                   (illust_id, ))
             return PAGE_LIMIT_CONTINUE
         urls = detail.meta_pages
         if len(urls) <= 1:
             # Page list unavailable: download the large image instead.
             url = detail.image_urls.large
             return self.api.download(url, prefix=path)
         # All pages of one work go into a dedicated folder.
         path += "/p_%s" % illust_id
         if not os.path.exists(path):
             os.mkdir(path)
         for index, page in enumerate(urls):
             try:
                 image_urls = page.image_urls
                 # ``in`` instead of has_key(), which Python 3 removed.
                 url = image_urls.original if "original" in image_urls \
                     else image_urls.large
                 extension = os.path.splitext(url)[1]
                 save_path = path + "/p_%s_%d%s" % (
                     illust_id, index, extension)
                 print("Downloading:" + save_path)
                 self.api.download(url, path=save_path)
             except Exception as e:
                 # Best effort: one broken page must not abort the others,
                 # but the failure is logged instead of silently dropped.
                 error_log("Download page %d fail, pixiv id:%s" %
                           (index, illust_id))
                 error_log(e)
                 continue
         return path + "/"
     except Exception as e:
         error_log("Download fail:" + str(detail))
         error_log(e)
コード例 #5
0
def download_illustration(illu, path, auth_api):
    """
    Download the illustration described by ``illu`` into ``path``.

    :param illu: illustration record; nothing happens unless it contains
                 both "url" and "title"
    :param path: storage directory
    :param auth_api: authenticated client used to download the images
    """
    # ``in`` instead of dict.has_key(): has_key() was removed in Python 3.
    if "url" not in illu or "title" not in illu:
        return
    illust_id = CommonUtils.get_url_param(illu.url, "illust_id")
    detail = PixivApi.illust_detail(illust_id)
    if not detail:
        print(illu.title + " can't get detail id :" + illust_id)
        return
    try:
        detail = detail.illust
        # Single-image illustration
        if detail.page_count == 1:
            try:
                url = detail.meta_single_page.original_image_url
            except Exception:
                # No original URL available: fall back to the large image.
                url = detail.image_urls.large
            download(illust_id, illu.title, path, url, auth_api)
            return
        # Multi-page illustration
        if detail.page_count > P_LIMIT:
            # More pages than the configured limit: skip the download.
            print("Pixiv id:%s,name:%s P>limit,Skip download" % (illust_id, illu.title))
            return
        urls = detail.meta_pages
        if len(urls) <= 1:
            # Page list unavailable: download the large image instead.
            url = detail.image_urls.large
            download(illust_id, illu.title, path, url, auth_api)
            return
        # All pages of one work go into a dedicated folder.
        path += "/p_%s" % illust_id
        if not os.path.exists(path):
            os.mkdir(path)
        for index, page in enumerate(urls):
            try:
                image_urls = page.image_urls
                url = image_urls.original if "original" in image_urls \
                    else image_urls.large
                extension = os.path.splitext(url)[1]
                if IMAGE_USE_ORG_NAME:
                    # Keep the (sanitised) title in the file name.
                    save_path = path + "/p_%s_%s_%d%s" % (
                        illust_id,
                        CommonUtils.filter_dir_name(illu.title),
                        index, extension)
                else:
                    save_path = path + "/p_%s_%d%s" % (illust_id, index, extension)
                print(save_path)
                auth_api.download(url, path=save_path)
            except Exception as e:
                # Best effort: one broken page must not abort the others,
                # but the failure is logged instead of silently dropped.
                error_log("Download page %d fail, pixiv id:%s" %
                          (index, illust_id))
                error_log(e)
                continue
    except Exception as e:
        error_log("Download fail:")
        error_log(e)
コード例 #6
0
 def handler_data(self, illu):
     """Decode a JSON download request and pass it to ``self.pixiv_api``.

     :param illu: JSON text.  The decoded object needs a "url" entry; an
                  optional "path" entry is forwarded as the second
                  positional argument of ``download``.  Data without a
                  "url" entry is reported through ``error_log``.
     """
     print(illu)
     illu = json.loads(illu)
     # ``in`` instead of dict.has_key(): has_key() was removed in Python 3.
     if "url" not in illu:
         error_log("Error data:" + str(illu))
         return
     if "path" in illu:
         print("Download start" + illu["path"])
         self.pixiv_api.download(illu["url"], illu["path"])
     else:
         print("Download start" + illu["url"])
         self.pixiv_api.download(illu["url"])
コード例 #7
0
 def download_by_detail(self, detail, path, p_limit=0):
     """
     Download an illustration from a detail record obtained through the API.

     :param detail: illustration detail; a falsy value makes this a no-op
     :param path:   download directory
     :param p_limit: maximum number of pages allowed, 0 means no limit
     :return: the value returned by ``self.download`` for a single image,
              the created folder path (with a trailing "/") for a
              multi-page work, ``PAGE_LIMIT_CONTINUE`` when the work has
              more pages than ``p_limit``, otherwise ``None``
     """
     if not detail:
         return
     try:
         illust_id = detail.id
         # Single-image illustration
         if detail.page_count == 1:
             try:
                 url = detail.meta_single_page.original_image_url
             except Exception:
                 # No original URL available: fall back to the large image.
                 url = detail.image_urls.large
             return self.download(illust_id, path, url)
         # Multi-page illustration
         if 0 < p_limit < detail.page_count:
             # More pages than the configured limit: skip the download.
             print("Pixiv id:%s P>limit,Skip download" %
                   (illust_id, ))
             return PAGE_LIMIT_CONTINUE
         urls = detail.meta_pages
         if len(urls) <= 1:
             # Page list unavailable: download the large image instead.
             url = detail.image_urls.large
             return self.download(illust_id, path, url)
         # All pages of one work go into a dedicated folder.
         path += "/p_%s" % illust_id
         if not os.path.exists(path):
             os.mkdir(path)
         for index, page in enumerate(urls):
             try:
                 image_urls = page.image_urls
                 # ``in`` instead of has_key(), which Python 3 removed.
                 url = image_urls.original if "original" in image_urls \
                     else image_urls.large
                 extension = os.path.splitext(url)[1]
                 save_path = path + "/p_%s_%d%s" % (
                     illust_id, index, extension)
                 print(save_path)
                 self.api.download(url, path=save_path)
             except Exception as e:
                 # Best effort: one broken page must not abort the others,
                 # but the failure is logged instead of silently dropped.
                 error_log("Download page %d fail, pixiv id:%s" %
                           (index, illust_id))
                 error_log(e)
                 continue
         return path + "/"
     except Exception as e:
         error_log("Download fail:")
         error_log(e)
コード例 #8
0
def download_all_by_id(illust_id, path, limit_p=True):
    """
    Download an illustration by its pixiv id.

    :param illust_id: pixiv illustration id
    :param path: download directory
    :param limit_p: when true, works with more than ``P_LIMIT`` pages are skipped
    :return: the value returned by ``PixivApi.download`` for a single image,
             the created folder path (with a trailing "/") for a multi-page
             work, otherwise ``None``
    """
    detail = PixivApi.illust_detail(illust_id)
    if not detail:
        print(" can't get detail id :" + str(illust_id))
        return
    try:
        detail = detail.illust
        # Single-image illustration
        if detail.page_count == 1:
            try:
                url = detail.meta_single_page.original_image_url
            except Exception:
                # No original URL available: fall back to the large image.
                url = detail.image_urls.large
            extension = os.path.splitext(url)[1]
            save_path = path + "/p_%s%s" % (illust_id, extension)
            print("Downloading:" + save_path)
            return PixivApi.download(url, path=save_path)
        # Multi-page illustration
        if detail.page_count > P_LIMIT and limit_p:
            # More pages than the configured limit: skip the download.
            print("Pixiv id:%s P>limit,Skip download" % (illust_id,))
            return
        urls = detail.meta_pages
        if len(urls) <= 1:
            # Page list unavailable: download the large image instead.
            url = detail.image_urls.large
            return PixivApi.download(url, prefix=path)
        # All pages of one work go into a dedicated folder.
        path += "/p_%s" % illust_id
        if not os.path.exists(path):
            os.mkdir(path)
        for index, page in enumerate(urls):
            try:
                image_urls = page.image_urls
                # ``in`` instead of has_key(), which Python 3 removed.
                url = image_urls.original if "original" in image_urls \
                    else image_urls.large
                extension = os.path.splitext(url)[1]
                save_path = path + "/p_%s_%d%s" % (illust_id, index, extension)
                print("Downloading:" + save_path)
                PixivApi.download(url, path=save_path)
            except Exception as e:
                # Best effort: one broken page must not abort the others,
                # but the failure is logged instead of silently dropped.
                error_log("Download page %d fail, pixiv id:%s" %
                          (index, illust_id))
                error_log(e)
                continue
        return path + "/"
    except Exception as e:
        # str() is required: ``detail`` is not a string, so plain "+" would
        # raise TypeError inside this handler and hide the real error.
        error_log("Download fail:" + str(detail))
        error_log(e)
コード例 #9
0
def run_by_pool():
    """Crawl pages 1..PAGE_NUM through a thread pool and wait for completion.

    One ``start(url, save_path=IMAGE_SAVE_BASEPATH)`` call is queued per page
    URL on a pool of at most five threads.  The pool is polled every 20
    seconds and stopped once it reports no working threads; start and end
    timestamps are written through ``error_log``.
    """
    page_urls = []
    for page_no in range(1, PAGE_NUM + 1):
        page_urls.append(LINK_URL % page_no)
    print(page_urls)
    # Original note: 5*20, i.e. at most 100 threads running.
    error_log("start:" + str(time.time()))
    workers = ThreadPool(minthreads=1, maxthreads=5)
    for page_url in page_urls:
        workers.callInThread(start, page_url, save_path=IMAGE_SAVE_BASEPATH)
    workers.start()
    # Check the pool every 20s; stop once no thread is working any more.
    time.sleep(20)
    while len(workers.working) != 0:
        time.sleep(20)
    workers.stop()
    error_log("end:" + str(time.time()))
コード例 #10
0
def run_by_list():
    """Crawl pages 1..PAGE_NUM in small batches of ``PixivisionLauncher`` objects.

    The page URLs are processed two at a time: a launcher is created for
    every URL of the batch, all launchers are started, then all are joined
    before the next batch begins.  Start and end timestamps are written
    through ``error_log``.
    """
    error_log("start:" + str(time.time()))
    # Crawl the illustrations of the whole Pixivision site.
    page_urls = []
    for page_no in range(1, PAGE_NUM + 1):
        page_urls.append(LINK_URL % page_no)
    # Batch size.  Original note: each round launches 2 * 20 image downloads;
    # tune it to the machine, although network speed matters far more than
    # CPU or memory.
    batch_size = 2
    total = len(page_urls)
    for offset in range(0, total, batch_size):
        batch = []
        for page_url in page_urls[offset:offset + batch_size]:
            print("Start " + page_url)
            batch.append(PixivisionLauncher(page_url, IMAGE_SAVE_BASEPATH))
        for worker in batch:
            worker.start()
        for worker in batch:
            worker.join()
    error_log("end:" + str(time.time()))
コード例 #11
0
 def download_topics(cls, url, path, quality=1):
     """Download every illustration listed on the topic page ``url``.

     :param url: topic page address, fetched with ``HtmlDownloader.download``
     :param path: directory receiving ``topic.txt`` and the images
     :param quality: 1 looks up each illustration through
                     ``PixivApi.illust_detail`` and downloads the URL chosen
                     by ``ImageDownload.get_image_url``; any other value
                     downloads the image shown on the page (``illu.image``)

     A failure on one illustration is logged and the loop carries on with
     the next one.
     """
     html = HtmlDownloader.download(url)
     illu_list = HtmlDownloader.parse_illustration(html)
     title_des = HtmlDownloader.get_title(html)
     if title_des and illu_list:
         title_des["size"] = len(illu_list)
         CommonUtils.write_topic_des(path + "/topic.txt", title_des)
     if not illu_list:
         return
     for illu in illu_list:
         try:
             filename = CommonUtils.filter_dir_name(illu.title)
             extension = os.path.splitext(illu.image)[1]
             # Named ``illust_id`` so the builtin ``id`` is not shadowed.
             illust_id = CommonUtils.get_url_param(illu.image_page,
                                                   "illust_id")
             if quality == 1:
                 # Ask the API for the illustration detail and download the
                 # image it points to.
                 detail = PixivApi.illust_detail(illust_id)
                 if detail:
                     download_url = ImageDownload.get_image_url(
                         illu, detail)
                     if IMAGE_USE_ORG_NAME:
                         save_path = path + "/p_%s_%s%s" % (
                             illust_id, filename, extension)
                     else:
                         save_path = path + "/p_%s%s" % (illust_id,
                                                         extension)
                     print(save_path)
                     PixivApi.download(download_url, path=save_path)
                 else:
                     print(illu.title + " can't get detail id :" +
                           illust_id)
             else:
                 # Download the image displayed on pixivision directly.
                 save_path = path + "/p_%s_%s%s" % (illust_id, filename,
                                                    extension)
                 print(save_path)
                 PixivApi.download(illu.image, path=save_path)
         # "except X as e" works on Python 2.6+ and 3; "except X, e" is a
         # SyntaxError on Python 3.
         except Exception as e:
             error_log("Download Illu Fail:" + " Illustration :" +
                       str(illu))
             error_log(e)
             continue
コード例 #12
0
 def get_pixivision_topics(cls, url, path):
     """Prepare a folder for every illustration topic listed on ``url``.

     The page is fetched with ``HtmlDownloader.download`` and parsed with
     ``HtmlDownloader.parse_illustration_topic``.  For each topic a folder
     named after its sanitised title is created under ``path``, the topic
     information is written to ``topic.txt`` inside it, and the folder is
     stored on the topic as ``topic['save_path']``.

     :param url: address of the topic listing page
     :param path: base directory for the topic folders
     :return: ``None`` early (after logging) when no topic is found
     """
     topic_list = HtmlDownloader.parse_illustration_topic(
         HtmlDownloader.download(url))
     if not topic_list:
         error_log(url + " not find any illustration topic")
         return
     for topic in topic_list:
         try:
             # Special characters must be filtered out of the title,
             # otherwise creating the folder fails.
             # Create the topic folder and write the topic information.
             save_path = path + "/" + CommonUtils.filter_dir_name(
                 topic.title)
             if not os.path.exists(save_path):
                 os.makedirs(save_path)
             CommonUtils.write_topic(save_path + "/topic.txt", topic)
             topic['save_path'] = save_path
         except Exception as e:
             # Log first, then move on: with ``continue`` placed before the
             # log calls (as it used to be) the failure was never reported.
             error_log("Create topic path fail,topic url:" + topic.Href)
             error_log(e)
             continue