def download_image_byid(cls, id):
    if id:
        detail = PixivApi.illust_detail(id)
        print(detail)
        if detail:
            download_url = ImageDownload.get_image_url(None, detail)
            if download_url:
                PixivApi.download(download_url)
            else:
                print("download by id failed, can't find download url")
        else:
            print("can't get detail, id: " + str(id))
Example #2
def download_all_by_id(illust_id, path, limit_p=True):
    detail = PixivApi.illust_detail(illust_id)
    if detail:
        try:
            detail = detail.illust
            # single-page illustration
            if detail.page_count == 1:
                try:
                    url = detail.meta_single_page.original_image_url
                except:
                    url = detail.image_urls.large
                extension = os.path.splitext(url)[1]
                save_path = path + "/p_%s%s" % (illust_id, extension)
                print("Downloading:" + save_path)
                path = PixivApi.download(url, path=save_path)
            # multi-page illustration
            else:
                if detail.page_count > P_LIMIT and limit_p:
                    # page count exceeds the limit, skip download
                    print("Pixiv id: %s page count > limit, skip download" % (illust_id,))
                    return
                urls = detail.meta_pages
                # fetch the individual pages
                if len(urls) > 1:
                    # put all pages into one folder
                    path += "/p_%s" % illust_id
                    if not os.path.exists(path):
                        os.mkdir(path)
                    for index in range(len(urls)):
                        try:
                            url = urls[index].image_urls.original if \
                                urls[index].image_urls.has_key("original") else urls[index].image_urls.large
                            extension = os.path.splitext(url)[1]
                            save_path = path + "/p_%s_%d%s" % (illust_id, index, extension)
                            print("Downloading:" + save_path)
                            PixivApi.download(url, path=save_path)
                        except Exception:
                            continue
                    path = path + "/"
                else:
                    # failed to get the page list, fall back to the large image
                    url = detail.image_urls.large
                    path = PixivApi.download(url, prefix=path)
            return path
        except Exception as e:
            error_log("Download fail:" + detail)
            error_log(e)
    else:
        print(" can't get detail id :" + str(illust_id))
Example #3
def download_illustration(illu, path, auth_api):
    """
    #illu 包含插画详细
    path 存储路径
    auth_api 具有身份验证的下载工具
    """
    if illu.has_key("url") and illu.has_key("title"):
        illust_id = CommonUtils.get_url_param(illu.url, "illust_id")
        detail = PixivApi.illust_detail(illust_id)
        if detail:
            try:
                detail = detail.illust
                # single-page illustration
                if detail.page_count == 1:
                    try:
                        url = detail.meta_single_page.original_image_url
                    except:
                        url = detail.image_urls.large
                    download(illust_id, illu.title, path, url, auth_api)
                # multi-page illustration
                else:
                    if detail.page_count > P_LIMIT:
                        # page count exceeds the limit, skip download
                        print("Pixiv id: %s, name: %s page count > limit, skip download" % (illust_id, illu.title))
                        return
                    urls = detail.meta_pages
                    # fetch the individual pages
                    if len(urls) > 1:
                        # put all pages into one folder
                        path += "/p_%s" % illust_id
                        if not os.path.exists(path):
                            os.mkdir(path)
                        for index in range(len(urls)):
                            try:
                                url = urls[index].image_urls.original if \
                                    urls[index].image_urls.has_key("original") else urls[index].image_urls.large
                                extension = os.path.splitext(url)[1]
                                if IMAGE_USE_ORG_NAME:
                                    save_path = path + "/p_%s_%s_%d%s" % (
                                        illust_id,
                                        CommonUtils.filter_dir_name(illu.title),
                                        index, extension)
                                else:
                                    save_path = path + "/p_%s_%d%s" % (illust_id, index, extension)
                                print(save_path)
                                auth_api.download(url, path=save_path)
                            except:
                                continue
                    else:
                        # failed to get the page list, fall back to the large image
                        url = detail.image_urls.large
                        download(illust_id, illu.title, path, url, auth_api)
            except Exception as e:
                error_log("Download fail:")
                error_log(e)
        else:
            print(illu.title + " can't get detail id :" + illust_id)
def consumer_download_work(queue, save_path):
    while True:
        try:
            illust = queue.get()
            if illust.page_count == 1:
                try:
                    url = illust.meta_single_page.original_image_url
                except:
                    url = illust.image_urls.large
            else:
                url = illust.image_urls.large
            extension = os.path.splitext(url)[1]
            image_save_path = save_path + "/p_%s%s" % (illust.id, extension)
            PixivApi.download(url, path=image_save_path)
            print("download " + image_save_path + "\n")
        except Exception as e:
            print("download fail, remove id " + str(illust.id))
            print(e)
            continue
        finally:
            # mark the item as processed so queue.join() in the producer can return
            queue.task_done()
def download_topics(cls, url, path, quality=1):
    html = HtmlDownloader.download(url)
    illu_list = HtmlDownloader.parse_illustration(html)
    title_des = HtmlDownloader.get_title(html)
    if title_des and illu_list:
        title_des["size"] = len(illu_list)
        CommonUtils.write_topic_des(path + "/topic.txt", title_des)
    if not illu_list:
        return
    for illu in illu_list:
        try:
            filename = CommonUtils.filter_dir_name(illu.title)
            extension = os.path.splitext(illu.image)[1]
            id = CommonUtils.get_url_param(illu.image_page, "illust_id")
            if quality == 1:
                # fetch the original image URL through the API and download the original
                detail = PixivApi.illust_detail(id)
                if detail:
                    download_url = ImageDownload.get_image_url(illu, detail)
                    if IMAGE_USE_ORG_NAME:
                        save_path = path + "/p_%s_%s%s" % (id, filename, extension)
                    else:
                        save_path = path + "/p_%s%s" % (id, extension)
                    print(save_path)
                    PixivApi.download(download_url, path=save_path)
                else:
                    print(illu.title + " can't get detail, id: " + id)
            else:
                # download the Pixivision preview image directly
                print(path + "/p_%s_%s%s" % (id, filename, extension))
                PixivApi.download(illu.image,
                                  path=path + "/p_%s_%s%s" % (id, filename, extension))
        except Exception as e:
            error_log("Download Illu Fail: Illustration: " + str(illu))
            error_log(e)
            continue
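
A hedged usage sketch for download_topics; the Pixivision article URL and save directory are placeholders, and the method is assumed to be exposed on the ImageDownload class as in the later snippets:

topic_url = "https://www.pixivision.net/zh/a/2963"  # placeholder article URL
topic_dir = "imageDownload"                         # placeholder directory
if not os.path.exists(topic_dir):
    os.mkdir(topic_dir)
ImageDownload.download_topics(topic_url, topic_dir, quality=1)
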
def relate_illust(seed):
    queue = Queue()
    r = redis.Redis(REDIS_IP, REDIS_PORT)
    i_filter = RedisFilter(r, 5, "setFilter2:PixivRelated")
    save_path = "E:/imageDownLoad/related_%s" % str(seed)
    if not os.path.exists(save_path):
        os.mkdir(save_path)
    # start the consumer download threads
    for i in range(3):
        t = Thread(target=consumer_download_work,
                   args=(queue, save_path, i_filter))
        t.daemon = True
        t.start()

    related = PixivApi.illust_related(seed)
    # parse the returned JSON and put the download URLs into the queue
    producer_put_work(related, queue, i_filter)
    if related.has_key("next_url"):
        url = related.next_url
    else:
        print("There is no next URL,(没有查询到关联作品)")
        return
    count = 1
    while True:
        # optional delay between requests
        # time.sleep(2)
        resp = HtmlDownloader.download(url)
        related2 = parse_json(resp)
        if related.has_key("next_url"):
            url = related.next_url
        else:
            print("There is no next URL,(没有查询到关联作品)")
            break
        print("Depth :" + str(count) + " Associated illust:" +
              str(len(related2.illusts)))
        print("Next URL:" + related2.next_url)
        producer_put_work(related2, queue, i_filter)
        # stop once the target depth is reached
        if count == 2:
            print("producer completed!")
            break
        count += 1
    queue.join()
def relate_illust(seed, depth=2, image_path='imageDownload'):
    queue = Queue()
    save_path = (image_path + "/related_%s") % str(seed)
    if not os.path.exists(save_path):
        os.mkdir(save_path)
    # start the consumer download threads
    for i in range(3):
        t = Thread(target=consumer_download_work, args=(queue, save_path))
        t.daemon = True
        t.start()

    related = PixivApi.illust_related(seed)
    # parse the returned JSON and put the download URLs into the queue
    producer_put_work(related, queue)
    if related.has_key("next_url"):
        url = related.next_url
    else:
        print("There is no next URL,(无法查询到关联作品)")
        return
    count = 1
    while True:
        # optional delay between requests
        # time.sleep(2)
        resp = HtmlDownloader.download(url)
        related2 = parse_json(resp)
        if related.has_key("next_url"):
            url = related.next_url
        else:
            print("There is no next URL,(没有查询到关联作品)")
            break
        print("Depth :" + str(count) + " Associated illust:" +
              str(len(related2.illusts)))
        print("Next URL:" + related2.next_url)
        producer_put_work(related2, queue)
        # stop once the target depth is reached
        if count == depth:
            print("producer completed!")
            break
        count += 1
    queue.join()
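
producer_put_work is referenced above but not shown in this listing; a minimal sketch, assuming it only enqueues each illust from the related-works response (the Redis-backed filter argument used in the first variant is omitted):

def producer_put_work(related, queue):
    # put every related illust into the queue for the consumer threads
    if related and related.has_key("illusts"):
        for illust in related.illusts:
            queue.put(illust)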
Example #8
def test_api():
    detail = PixivApi.illust_detail(54809586)
    print(detail.illust)
    related = PixivApi.illust_related(54809586)
    print(related)
Example #9
def download_test(url):
    print("start download:" + str(time.time()))
    PixivApi.download(url)
    # record the finish time of the last URL's download
    print("url:" + url + " end:" + str(time.time()))
Example #10
# When twisted is not installed, fall back to sequential threaded downloading.
def run_by_list():
    error_log("start:" + str(time.time()))
    # crawl illustrations from the whole Pixivision site
    urls = [LINK_URL % n for n in range(1, PAGE_NUM + 1)]
    # step size: each batch launches 2 * 20 image download threads; adjust to your machine. Speed depends mostly on network bandwidth, not CPU or memory.
    step = 2
    length = len(urls)
    start_index = 0
    while start_index < length:
        launchers = []
        for url in urls[start_index:(start_index + step)]:
            print("Start " + url)
            launchers.append(PixivisionLauncher(url, IMAGE_SAVE_BASEPATH))
        for launcher in launchers:
            launcher.start()
        for launcher in launchers:
            launcher.join()
        start_index += step
    error_log("end:" + str(time.time()))


if __name__ == '__main__':
    PixivApi.check_api()
    try:
        from twisted.python.threadpool import ThreadPool
    except Exception:
        run_by_list()
    else:
        run_by_pool()
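
run_by_pool is called above when twisted is available but is not included in this snippet; a rough sketch, assuming it drives the same PixivisionLauncher objects through twisted's ThreadPool (the pool size is an arbitrary choice):

def run_by_pool():
    from twisted.python.threadpool import ThreadPool
    urls = [LINK_URL % n for n in range(1, PAGE_NUM + 1)]
    pool = ThreadPool(minthreads=1, maxthreads=5)
    pool.start()
    for url in urls:
        # PixivisionLauncher is assumed to expose run(), as a Thread subclass does
        pool.callInThread(PixivisionLauncher(url, IMAGE_SAVE_BASEPATH).run)
    # stop() joins the worker threads after the queued calls have been consumed
    pool.stop()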
Example #11
def download_all_by_url(url, prefix):
    illust_id = CommonUtils.get_url_param(url, "illust_id")
    if illust_id:
        return download_all_by_id(illust_id, prefix)
    else:
        return PixivApi.download(url.strip(), prefix=prefix)
def download_byurl(cls, url):
    illust_id = CommonUtils.get_url_param(url, "illust_id")
    if illust_id:
        ImageDownload.download_image_byid(illust_id)
    else:
        PixivApi.download(url.strip())
Example #13
from pixivapi.PixivApi import PixivApi
from utils import CommonUtils

if __name__ == '__main__':
    type = raw_input("Please chose run mode.1.Use pixiv_config file to search. 2. Enter the parameters manually:\n")
    if type == "1":
        username = USERNAME
        password = PASSWORD
        print ("Loading")
        # PixivDataDownloader.PixivDataHandler() can also crawl without logging in, but then there are no popularity-based recommendations and the crawled illustration quality drops a lot, so login is required here.
        if len(PIXIV_COOKIES) >= 3:
            data_handler = PixivDataDownloader.PixivDataHandler(cookies=PIXIV_COOKIES)
        else:
            data_handler = PixivDataDownloader.PixivDataHandler(username, password)
        # Two APIs can be used for downloading: AuthPixivApi and PixivApi. AuthPixivApi requires login but can download more restricted illustrations; PixivApi is usually sufficient.
        auth_api = PixivApi()
        print("Login success!!!!")
        download_threshold = DOWNLOAD_THRESHOLD
        path = SEARCH_SAVE_PATH
        page = SEARCH_PAGE
        keyword = SEARCH_KEYWORD
    else:
        username = raw_input("Please enter your pixiv accounts eamil or pixiv ID\n")
        password = raw_input('Enter password:\n ')
        print ("Loading")
        data_handler = PixivDataDownloader.PixivDataHandler(username, password)
        auth_api = AuthPixivApi(username, password)
        print("Login success!!!!")
        path = raw_input("Please input illustration save path:\n")
        page = int(raw_input("Please enter the total number of pages you want to crawl:\n"))
        download_threshold = int(raw_input("Please enter the minimum number of illustration's bookmarks:\n"))
Example #14
def test_api():
    detail = PixivApi.illust_detail(52819443)
    print(detail.illust)