Python CountingColors Examples

Programming Language: Python

Class/Type: CountingColors

Examples at hotexamples.com: 7

Python CountingColors - 7 examples found. These are the top-rated real-world Python examples of CountingColors extracted from open source projects. You can rate examples to help us improve their quality.

Frequently Used Methods

Show Hide

getBf(5)

createDir(2)

Frequently Used Methods

getBf (5)

createDir (2)

Example #1

Show file

def main():
    '''
    Scrape four hand-listed Alexa "Global" ranking pages and print every site.

    For each list page the rank, site name and site URL are printed on one
    line, followed by the site description on the next line. When an entry
    has no "tohome" link, the site name is printed in place of the URL.
    '''
    # Only a few list pages are needed, so the URL list is assembled by hand.
    urllist = [
        "https://alexa.chinaz.com/Global/index.html",
        "https://alexa.chinaz.com/Global/index_2.html",
        "https://alexa.chinaz.com/Global/index_3.html",
        "https://alexa.chinaz.com/Global/index_4.html",
    ]

    for url in urllist:
        bf = CountingColors.getBf(url)
        # Locate the <ul> that holds the ranking entries.
        ulbf = bf.find(name="ul", attrs={"class": "rowlist"})
        li_list = ulbf.find_all(name="li", attrs={"class": "clearfix"})
        for li in li_list:  # one <li> per ranked site
            alexacount = li.find(name="div", attrs={
                "class": "count"
            }).string  # site rank
            sitename = li.find(name="span").string  # site name
            siteinfo = li.find(name="p").string  # site description

            # A few entries have no home link. Check for that explicitly
            # instead of using a bare ``except:``, which would also swallow
            # KeyboardInterrupt and unrelated errors.
            home_link = li.find(name="a", attrs={"class": "tohome"})
            if home_link is not None:
                siteurl = home_link.get("href")
            else:
                siteurl = sitename

            print("{} {} {}".format(
                alexacount,
                sitename,
                siteurl,
            ))
            print(siteinfo)

Example #2

Show file

File: test_10jqka.py Project: bosichong/CountingColors

def main():
    '''
    Print the rows of the table inside the "yyb" div of the 10jqka page.

    Every table row is printed on one line, each cell followed by a space.
    For the second cell of a row the ``title`` attribute of its link is
    printed; when the link or its title is missing, the cell text is printed
    instead.
    '''
    url = "http://data.10jqka.com.cn/market/longhu/"  # page to scrape
    bf = CountingColors.getBf(url)  # parsed (bs4) document for the page
    yybdiv = bf.find(name="div", attrs={
        "class": "yyb"
    }).table.tbody  # table body that holds the data
    trlist = yybdiv.find_all(name="tr")  # all data rows
    for tds in trlist:
        for k, td in enumerate(tds.find_all(name="td")):
            cell = td.text
            if k == 1:
                # Prefer the link's title for the second column, but do not
                # crash when the <a> tag or its title attribute is absent.
                anchor = td.a
                title = anchor.get("title") if anchor is not None else None
                if title is not None:
                    cell = title
            print("{} ".format(cell), end="")
        print("")

Example #3

Show file

File: 异步test_meinvtupian.py Project: bosichong/CountingColors

def getImgUrls(url):
    '''
    Download every image linked from one list page, concurrently.

    Collects all <a> tags inside the element with id "container" and runs
    one ``downImg`` call per tag on the asyncio event loop, blocking until
    all of them have finished.

    url: address of the image list page
    return: None
    '''
    bf = CountingColors.getBf(url)  # parsed (bs4) document for the page
    pageshtml = bf.find(name="div", attrs={"id": "container"})
    pageslist = pageshtml.find_all(name="a")

    # asyncio.wait() raises ValueError on an empty collection.
    if not pageslist:
        return

    loop = asyncio.get_event_loop()
    # Wrap each awaitable in a Task first: passing bare coroutines to
    # asyncio.wait() is deprecated since Python 3.8 and rejected from 3.11.
    # NOTE(review): assumes downImg returns a coroutine/awaitable - confirm.
    tasks = [
        asyncio.ensure_future(downImg(link), loop=loop) for link in pageslist
    ]
    loop.run_until_complete(asyncio.wait(tasks))

Example #4

Show file

File: test2_Consumer.py Project: bosichong/CountingColors

def getImgUrls(l, iq):
    '''
    Download every image listed on one list page, concurrently.

    Collects all <p> tags inside the element with id "container" and runs
    one ``downImg(p, iq)`` call per tag on the asyncio event loop, blocking
    until all of them have finished.

    l: address of the image list page
    iq: queue object handed through unchanged to ``downImg``
        (presumably the network-registered image queue - confirm with caller)
    return: None
    '''
    bf = CountingColors.getBf(l)
    pageshtml = bf.find(name="div", attrs={"id": "container"})
    pageslist = pageshtml.find_all(name="p")

    # asyncio.wait() raises ValueError on an empty collection.
    if not pageslist:
        return

    loop = asyncio.get_event_loop()
    # Wrap each awaitable in a Task first: passing bare coroutines to
    # asyncio.wait() is deprecated since Python 3.8 and rejected from 3.11.
    # NOTE(review): assumes downImg returns a coroutine/awaitable - confirm.
    tasks = [
        asyncio.ensure_future(downImg(p, iq), loop=loop) for p in pageslist
    ]
    loop.run_until_complete(asyncio.wait(tasks))

Example #5

Show file

File: test2_Consumer.py Project: bosichong/CountingColors

def getImgUrl(url):
    '''Return the final download address of the image on page ``url``.

    The address is the ``href`` of the first <a> tag with class
    "image_gall" in the parsed page.
    '''
    gallery_link = CountingColors.getBf(url).find(
        name="a",
        attrs={"class": "image_gall"},
    )
    return gallery_link.get('href')

Example #6

Show file

File: 异步test_meinvtupian.py Project: bosichong/CountingColors

def main():
    '''Create the image directory, then download one list page as a test.'''
    # Create the directory that images are saved to.
    CountingColors.createDir(IMAGES_PATH)

    # Download test: all images found on a single list page.
    list_page_url = "https://sc.chinaz.com/tupian/meinvtupian_2.html"
    getImgUrls(list_page_url)

Example #7

Show file

File: test2_Manager.py Project: bosichong/CountingColors

    # Fetch the queues that were registered on the network; calling the
    # method named after a queue on the manager returns that queue.
    uq = manager.uq()
    iq = manager.iq()

    # Main loop (never exits on its own). Each pass does one of three things:
    # wait while uq still has items, save one image from iq, or idle.
    while True:
        if not uq.empty():  # uq still has pending items
            time.sleep(1)
            print("还有好多任务没有完成！")
        elif not iq.empty():  # an image has arrived; save it
            # data is indexed as: [0] string searched for the file name
            # (presumably the image URL - confirm), [1] name prefix,
            # [2] content written to disk in binary mode.
            data = iq.get(timeout=3)
            # NOTE(review): the '.' before 'jpg' is unescaped, so it matches
            # any character; re.search() returns None when nothing matches,
            # which would make .group() raise AttributeError.
            imgname = data[1] + re.search(r'[^/]+.jpg',
                                          data[0]).group()  # image file name
            imgpath = IMAGES_PATH + imgname

            # Save the image to disk.
            with open(imgpath, 'wb') as f:
                f.write(data[2])
            print("{}已经保存到{}".format(data[1], imgpath))
        else:
            # Both queues are empty: idle for a second before polling again.
            time.sleep(1)
            print("无聊的等待，他们干活的效率可真慢啊！")


if __name__ == '__main__':  # guard: on Windows this module may be imported again, and the statements below must not run on import
    # freeze_support() has to be the first statement after the guard so that
    # a frozen Windows child process is handed over to multiprocessing before
    # any other start-up work (directory creation, printing) is repeated.
    # NOTE(review): assumes this is multiprocessing.freeze_support - confirm.
    freeze_support()
    CountingColors.createDir(IMAGES_PATH)  # create the image save directory
    print("图片存储目录已创建！")
    print('To start putting tasks in the Queue...')
    do_taskmaster()