Ejemplo n.º 1
0
    def duowanfunc():
        """Scrape one thread page for a posted ID and up to five image URLs.

        Takes the next URL from ``rq_girl`` (presumably a queue of thread
        URLs -- confirm against the enclosing scope), downloads it and
        inspects the first ``<div class="t_fsz">`` element.

        Returns:
            ``None`` when the page has no ``t_fsz`` div; ``False`` when the
            div's text contains no "id"/"ID" token or the div holds no
            ``<img>`` with a ``zoomfile`` attribute; otherwise a dict with
            ``'nickname'`` (list of every string captured after an
            "id"/"ID" token) and ``'picurls'`` (at most five ``zoomfile``
            values passed through ``utils.quote_url``).
        """
        girlurl = rq_girl.get()
        r = requests.get(girlurl)
        soup = BeautifulSoup(r.text, "html.parser")
        girlpage = soup.find('div', attrs={'class': 't_fsz'})
        if not girlpage:
            # NOTE(review): this path yields None while the checks below
            # yield False; kept as-is so existing callers are unaffected.
            return

        # Extract the poster's ID: the token following "id"/"ID" and any
        # separator characters. Raw string so "\s" and "\S" reach the regex
        # engine without being treated as (invalid) string escapes.
        p = re.compile(r'(?:id|ID)[\s|:|:|;|;]*(\S+)\s*')
        nickname = p.findall(girlpage.text)

        # Bail out if no ID keyword is present.
        if not nickname:
            return False

        # Bail out if the post carries no zoomable pictures.
        pics = girlpage.find_all('img', zoomfile=True)
        if not pics:
            return False

        # Keep at most the first five picture URLs.
        piclist = [utils.quote_url(pic.attrs['zoomfile']) for pic in pics[:5]]
        return {'nickname': nickname, 'picurls': piclist}
Ejemplo n.º 2
0
def duowanlol1():
    """Crawl the Duowan forum listing and enqueue matching thread URLs.

    Requests listing pages 1..999 one at a time, sleeping one second before
    each request. Every thread link matched by the listing regex is passed
    through ``utils.quote_url``, prefixed with the forum base URL, put on
    ``rq`` (presumably a shared queue -- confirm against the module) and
    printed. The crawl stops early at the first page that yields no links.

    Returns:
        None.
    """
    pre_url = "http://bbs.duowan.com/"
    url = "http://bbs.duowan.com/forum.php?mod=forumdisplay&fid=1343&orderby=dateline&typeid=7092&filter=author&orderby=dateline&typeid=7092&page=%s"
    # Compiled once, outside the loop. Raw string so "\]" and "\d" are
    # regex escapes rather than invalid string escapes.
    p = re.compile(r'求封面</a>\]</em> <span id="thread_\d+?"><a href="(.+?)"')

    # Hard cap of 999 pages.
    for i in range(1, 1000):
        time.sleep(1)
        r = requests.get(url % i)
        urls = p.findall(r.text)
        # findall() returns an empty list (never None) when nothing matches,
        # so test for emptiness to detect the end of the listing.
        if not urls:
            return
        for x in urls:
            tmp = pre_url + utils.quote_url(x)
            rq.put(tmp)
            print(tmp)
        print('=========duowan1-', i, ' =========')
Ejemplo n.º 3
0
def qqlol():
    """Crawl the QQ LoL forum listing and enqueue matching thread URLs.

    Requests listing pages 1..999 one at a time, sleeping one second before
    each request. Every thread link matched by the listing regex is passed
    through ``utils.quote_url``, prefixed with the forum base URL, put on
    ``rq`` (presumably a shared queue -- confirm against the module) and
    printed. The crawl stops early at the first page that yields no links.

    Returns:
        None.
    """
    pre_url = "http://bbs.lol.qq.com/"
    url = "http://bbs.lol.qq.com/forum.php?mod=forumdisplay&fid=205&typeid=966&typeid=966&filter=typeid&page=%d"
    # Compiled once, outside the loop. Raw string so "\]" is a regex escape
    # rather than an invalid string escape.
    p = re.compile(r'我要曝照</a>\]</em> <a href="(.+?)"')

    # Hard cap of 999 pages.
    for i in range(1, 1000):
        time.sleep(1)
        r = requests.get(url % i)
        urls = p.findall(r.text)
        # findall() returns an empty list (never None) when nothing matches,
        # so test for emptiness to detect the end of the listing.
        if not urls:
            return
        for x in urls:
            tmp = pre_url + utils.quote_url(x)
            rq.put(tmp)
            print(tmp)
        print('=========qqlol-', i, ' =========')
Ejemplo n.º 4
0
def duowanlol1():
    """Crawl the Duowan forum listing and enqueue matching thread URLs.

    Requests listing pages 1..999 one at a time, sleeping one second before
    each request. Every thread link matched by the listing regex is passed
    through ``utils.quote_url``, prefixed with the forum base URL, put on
    ``rq`` (presumably a shared queue -- confirm against the module) and
    printed. The crawl stops early at the first page that yields no links.

    Returns:
        None.
    """
    pre_url = "http://bbs.duowan.com/"
    url = "http://bbs.duowan.com/forum.php?mod=forumdisplay&fid=1343&orderby=dateline&typeid=7092&filter=author&orderby=dateline&typeid=7092&page=%s"
    # Compiled once, outside the loop. Raw string so "\]" and "\d" are
    # regex escapes rather than invalid string escapes.
    p = re.compile(r'求封面</a>\]</em> <span id="thread_\d+?"><a href="(.+?)"')

    # Hard cap of 999 pages.
    for i in range(1, 1000):
        time.sleep(1)
        r = requests.get(url % i)
        urls = p.findall(r.text)
        # findall() returns an empty list (never None) when nothing matches,
        # so test for emptiness to detect the end of the listing.
        if not urls:
            return
        for x in urls:
            tmp = pre_url + utils.quote_url(x)
            rq.put(tmp)
            print(tmp)
        print('=========duowan1-', i, ' =========')
Ejemplo n.º 5
0
def qqlol():
    """Crawl the QQ LoL forum listing and enqueue matching thread URLs.

    Requests listing pages 1..999 one at a time, sleeping one second before
    each request. Every thread link matched by the listing regex is passed
    through ``utils.quote_url``, prefixed with the forum base URL, put on
    ``rq`` (presumably a shared queue -- confirm against the module) and
    printed. The crawl stops early at the first page that yields no links.

    Returns:
        None.
    """
    pre_url = "http://bbs.lol.qq.com/"
    url = "http://bbs.lol.qq.com/forum.php?mod=forumdisplay&fid=205&typeid=966&typeid=966&filter=typeid&page=%d"
    # Compiled once, outside the loop. Raw string so "\]" is a regex escape
    # rather than an invalid string escape.
    p = re.compile(r'我要曝照</a>\]</em> <a href="(.+?)"')

    # Hard cap of 999 pages.
    for i in range(1, 1000):
        time.sleep(1)
        r = requests.get(url % i)
        urls = p.findall(r.text)
        # findall() returns an empty list (never None) when nothing matches,
        # so test for emptiness to detect the end of the listing.
        if not urls:
            return
        for x in urls:
            tmp = pre_url + utils.quote_url(x)
            rq.put(tmp)
            print(tmp)
        print('=========qqlol-', i, ' =========')