def parse(self, response):
    """Extract full-size CDN image URLs and ids from a 58pic page.

    Yields a single QtpjtItem: 'picurl' holds every matched CDN image
    URL (capture group excludes the ".jpg" suffix, as in the original),
    'picid' holds the bare file-name segment before ".jpg".
    """
    item = QtpjtItem()
    # str(bytes) yields a "b'...'" repr string; the regexes are applied to
    # that repr. NOTE(review): response.text may be what was intended — confirm.
    body = str(response.body)
    # Raw strings with escaped dots: the original unescaped "." before
    # "jpg" matched ANY character, so e.g. "...Xjpg" also matched.
    paturl = r"(http://pic\.qiantucdn\.com/58pic/.*?)\.jpg"
    item['picurl'] = re.compile(paturl).findall(body)
    patid = r"http://pic\.qiantucdn\.com/58pic/.*?/.*?/.*?/(.*?)\.jpg"
    item['picid'] = re.compile(patid).findall(body)
    yield item
def parse(self, response):
    """Scrape image URLs/ids from this page, then queue category pages 1-100.

    Yields one QtpjtItem ('picurl': CDN URLs sans ".jpg"; 'picid': bare
    file-name segments), then a Request per listing page back into this
    same callback.
    """
    item = QtpjtItem()
    # Hoisted: the original converted response.body twice.
    # NOTE(review): str(bytes) gives a "b'...'" repr — response.text may be intended.
    body = str(response.body)
    # Raw strings with escaped dots; unescaped "." before "jpg" matched any char.
    paturl = r"(http://pic\.qiantucdn\.com/58pic/.*?)\.jpg"
    item['picurl'] = re.compile(paturl).findall(body)
    patlocal = r"http://pic\.qiantucdn\.com/58pic/.*?/.*?/.*?/(.*?)\.jpg"
    item['picid'] = re.compile(patlocal).findall(body)
    yield item
    # Follow listing pages 1..100 through this callback.
    for i in range(1, 101):
        nexturl = "http://www.58pic.com/piccate/3-0-0-" + str(i) + ".html"
        yield Request(nexturl, callback=self.parse)
def parse(self, response):
    """Scrape image URLs and title-based ids, then queue pages 1-19.

    Yields one QtpjtItem, then a Request per listing page back into this
    callback.
    """
    item = QtpjtItem()
    # BUG FIX: "[jpeg|jpg]" was a character class matching exactly ONE of
    # the letters j/p/e/g (or a literal "|"), not the intended alternation.
    # Use a non-capturing group; dots escaped as literal dots.
    picurl = r"(http://pic\.qiantucdn\.com/58pic/.*?)\.(?:jpeg|jpg)"
    item['picurl'] = re.compile(picurl).findall(str(response.body))
    # BUG FIX: .extract() so 'picid' holds strings, not Selector objects
    # (the original stored the raw SelectorList).
    item['picid'] = response.xpath(
        '//a[@class="bottom-title"]/text()').extract()
    yield item
    for i in range(1, 20):
        nexturl = 'http://www.58pic.com/piccate/3-0-0-' + str(i) + '.html'
        yield Request(nexturl, callback=self.parse)
def parse(self, response):
    """Scrape image URLs/ids from this page, then queue detail pages 1-200.

    Yields one QtpjtItem, then a Request per "/tb/id-N.html" page back
    into this callback.
    """
    item = QtpjtItem()
    # NOTE(review): "retest" is presumably the re module imported under an
    # alias elsewhere in this file — confirm.
    # Hoisted: the body repr was built twice in the original.
    body = str(response.body)
    # Raw strings with escaped dots; unescaped "." before "jpg" matched any char.
    paturl = r"(http://pic\.qiantucdn\.com/58pic/.*?)\.jpg"
    item["picurl"] = retest.compile(paturl).findall(body)
    patlocal = r"http://pic\.qiantucdn\.com/58pic/.*?/.*?/.*?/(.*?)\.jpg"
    item["picid"] = retest.compile(patlocal).findall(body)
    yield item
    for i in range(1, 201):
        nexturl = "http://www.58pic.com/tb/id-" + str(i) + ".html"
        yield Request(nexturl, callback=self.parse)
def parse(self, response):
    """Collect mooncake-category picture URLs/ids, then follow pages 2-3.

    Yields one QtpjtItem for this page, then one Request per remaining
    listing page, re-entering this callback.
    """
    page_text = str(response.body)
    item = QtpjtItem()
    # Case-insensitive scan of the raw body repr for CDN image URLs
    # (capture excludes ".jpg") and for the bare file-name ids.
    item['picurl'] = re.findall(
        "(http://pic.qiantucdn.com/58pic/.*?).jpg", page_text, re.I)
    item['picid'] = re.findall(
        "http://pic.qiantucdn.com/58pic/.*?/.*?/.*?/(.*?).jpg",
        page_text, re.I)
    yield item
    # Walk listing pages 2 and 3 through this same callback.
    for page in (2, 3):
        nexturl = ("http://www.58pic.com/tupian/yuebing-0-0-"
                   + str(page) + ".html")
        yield Request(nexturl, callback=self.parse)
def parse(self, response):
    """Pull thumbnail-variant image URLs and ids, then queue pages 1-10.

    The "!qt324" suffix selects 58pic's 324px thumbnail rendition; the
    capture keeps the URL up to and including "jpg!". Yields one
    QtpjtItem, then a Request per listing page into this callback.
    """
    raw = str(response.body)
    item = QtpjtItem()
    url_re = re.compile(
        "(http://pic.qiantucdn.com/58pic/.*?/.*?/.*?/.*?.jpg!)qt324")
    id_re = re.compile(
        "http://pic.qiantucdn.com/58pic/.*?/.*?/.*?/(.*?).jpg")
    item["picurl"] = url_re.findall(raw)
    item["picid"] = id_re.findall(raw)
    yield item
    # Re-enter this callback for listing pages 1 through 10.
    page = 1
    while page <= 10:
        nexturl = ("http://www.58pic.com/piccate/3-0-0-default-0_2_0_0_default_0-"
                   + str(page) + ".html")
        yield Request(nexturl, callback=self.parse)
        page += 1
def parse(self, response):
    """Crawl a 58pic listing page: ensure a per-category folder exists on
    disk, then synchronously fetch each thumbnail's detail page with
    urllib and yield the full image URL found there.

    NOTE(review): this callback performs blocking network I/O (urllib)
    inside Scrapy, installs a global opener, and writes to a hard-coded
    Windows path — all behavior-bearing, left untouched here.
    """
    item = QtpjtItem()
    # Category name shown on the page; used as the on-disk folder name.
    item["picfolder"] = response.xpath(
        "//em[@class='text-green-b']/text()").extract()
    # IndexError if the xpath matched nothing — assumes the page always
    # carries the category element; TODO confirm.
    folder = os.path.exists(
        'C:\\Users\\leishen\\Documents\\anaconda3\\scrapy\\master python scrapy\\chapter 19\\pic'
        + '\\' + item["picfolder"][0])
    if not folder:
        os.mkdir(
            'C:\\Users\\leishen\\Documents\\anaconda3\\scrapy\\master python scrapy\\chapter 19\\pic'
            + '\\' + item["picfolder"][0])
    # Detail-page links for every thumbnail on this listing page.
    item["link"] = response.xpath(
        "//a[@class='thumb-box']/@href").extract()
    # Tested: works.
    # headers = {"Accept-Encoding":"utf-8,gb2312","User-Agent":"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 Safari/537.36 SE 2.X MetaSr 1.0"}
    headers = (
        "User-Agent",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122 Safari/537.36 SE 2.X MetaSr 1.0"
    )
    opener = urllib.request.build_opener()
    opener.addheaders = [headers]
    # Install the opener globally so plain urlopen() below uses the UA header.
    urllib.request.install_opener(opener)
    for m in range(0, len(item["link"])):
        # Blocking fetch of the detail page (bypasses Scrapy's scheduler).
        data = urllib.request.urlopen(item["link"][m]).read()
        # Full-size image URL from the detail page's show-area markup.
        paturl = '<img src="(http.*?)".*?show-area-pic'
        item["picurl"] = re.compile(paturl).findall(str(data))
        # NOTE(review): the same item object is mutated and re-yielded
        # every iteration — each yield overwrites the previous picurl.
        yield item
        # data is the click/comment-count data for all posts on the blog list page
        # data = urllib.request.urlopen(item["link"][m]).read().decode('gb2312')
        # for k in range(0, len(item["picurl"])):
        #     patlocal = "http://pic.qiantucdn.com/58pic/.*?/.*?/.*?/(.*?).jpg"
        #     item["picid"][k]=re.compile(patlocal).findall(str(item["picurl"][k]))[0]+"-"+str(k)
        # item["picid"]=tupianm
        # picid = 'id="show-area-pic".*?alt="(.*?)"'
        # item["picid"] = re.compile(picid).findall(str(data))
    # Iterate further picture list pages (comment said 1-200, but
    # range(2, 3) only visits page 2 — TODO confirm intent).
    for i in range(2, 3):
        # Build the next list-page URL.
        nexturl = ("http://www.58pic.com/piccate/3-0-0-default-0_2_0_0_default_0-"
                   + str(i) + ".html")
        yield Request(nexturl, callback=self.parse)
    # NOTE(review): this triple-quote opens a commented-out block that
    # continues beyond this chunk (the truncated variant below it).
    '''
def parse(self, response): item=QtpjtItem() #构建提取缩略图网址的正则表达式 paturl="(http://pic.qiantucdn.com/58pic/.*?).jpg"