Exemple #1
0
 def parse(self, response):
     """Default callback for the responses to the start URLs.

     :param response: the response object; only its ``body`` is read here
     :return: generator yielding one ``Item`` built from ``response.body``
     """
     payload = response.body
     yield Item(payload)
 def parse(self, response):
     """Handle, by default, the responses fetched for the start URLs.

     :param response: the response object whose ``body`` is wrapped
     :return: generator producing a single ``Item`` holding ``response.body``
     """
     raw_body = response.body
     yield Item(raw_body)
Exemple #3
0
 def parse(self, response):
     """Parse the response object.

     :param response: the response; only its ``url`` is read here
     :return: a single ``Item`` wrapping ``response.url``
     """
     page_url = response.url
     return Item(page_url)
Exemple #4
0
 def parse_detail(self, response):
     """Add the ``stats-vote`` value of the detail page to the carried item.

     The item is taken from ``response.meta['item']``. The first text node
     matched by the ``stats-vote`` XPath is stored under ``'stats-vote'``;
     ``None`` is stored when nothing matches.

     :param response: response providing ``meta['item']`` and ``xpath``
     :return: generator yielding one ``Item`` wrapping the updated item
     """
     item = response.meta['item']
     votes = response.xpath("//span[@class='stats-vote']/i/text()")
     if votes:
         item['stats-vote'] = votes[0]
     else:
         item['stats-vote'] = None
     yield Item(item)
Exemple #5
0
    def parse_detail(self, response):
        """Handle a detail-page response.

        Reads the item passed through ``response.meta["item"]``, records the
        response URL under ``"url"`` and yields the result wrapped in an
        ``Item``.
        """
        detail = response.meta["item"]
        detail["url"] = response.url
        # Original author's note: a callback must hand back a container
        # (an empty list would also do); here an Item is yielded instead.
        yield Item(detail)
Exemple #6
0
 def parse_detail(self, response):
     """Parse the detail page and add the runtime to the carried data.

     The data is taken from ``response.meta['data']``. The first text node of
     the ``v:runtime`` span is stored under ``data['movie_length']``. When the
     page has no such node, ``None`` is stored instead of letting the lookup
     raise ``IndexError``, so one incomplete page does not abort the callback.

     :param response: response providing ``meta['data']`` and ``xpath``
     :return: generator yielding one ``Item`` wrapping the updated data
     """
     data = response.meta['data']
     runtime_nodes = response.xpath('//span[@property="v:runtime"]/text()')
     # An empty match list means the page carries no runtime information.
     data['movie_length'] = runtime_nodes[0] if runtime_nodes else None
     yield Item(data)
Exemple #7
0
    def parse(self, response):
        """Parse the Douban movie Top 250 list page.

        Takes the first title text found under every ``<li>`` of the
        ``grid_view`` list and yields all titles together as one ``Item``.
        """
        # Original author's note: a deliberate typo such as ``apppend`` in this
        # callback made the program hang without printing any hint.
        titles = [
            entry.xpath(".//span[@class='title'][1]/text()")[0]
            for entry in response.xpath("//ol[@class='grid_view']/li")
        ]
        yield Item(titles)
Exemple #8
0
 def parse(self, response):
     """Yield one detail-page request per entry of the list page.

     For every ``<li>`` matched by the list XPath, the first name text is
     wrapped in an ``Item`` and handed to ``parse_detail`` through the
     request's ``meta['item']``.
     """
     for entry in response.xpath('//*[@id="content"]/div/div[1]/ol/li'):
         name = entry.xpath('./div/div[2]/div[1]/a/span[1]/text()')[0]
         item = Item({'name': name})
         detail_url = entry.xpath('./div/div[2]/div[1]/a/@href')[0]
         yield Request(
             detail_url,
             callback=self.parse_detail,
             meta={'item': item},
         )
Exemple #9
0
 def parse(self, response):
     """Process the list-page response.

     Walks the ``<li>`` tags that hold the movie information, wraps each
     movie name in an ``Item`` and yields a ``Request`` for the detail URL
     with ``parse_detail`` as callback and the item in ``meta['item']``.
     """
     name_path = './div/div[2]/div[1]/a/span[1]/text()'
     href_path = './div/div[2]/div[1]/a/@href'
     for row in response.xpath('//*[@id="content"]/div/div[1]/ol/li'):
         item = Item({'name': row.xpath(name_path)[0]})
         # Build the request for the detail page of this entry.
         detail_url = row.xpath(href_path)[0]
         meta = {'item': item}
         yield Request(detail_url, callback=self.parse_detail, meta=meta)
Exemple #10
0
 def parse(self, response):
     """Parse the response object.

     Yields one ``Item`` per ``<a>`` under a ``div`` of class ``hd``; each
     item holds the first span text as ``'movie_name'`` and the link target
     as ``'movie_url'``.

     :param response: response providing ``xpath``
     :return: generator of ``Item`` objects
     """
     for anchor in response.xpath('//div[@class="hd"]/a'):
         record = {
             'movie_name': anchor.xpath('./span[1]/text()')[0],
             'movie_url': anchor.xpath('./@href')[0],
         }
         # Following the detail page (a Request for 'movie_url' handled by
         # parse_detail) is currently disabled; only the item is emitted.
         yield Item(record)
Exemple #11
0
    def parse(self, response):
        """Extract data from the page.

        The page is first split into the child divs of ``content-left`` and
        the fields are then extracted per div. Only the first div is
        processed (``[:1]``). For it, an ``Item`` with ``name``, ``age``,
        ``gender`` and ``href`` is yielded, followed by a ``Request`` for the
        ``href`` that carries the same dict in ``meta["item"]``.
        """
        groups = response.xpath("//div[@id='content-left']/div")
        for div in groups[:1]:
            name = div.xpath(".//h2/text()")[0].strip()

            age_nodes = div.xpath(
                ".//div[contains(@class,'articleGender')]/text()")
            age = age_nodes[0] if age_nodes else None

            class_attrs = div.xpath(
                ".//div[contains(@class,'articleGender')]/@class")
            if class_attrs:
                # Last class token with the "Icon" part removed.
                gender = class_attrs[0].split(" ")[-1].replace("Icon", "")
            else:
                gender = None

            href = urllib.parse.urljoin(response.url,
                                        div.xpath("./a/@href")[0])

            item = {"name": name, "age": age, "gender": gender, "href": href}
            yield Item(item)
            yield Request(item["href"],
                          parse="parse_detail",
                          meta={"item": item})
Exemple #12
0
 def parse(self, response):
     """Handle a response whose body is JavaScript code.

     The body is decoded as GBK (the encoding the original author found the
     site to use), evaluated with ``js2py`` and the ``list`` attribute of the
     result is yielded as one ``Item``.
     """
     js_source = response.body.decode("gbk")
     # NOTE(review): this evaluates code fetched from a remote site inside
     # the crawler process - confirm the source is trusted.
     evaluated = js2py.eval_js(js_source)
     yield Item(evaluated.list)
Exemple #13
0
 def parse(self, response):
     """Wrap the raw response body in an ``Item`` and return it."""
     item = Item(response.body)
     return item
Exemple #14
0
 def parse(self, response):
     """Yield the first ``<title>`` text of the page as an ``Item``."""
     title_nodes = response.xpath("//title/text()")
     yield Item(title_nodes[0])
Exemple #15
0
 def parse(self, response):
     """Parse the response.

     Per the original docstring a callback hands back new request objects or
     data objects; this one returns a single ``Item`` wrapping
     ``response.body``.
     """
     body = response.body
     return Item(body)
Exemple #16
0
 def parse_detail(self, response):
     """Add the ``stats-vote`` value of the detail page to the carried item.

     The item is taken from ``response.meta['item']``. The first text node
     matched by the ``stats-vote`` XPath is stored under ``'stats-vote'``.
     When nothing matches, ``None`` is stored instead of letting the lookup
     raise ``IndexError``.

     :param response: response providing ``meta['item']`` and ``xpath``
     :return: generator yielding one ``Item`` wrapping the updated item
     """
     item = response.meta['item']
     votes = response.xpath("//span[@class='stats-vote']/i/text()")
     # An empty match list means the page shows no vote node.
     item['stats-vote'] = votes[0] if votes else None
     yield Item(item)
Exemple #17
0
 def parse_page(self, response):
     """Handle the response of each movie detail page.

     Yields one ``Item`` wrapping ``response.url``.
     """
     page_url = response.url
     yield Item(page_url)
Exemple #18
0
    def parse(self, response):
        # Emit the whole XPath result (first title text per parent) as one Item.
        yield Item(response.xpath("//span[@class='title'][1]/text()"))
        """
Exemple #19
0
 def parse(self, response):
     """Yield one ``Item`` per ``div`` of class ``hd``, holding its first title text."""
     title_path = ".//span[@class='title'][1]/text()"
     for block in response.xpath("//div[@class='hd']"):
         yield Item(block.xpath(title_path)[0])
Exemple #20
0
 def parse(self, response):
     """Yield the raw response body wrapped in an ``Item``."""
     content = response.body
     yield Item(content)
Exemple #21
0
 def parse(self, response):
     """Parse the response.

     Per the original docstring a callback returns new request objects or
     data objects; this one returns a single ``Item`` built from
     ``response.body``.
     """
     result = Item(response.body)
     return result
Exemple #22
0
    def parse(self, response):
        """Parse the response data and return the result.

        Step 3 of the original author's flow: return data or new requests.
        Here the data is one ``Item`` wrapping ``response.body``.
        """
        return Item(response.body)
Exemple #23
0
 def parse(self, response):
     """Parse the response: yield its body wrapped in an ``Item``."""
     body = response.body
     yield Item(body)
Exemple #24
0
    def parse(self, response):
        """Yield the first ``<title>`` text of the page wrapped in an ``Item``."""
        yield Item(response.xpath("//title/text()")[0])
Exemple #25
0
 def parse_detail(self, response):
     """Callback for detail-page responses.

     Takes the item from ``response.meta["item"]`` and stores the first text
     node matched by the ``stats-vote`` XPath under ``"stats_vote"``. When
     nothing matches, ``None`` is stored instead of letting the lookup raise
     ``IndexError``.

     :param response: response providing ``meta["item"]`` and ``xpath``
     :return: generator yielding one ``Item`` wrapping the updated item
     """
     item = response.meta["item"]
     votes = response.xpath("//span[@class='stats-vote']/i/text()")
     # An empty match list means the page shows no vote node.
     item["stats_vote"] = votes[0] if votes else None
     yield Item(item)
Exemple #26
0
 def parse(self, response):
     """Yield the response URL wrapped in an ``Item``."""
     url = response.url
     yield Item(url)
 def parse_detail(self, response):
     """Add the starring information of the detail page to the carried item.

     The item is read with ``response.meta.get('item')``. The complete XPath
     result (not just its first element) is stored under
     ``'movie_starring'`` and the item is yielded wrapped in an ``Item``.
     """
     carried = response.meta.get('item')
     starring = response.xpath('//*[@id="info"]/span[10]/text()')
     carried['movie_starring'] = starring
     yield Item(carried)