Esempio n. 1
0
 def parse_movie_item(self, response):
     """Build a ``MovieItem`` from ``response``.

     ``url`` is the response URL; ``name``, ``summary`` and ``score`` are
     each the first text node matched by the corresponding XPath query.
     """
     # (item field, XPath query) pairs, filled in this order.
     field_queries = (
         ('name', '//span[@property="v:itemreviewed"]/text()'),
         ('summary', '//span[@property="v:summary"]/text()'),
         ('score', '//strong[@property="v:average"]/text()'),
     )
     movie = MovieItem()
     movie['url'] = response.url
     for field, query in field_queries:
         movie[field] = response.xpath(query).extract_first()
     return movie
Esempio n. 2
0
 def parse_movie_item(self, response):
     """Build a ``MovieItem`` from ``response``.

     The score and name are read with CSS selectors and the summary with
     an XPath query; each value is the first matching text node.
     """
     score_text = response.css('strong.rating_num::text').extract_first()
     title_text = response.css('div#content h1 span::text').extract_first()
     summary_text = response.xpath(
         '//span[@property="v:summary"]/text()').extract_first()

     movie = MovieItem()
     movie['score'] = score_text
     movie['url'] = response.url
     movie['name'] = title_text
     movie['summary'] = summary_text
     return movie
Esempio n. 3
0
 def parse_item(self, response):
     """Build a ``MovieItem`` from ``response``.

     ``name`` and ``summary`` are whitespace-stripped; ``score`` is stored
     as extracted. A field whose query matches nothing is set to ``None``
     instead of raising ``AttributeError`` on ``None.strip()``.
     """
     def first_stripped(query):
         # extract_first() yields None when the query matches nothing, so
         # only strip an actual string.
         text = response.xpath(query).extract_first()
         return text.strip() if text is not None else None

     item = MovieItem()
     item['url'] = response.url
     item['name'] = first_stripped(
         '//div[@id="content"]//h1/span[1]/text()')
     item['summary'] = first_stripped(
         '//span[@property="v:summary"]/text()')
     item['score'] = response.xpath(
         '//strong[contains(@class, "rating_num")]/text()').extract_first()
     return item
Esempio n. 4
0
 def parse_movie_item(self, response):
     """Build a ``MovieItem`` from ``response``.

     ``name`` and ``score`` are the first matching text nodes; ``summary``
     is the first capture-group match of a regex applied to the summary
     text nodes.
     """
     name_query = (
         '//div[@id="content"]/h1/span[@property="v:itemreviewed"]/text()')
     summary_nodes = response.xpath('//span[@property="v:summary"]/text()')
     rating_section = response.css('div#interest_sectl')

     item = MovieItem()
     item['url'] = response.url
     item['name'] = response.xpath(name_query).extract_first()
     # The group captures the text that follows any leading newlines and
     # whitespace.
     item['summary'] = summary_nodes.re_first(r'\n*\s*(.*)\n*')
     item['score'] = rating_section.xpath('.//strong/text()').extract_first()
     return item
Esempio n. 5
0
 def parse_movie_item(self, response):
     """Build a ``MovieItem`` from ``response``.

     ``name``, ``summary`` and ``score`` are all looked up relative to the
     ``div`` whose id is ``content``; each is the first matching text node.
     """
     # Every field query below is relative to this container selection.
     content = response.xpath('//div[@id="content"]')

     name = content.xpath('.//h1/span/text()').extract_first()
     summary = content.xpath(
         './/span[@property="v:summary"]/text()').extract_first()
     score = content.xpath(
         './/strong[@class="ll rating_num"]/text()').extract_first()

     movie = MovieItem()
     movie['url'] = response.url
     movie['name'] = name
     movie['summary'] = summary
     movie['score'] = score
     return movie
Esempio n. 6
0
 def parse_movie_item(self, response):
     """Parse ``response`` into a ``MovieItem``.

     ``name`` comes from the page ``<title>`` text with its last five
     characters removed and the remainder stripped; ``summary`` has all
     whitespace removed; ``score`` is stored as extracted. A field whose
     selector matches nothing is set to ``None`` rather than raising
     ``IndexError`` / ``TypeError``.
     """
     item = MovieItem()
     item['url'] = response.url

     title_text = response.css('title::text').extract_first()
     # The trailing five characters are presumably a fixed site suffix in
     # the title -- TODO confirm against a real page.
     item['name'] = (
         title_text[0:-5].strip() if title_text is not None else None)

     summary_text = response.css(
         'span[property="v:summary"]::text').extract_first()
     # Raw string: '\s' in a plain literal is an invalid escape sequence.
     item['summary'] = (
         re.sub(r'\s', '', summary_text)
         if summary_text is not None else None)

     item['score'] = response.css(
         'strong[property="v:average"]::text').extract_first()
     return item
Esempio n. 7
0
 def parse_movie_item(self, response):
     """Build a ``MovieItem`` from ``response``.

     ``url`` is the response URL; the remaining fields are the first text
     node matched by their XPath query.
     """
     def first_text(query):
         return response.xpath(query).extract_first()

     movie = MovieItem()
     movie['url'] = response.url
     movie['name'] = first_text('//span[@property="v:itemreviewed"]/text()')
     movie['summary'] = first_text('//span[@property="v:summary"]/text()')
     movie['score'] = first_text(
         '//div[contains(@class,"rating_self")]/strong/text()')
     return movie
 def parse_movie_item(self, response):
     """Yield a single ``MovieItem`` built from ``response``.

     ``url`` is the response URL; ``name``, ``summary`` and ``score`` are
     the first text nodes matched by their XPath queries.
     """
     name_query = '//span[@property="v:itemreviewed"]/text()'
     summary_query = '//span[@property="v:summary"]/text()'
     score_query = '//strong[@class="ll rating_num"]/text()'

     movie = MovieItem()
     movie['url'] = response.url
     movie['name'] = response.xpath(name_query).extract_first()
     movie['summary'] = response.xpath(summary_query).extract_first()
     movie['score'] = response.xpath(score_query).extract_first()
     yield movie
Esempio n. 9
0
 def parse_movie_item(self, response):
     """Build a ``MovieItem`` from ``response``.

     ``name`` and ``score`` are stored as extracted; ``summary`` is
     whitespace-stripped. A missing summary is stored as ``None`` instead
     of raising ``AttributeError`` on ``None.strip()``.
     """
     item = MovieItem()
     item['url'] = response.url
     item['name'] = response.xpath(
         '//*[@id="content"]/h1/span[1]/text()').extract_first()

     summary = response.xpath(
         '//span[@property="v:summary"]/text()').extract_first()
     # extract_first() yields None when nothing matches; strip only text.
     item['summary'] = summary.strip() if summary is not None else None

     item['score'] = response.xpath(
         '//*[@id="interest_sectl"]/div[1]/div[2]/strong/text()'
     ).extract_first()
     return item
Esempio n. 10
0
 def parse_movie_item(self, response):
     """Yield a ``MovieItem`` for ``response`` when its score is >= 8.

     The score text is converted to ``float``. When the score node is
     missing or its text is not a number, nothing is yielded (previously
     this raised ``AttributeError`` / ``ValueError``). A missing summary
     is stored as ``None`` instead of raising.
     """
     score_text = response.css('strong.rating_num::text').extract_first()
     if score_text is None:
         # No score node on the page: the threshold cannot be checked.
         return
     try:
         score = float(score_text.strip())
     except ValueError:
         # Empty or non-numeric score text.
         return

     summary = response.xpath(
         '//span[@property="v:summary"]/text()').extract_first()

     item = MovieItem()
     item['url'] = response.url
     item['name'] = response.xpath(
         '//span[@property="v:itemreviewed"]/text()').extract_first()
     item['summary'] = summary.strip() if summary is not None else None
     item['score'] = score
     if item['score'] >= 8:
         yield item
Esempio n. 11
0
 def parse_movie_page(self, response):
     """Yield a ``MovieItem`` for ``response`` unless the count cap is hit.

     Nothing is yielded once ``DoubanMoviePipeline.count`` exceeds 40.
     Otherwise the item carries the response URL plus the first text node
     matched by each of the name, summary and score XPath queries.
     """
     if not DoubanMoviePipeline.count > 40:
         rating_nodes = response.xpath(
             '//strong[contains(@class,"rating_num")]/text()')
         fields = {
             'url': response.url,
             'name': response.xpath('//h1/span[1]/text()').extract_first(),
             'summary': response.xpath(
                 '//div[@id="link-report"]/span[1]/text()').extract_first(),
             'score': rating_nodes.extract_first(),
         }
         yield MovieItem(fields)
Esempio n. 12
0
    def parse_page(self, response):
        """Yield one ``MovieItem`` per movie in a JSON page, then the next page.

        The response body is decoded as JSON and its ``'data'`` list is
        iterated. Each movie gets its own ``MovieItem`` whose ``area`` comes
        from the request meta and whose other declared fields are copied
        from the movie record when present. If the page had any movies, a
        follow-up ``Request`` is yielded with ``meta['data']['start']``
        advanced by 20; an empty ``'data'`` list ends the pagination.

        A fresh item is created per movie: reusing one item object would
        make every yielded item alias the same instance and would leak
        field values from one movie into the next.
        """
        meta = response.meta
        # Advance the paging offset used to build the follow-up request.
        meta['data']['start'] += 20
        movie_datas = json.loads(response.text)

        movies = movie_datas['data']
        if not movies:
            return

        for movie_data in movies:
            item = MovieItem()
            item['area'] = meta['area']
            # Copy only the fields the item declares and the record provides.
            for field in item.fields:
                if field in movie_data:
                    item[field] = movie_data[field]
            yield item

        url = self.header_url + urlencode(meta['data'])
        # deepcopy so later mutation of this meta does not affect the
        # queued request.
        yield Request(url=url,
                      meta=deepcopy(meta),
                      callback=self.parse_page)