Beispiel #1
0
 def parse(self, response):
     """Schedule detail-page requests for the first ten listed movies.

     Selects every ``div`` whose class is exactly
     ``channel-detail movie-item-title``, keeps the first ten, and for each
     one yields a ``scrapy.Request`` to the linked page with
     ``self.parse2`` as the callback. A fresh ``MaoyanItem`` is attached to
     each request under ``meta['item']``.

     Entries without an ``./a/@href`` are skipped: ``extract_first()``
     returns ``None`` for them, and concatenating ``None`` onto the base
     URL would raise ``TypeError`` and abort the whole callback.

     Args:
         response: The response to select the movie entries from.

     Yields:
         One ``scrapy.Request`` per entry that has a link.
     """
     title_nodes = Selector(response=response).xpath(
         '//div[@class="channel-detail movie-item-title"]')
     for node in title_nodes[:10]:
         href = node.xpath('./a/@href').extract_first()
         if href is None:
             # No link to follow for this entry.
             continue
         # NOTE(review): href is presumably site-relative (it is appended
         # to the bare host) - confirm against the live markup.
         yield scrapy.Request(url='https://maoyan.com' + href,
                              meta={'item': MaoyanItem()},
                              callback=self.parse2)
Beispiel #2
0
 def parse_page(self, response):
     """Yield one ``MaoyanItem`` per movie matched in the response body.

     The page text is scanned with a single DOTALL regular expression that
     captures six groups per movie: the image path, the title, the
     star text, the release-time text, and the integer and fraction parts
     of the score.

     Two defects of the earlier version are fixed here:

     * Trailing commas after the assignments stored one-element tuples in
       ``img_url``, ``title``, ``star_name`` and ``show_time`` instead of
       plain strings.
     * A single item was overwritten on every iteration and returned once,
       so only the last match survived. Each match now gets its own item.
       When nothing matches, nothing is yielded (previously an empty item
       was returned).

     Args:
         response: Object exposing the page markup as ``response.text``.

     Yields:
         A populated ``MaoyanItem`` for every regex match, in page order.
     """
     reg = re.compile(
         '<img data-src="http://p0.meituan.net/movie/(.*?)" .*?/>.*?<a href=".*?".*?>(.*?)</a>.*?<p class=".*?">(.*?)</p>.*?<p class="releasetime">(.*?)</p>.*?<i class="integer">(.*?)</i><i class="fraction">(.*?)</i>',
         re.S)
     for img_path, title, star_name, show_time, integer, fraction in \
             reg.findall(response.text):
         item = MaoyanItem()
         # The regex captures only the path after the image host prefix.
         item['img_url'] = 'http://p0.meituan.net/movie/' + img_path
         item['title'] = title
         item['star_name'] = star_name.strip()
         item['show_time'] = show_time
         # The score is split across two <i> tags; rejoin the two halves.
         item['score'] = integer + fraction
         yield item
Beispiel #3
0
    def parse(self, response):
        """Yield one item per board entry, then requests for further pages.

        Every ``<dd>`` under ``<dl class="board-wrapper">`` becomes a
        ``MaoyanItem`` with ``name``, ``star`` and ``time`` fields. After
        the items, requests for offsets 10 through 90 (step 10) are
        yielded with this same method as the callback.

        Fields are read with ``extract_first(default='')`` rather than
        ``extract()[0]``, so an entry missing one node gets an empty
        string for that field instead of raising ``IndexError``, which
        would lose the rest of the page and the pagination requests.

        Args:
            response: The response for a board page.

        Yields:
            ``MaoyanItem`` objects, followed by ``scrapy.Request`` objects.
        """
        for dd in response.xpath('//dl[@class="board-wrapper"]/dd'):
            item = MaoyanItem()
            item['name'] = dd.xpath('./a/@title').extract_first(default='')
            item['star'] = dd.xpath(
                './/p[@class="star"]/text()').extract_first(default='').strip()
            item['time'] = dd.xpath(
                './/p[@class="releasetime"]/text()').extract_first(default='')
            yield item

        # NOTE(review): every page re-yields the same nine URLs; presumably
        # the scheduler's duplicate filter drops the repeats - confirm.
        for offset in range(10, 91, 10):
            url = 'http://maoyan.com/board/4?offset={}'.format(offset)
            # Hand the URL to the scheduler so it is queued for download.
            yield scrapy.Request(url=url, callback=self.parse)
Beispiel #4
0
    def parse(self, response):
        """Yield a ``MaoyanItem`` for each board entry in the response.

        For every ``<dd>`` below ``<dl class='board-wrapper'>`` the text of
        ``p.name > a`` is stored under ``title`` and the text of ``p.star``
        under ``star``. Both values are the lists returned by
        ``extract()``.

        A new item is created for every entry. The earlier version mutated
        and re-yielded one shared instance, so every yielded reference
        pointed at the same object and later entries overwrote earlier
        ones for any consumer that kept the reference.

        Args:
            response: The response for a board page.

        Yields:
            One ``MaoyanItem`` per ``<dd>`` node.
        """
        # Debug aid kept from the original: dumps the raw page body to stdout.
        print(response.text)
        for node in response.xpath("//dl[@class='board-wrapper']//dd"):
            item = MaoyanItem()
            item['title'] = node.xpath(".//p[@class='name']/a/text()").extract()
            item['star'] = node.xpath(".//p[@class='star']/text()").extract()
            yield item
Beispiel #5
0
    def parse2(self, response):
        """Build a ``MaoyanItem`` from a movie detail page.

        Reads three values from the response:

        * ``Movie_Name``: text of the first ``h1.name`` (``None`` if absent).
        * ``Movie_Categories``: stripped text of every ``a.text-link``,
          joined with ``/`` (empty string if there are none).
        * ``Release_Date``: the first ten characters of the text of the
          third ``li.ellipsis``, or ``None`` when that node is missing.
          Previously a missing node raised ``TypeError`` from slicing
          ``None``.

        Args:
            response: The response for a movie detail page.

        Returns:
            The populated ``MaoyanItem``.
        """
        # Build the selector once instead of once per query.
        selector = Selector(response=response)

        # Movie title
        movie_name = selector.xpath(
            '//h1[@class="name"]/text()').extract_first()

        # Movie categories
        movie_categories = '/'.join(
            text.strip()
            for text in selector.xpath(
                '//a[@class="text-link"]/text()').extract())

        # Release date: keep only the leading ten characters.
        # NOTE(review): presumably a YYYY-MM-DD prefix - confirm.
        raw_release = selector.xpath(
            '//li[@class="ellipsis"][3]/text()').extract_first()
        release_date = raw_release[:10] if raw_release is not None else None

        item = MaoyanItem()
        item['Movie_Name'] = movie_name
        item['Movie_Categories'] = movie_categories
        item['Release_Date'] = release_date
        return item
Beispiel #6
0
    def parse2(self, response):
        """Extract name, categories and release date from a detail page.

        The returned ``MaoyanItem`` carries:

        * ``movie_name``: text of the first ``h1.name``, or ``None``.
        * ``movie_categories``: the stripped texts of all ``a.text-link``
          nodes joined by ``/``.
        * ``release_date``: the first ten characters of the third
          ``li.ellipsis`` text. If that node does not exist the field is
          ``None``; slicing the missing value used to raise ``TypeError``.

        Args:
            response: The response for a movie detail page.

        Returns:
            The populated ``MaoyanItem``.
        """
        # One selector serves all three queries.
        page = Selector(response=response)

        # Movie title
        movie_name = page.xpath('//h1[@class="name"]/text()').extract_first()

        # Movie categories
        category_texts = page.xpath('//a[@class="text-link"]/text()').extract()
        movie_categories = '/'.join(text.strip() for text in category_texts)

        # Release date: only the leading ten characters are kept.
        release_text = page.xpath(
            '//li[@class="ellipsis"][3]/text()').extract_first()
        if release_text is None:
            release_date = None
        else:
            release_date = release_text[:10]

        item = MaoyanItem()
        item['movie_name'] = movie_name
        item['movie_categories'] = movie_categories
        item['release_date'] = release_date
        return item