def start_request(self):
    """Yield the initial request for the ``board/4`` page on maoyan.com.

    Builds a single ``scrapy.Request`` for ``https://maoyan.com/board/4``
    and routes its response to ``self.parse``.

    NOTE(review): Scrapy's documented entry-point hook is spelled
    ``start_requests`` (plural). The current name is kept so existing
    callers keep working -- confirm whether it should be renamed.
    """
    url = 'https://maoyan.com/board/4'
    # The request class is ``scrapy.Request``; a lowercase ``scrapy.request``
    # callable is not part of Scrapy's public API.
    yield scrapy.Request(url=url, callback=self.parse)
def start_request(self):
    """Yield a single request for ``url``, handled by ``self.parse``.

    NOTE(review): ``url`` is still an empty placeholder. ``scrapy.Request``
    is expected to reject a URL that has no scheme, so the real target
    must be filled in before this generator is consumed.

    NOTE(review): Scrapy's documented entry-point hook is spelled
    ``start_requests`` (plural) -- confirm whether this method should be
    renamed.
    """
    url = ''
    # The request class is ``scrapy.Request`` (capital R).
    yield scrapy.Request(url=url, callback=self.parse)
def start_request(self):
    """Yield the initial request for the ``board/4`` page on maoyan.com.

    No callback is passed to the request; per the Scrapy documentation the
    response is then delivered to the spider's ``parse`` method.

    NOTE(review): Scrapy's documented entry-point hook is spelled
    ``start_requests`` (plural) -- confirm whether this method should be
    renamed.
    """
    url = 'https://maoyan.com/board/4'
    # The request class is ``scrapy.Request`` (capital R).
    yield scrapy.Request(url=url)
def start_requests(self):
    """Yield one ``scrapy.Request`` per URL in ``self.start_urls``.

    No callback is passed; per the Scrapy documentation the responses are
    then delivered to the spider's ``parse`` method.

    NOTE(review): assumes the spider defines ``start_urls`` and that it is
    the intended URL source -- confirm.
    """
    # ``scrapy.Request`` cannot be built without a URL, so each request is
    # created from an explicit entry of ``start_urls``.
    for url in self.start_urls:
        yield scrapy.Request(url=url)
def parse(self, response):
    """Yield a follow-up request for each row of the ``DataGrid`` table.

    Parses ``response.body`` with ``etree.HTML``, selects every ``tr``
    beneath the table whose class is ``DataGrid``, skips the first one, and
    for each remaining row builds a URL from ``start_urls[0]`` plus the
    concatenated ``td/a/@href`` values. Each resulting response is routed to
    ``self.sec_parse``.
    """
    rows = etree.HTML(response.body).xpath("//table[@class='DataGrid']//tr")
    # The first row is skipped (presumably a header row -- verify against
    # the page markup).
    for row in rows[1:]:
        # NOTE(review): ``start_urls`` is looked up as a module-level name
        # here; if it is meant to be the spider's attribute this should be
        # ``self.start_urls`` -- confirm.
        sec_page = start_urls[0] + ''.join(row.xpath("td/a/@href"))
        # The request class is ``scrapy.Request`` (capital R).
        yield scrapy.Request(sec_page, callback=self.sec_parse)
# -*- coding: utf-8 -*-
import lxml.etree
import scrapy
from bs4 import BeautifulSoup as bs

from movies.Item import MaoyanspidersItem


class MoviesSpider(scrapy.Spider):
    """Spider named ``movies`` that fetches the maoyan.com home page.

    Requests are restricted to ``maoyan.com`` through ``allowed_domains``.
    The downloaded page is currently only printed by ``parse``.
    """

    name = 'movies'
    allowed_domains = ['maoyan.com']
    start_urls = ['http://maoyan.com/']

    def start_requests(self):
        """Yield the initial request for ``https://maoyan.com/``.

        The response is routed to ``self.parse``.
        """
        url = 'https://maoyan.com/'
        yield scrapy.Request(url=url, callback=self.parse)

    # Backward-compatible alias for code that still calls the old,
    # singular method name.
    start_request = start_requests

    def parse(self, response):
        """Print the decoded body of ``response``."""
        print(response.text)