def start_requests(self):
    """Yield the single seed request for https://maoyan.com/ with ``render=True``."""
    seed_url = "https://maoyan.com/"
    yield feapder.Request(seed_url, render=True)
def start_requests(self, task):
    """Turn one task row into a request.

    ``task`` is indexed positionally: element 0 is used as the task id and
    element 1 as the URL to fetch. The task id is attached to the request
    as ``task_id``.
    """
    task_id, url = task[0], task[1]
    yield feapder.Request(url, task_id=task_id)
def start_requests(self):
    """Produce the seed task: a single request for https://www.baidu.com."""
    seed_url = "https://www.baidu.com"
    yield feapder.Request(seed_url)
def start_requests(self, task):
    """Build the JD item-page request for one ``(task_id, item_id)`` task."""
    task_id, item_id = task
    url_template = "https://item.jd.com/{}.html"
    url = url_template.format(item_id)
    # Carry the task_id field on the request.
    yield feapder.Request(url, task_id=task_id)
def start_requests(self, *args, **kws):
    """Print each index in ``range(200)`` and yield a Baidu request for it.

    Extra positional and keyword arguments are accepted and ignored.
    """
    # NOTE(review): nesting was reconstructed from a flattened source; the
    # yield is assumed to belong to the loop body -- confirm.
    target_url = "https://www.baidu.com"
    for i in range(200):
        print(i)
        yield feapder.Request(target_url)
def start_requests(self):
    """Yield 100 Baidu requests (``p=0`` .. ``p=99``), printing a line before each."""
    for i in range(100):
        print(f"下发任务 {i}")
        url = f"https://www.baidu.com?p={i}"
        yield feapder.Request(url)
def start_requests(self):
    """Yield the single seed request for https://www.baidu.com."""
    # Plain literal: the URL has no placeholders, so an f-string prefix is
    # unnecessary (and is flagged by pyflakes/ruff as F541).
    yield feapder.Request("https://www.baidu.com")
def start_requests(self, task):
    """Yield a request for ``task.url`` tagged with ``task.id`` as ``task_id``."""
    url = task.url
    task_id = task.id
    yield feapder.Request(url, task_id=task_id)
def start_requests(self):
    """Yield one request per listing page in ``range(1, 2)`` (i.e. page 1 only)."""
    url_template = "https://www.qiushibaike.com/8hr/page/{}/"
    for page_num in range(1, 2):
        yield feapder.Request(url_template.format(page_num))
def start_requests(self):
    """Yield ten ``render=True`` requests for news.qq.com, fragments ``#0`` .. ``#9``."""
    yield from (
        feapder.Request("https://news.qq.com/#{}".format(i), render=True)
        for i in range(10)
    )
def start_requests(self):
    """Yield the single seed request for https://news.qq.com/."""
    seed_url = "https://news.qq.com/"
    yield feapder.Request(seed_url)
def parse(self, request, response):
    """Parse one page of the movie listing into items and follow-up work.

    For each ``<dd>`` under ``<dl class="movie-list">`` this yields:

    * a ``MaoyanHotMovieListItem`` populated from the entry's markup, then
    * one ``MaoyanFilmDetailTaskItem`` per id in ``setting.BRAND_IDS``.

    When ``request.page`` is 1, the page count is read from the pager and a
    request is yielded for every page from 2 up to that count. Last, the
    result of ``self.update_task_batch(request.task_id, 1)`` is yielded.

    NOTE(review): block nesting was reconstructed from a flattened source;
    in particular the final task update is assumed to run for every page --
    confirm.
    """
    for entry in response.xpath("//dl[@class='movie-list']//dd"):
        poster = entry.xpath(
            './div[@class="movie-item film-channel"]//div[@class="movie-poster"]/img[2]/@data-src'
        ).extract_first()
        title = entry.xpath(
            './div[@class="channel-detail movie-item-title"]/@title'
        ).extract_first()
        href = entry.xpath(
            './div[@class="channel-detail movie-item-title"]/a/@href'
        ).extract_first()
        rating = entry.xpath(
            'string(./div[@class="channel-detail channel-detail-orange"])'
        ).extract_first()
        genre = entry.re_first("类型:</span>(.*?)<", default="").strip()
        cast = entry.re_first("主演:</span>(.*?)<", default="").strip()
        released = entry.re_first("上映时间:</span>(.*?)<", default="").strip()
        version_class = entry.xpath(
            './div[@class="movie-item film-channel"]//div[@class="movie-ver"]/i/@class'
        ).extract_first()
        # The movie id is the last path segment of the detail link.
        film_id = href.split("/")[-1]

        list_item = maoyan_hot_movie_list_item.MaoyanHotMovieListItem()
        list_item.name = title
        list_item.movie_id = film_id
        list_item.cover = poster
        list_item.url = href
        list_item.score = rating
        list_item.movie_type = genre
        list_item.main_actor = cast
        list_item.release_date = released
        list_item.imax = version_class
        list_item.city_id = request.city_id
        list_item.crawl_time = tools.get_current_date()
        yield list_item

        # Movie-detail tasks: one per configured brand.
        for brand_id in setting.BRAND_IDS:
            detail_task = maoyan_film_detail_task_item.MaoyanFilmDetailTaskItem()
            detail_task.movie_id = film_id  # movie id
            detail_task.city_id = request.city_id  # city id
            detail_task.brand_id = brand_id  # brand; -1 means all
            detail_task.show_date = tools.get_current_date("%Y-%m-%d")  # date
            yield detail_task

    # Pagination: only the first page fans out to the remaining pages.
    if request.page == 1:
        last_page_text = response.xpath(
            '//ul[@class="list-pager"]//li[last()-1]/a/text()'
        ).extract_first()
        if last_page_text:
            page_count = int(last_page_text)
            for page_no in range(2, page_count + 1):
                yield feapder.Request(
                    page=page_no,
                    city_id=request.city_id,
                    task_id=request.task_id,
                )

    # Update the task.
    yield self.update_task_batch(request.task_id, 1)
def start_requests(self, task):
    """Yield the page-1 request for one ``(task_id, city_id)`` task.

    No URL is passed here; only ``city_id``, ``page`` and ``task_id`` are
    set on the request.
    """
    task_id, city_id = task
    # Insertion order mirrors the keyword order of the direct call.
    request_kwargs = {"city_id": city_id, "page": 1, "task_id": task_id}
    yield feapder.Request(**request_kwargs)
def start_requests(self, task):
    """Yield a ``render=True`` request for one ``(task_id, url)`` task."""
    task_id, url = task
    request = feapder.Request(url, task_id=task_id, render=True)
    yield request
def start_requests(self):
    """Yield the seed request for https://news.sina.com.cn/.

    Note: the enclosing class inherits from BaseParser, not Spider.
    """
    seed_url = "https://news.sina.com.cn/"
    yield feapder.Request(seed_url)
def start_requests(self, *args, **kws):
    """Yield the single seed request for https://www.baidu.com.

    Extra positional and keyword arguments are accepted and ignored.
    """
    seed_url = "https://www.baidu.com"
    yield feapder.Request(seed_url)
def start_requests(self):
    """Yield one Baidu request per index in ``range(1)``, with ``self.parse`` as callback."""
    yield from (
        feapder.Request(f"https://www.baidu.com#{i}", callback=self.parse)
        for i in range(1)
    )
def start_requests(self):
    """Yield the seed request for https://tophub.today/.

    The request is given this object's own ``download_midware`` as its
    ``download_midware`` argument.
    """
    midware = self.download_midware
    yield feapder.Request("https://tophub.today/", download_midware=midware)
def start_requests(self):
    """Yield 100 Baidu requests distinguished by fragment ``#0`` .. ``#99``."""
    url_template = "https://www.baidu.com#{}"
    for i in range(100):
        yield feapder.Request(url_template.format(i))