Example #1
def parser(self, request, response):
    # Parse the list page: collect article links and dispatch a detail task for each
    article_list_url = response.xpath(
        '//div[@class="kjxw_tit"]/a/@href').extract()
    for url in article_list_url:
        log.debug("Dispatching article task url = {}".format(url))
        yield spider.Request(url, callback=self.parser_artile)
Example #2
def parser(self, request, response):
    # Walk the paginated list and dispatch a detail task for every link
    for link in response.xpath('//ul[@class="branch_list_ul paging"]//a'):
        title = link.xpath("./text()").extract_first()
        url = link.xpath("./@href").extract_first()

        print("Collected list item {} {}".format(title, url))

        yield spider.Request(url, title=title, callback=self.parser_detail)
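Neither list parser above includes the detail callback it dispatches to (parser_artile in example #1, parser_detail in example #2). The following is a minimal sketch of such a callback, not taken from the source: the XPath is a placeholder, and it assumes that extra keyword arguments passed to spider.Request (such as title) are exposed as attributes on the request object.

def parser_detail(self, request, response):
    # Hypothetical detail-page parser; not part of the original snippets.
    # Assumes kwargs passed to spider.Request (e.g. title=...) are readable
    # from the request object in the callback.
    title = getattr(request, "title", None)
    # Placeholder XPath; adjust to the actual article page layout.
    paragraphs = response.xpath('//div[@class="article"]//p/text()').extract()
    content = "\n".join(paragraphs)

    print("Parsed detail page: title={}, length={}".format(title, len(content)))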
Example #3
def start_requests(self, *args, **kws):
    # Dispatch the seed request for the focus list page
    yield spider.Request("http://www.bj.chinanews.com/focus/1.html")
Example #4
def start_requests(self, *args, **kws):
    # Dispatch a single seed request
    yield spider.Request("https://www.baidu.com")
Example #5
def start_requests(self, task):
    # task is one row fetched from the task table
    # id and url are the fetched fields, as specified in the main function
    id, url = task
    yield spider.Request(url, task_id=id)
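Example #5's comment notes that id and url come from the task table as configured in the main function. As a rough illustration of how the task id could be carried through to the parsing step, here is a hypothetical parser; it assumes that the task_id keyword passed to spider.Request is available on the request object in the callback, which may differ in the actual framework.

def parser(self, request, response):
    # Hypothetical parser paired with the task-based start_requests above;
    # not part of the original snippets.
    # Assumes the task_id kwarg is carried on the request object.
    task_id = getattr(request, "task_id", None)
    page_title = response.xpath("//title/text()").extract_first()

    print("Task {} fetched page titled {}".format(task_id, page_title))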
Example #6
def start_requests(self, *args, **kws):
    # Dispatch the list task
    yield spider.Request("http://column.caijing.com.cn/")
Example #7
def start_requests(self, *args, **kws):
    # Dispatch a single seed request
    yield spider.Request("https://cn.bing.com/?mkt=zh-CN")