def run():
    """Run the three Lianjia spiders (listings, opening info, comments) in one process.

    Settings are passed to the ``CrawlerProcess`` constructor: assigning
    ``process.settings`` after construction is too late for process-level
    configuration (logging, shutdown handling), which is applied in
    ``__init__``.
    """
    process = CrawlerProcess(get_project_settings())
    # process.crawl schedules a spider to run when the process starts.
    process.crawl(LianjiaLoupanSpider)    # Lianjia property listings
    process.crawl(LianjiaInfoSpider)      # Lianjia property opening info
    process.crawl(LianjiaCommentSpider)   # Lianjia property comments
    # Start all spiders scheduled above; blocks until they all finish.
    process.start()
def get_spiders():
    """Return a dict mapping short spider names to spider instances.

    Only spiders whose module path matches
    ``openrecipes.spiders.<name>_spider`` are included, keyed by ``<name>``;
    modules containing ``_feedspider`` are skipped.

    NOTE(review): ``crawler.spiders`` / ``crawler.configure()`` are legacy
    Scrapy APIs (removed in modern Scrapy) — confirm the pinned version.
    """
    settings = get_project_settings()
    # Settings are applied at construction; re-assigning crawler.settings
    # afterwards (as the old code did) was redundant.
    crawler = CrawlerProcess(settings)
    crawler.configure()
    # Hoist the pattern out of the loop so it is compiled once.
    name_pattern = re.compile(r"openrecipes\.spiders\.([a-zA-Z0-9]+)_spider")
    spiders = {}
    for spname in crawler.spiders.list():
        spider = crawler.spiders.create(spname)
        module_name = spider.__module__
        if "_feedspider" in module_name:  # skip feed-based spiders
            continue
        match_obj = name_pattern.match(module_name)
        if match_obj:
            spiders[match_obj.group(1)] = spider
    return spiders
def get_spiders():
    """Return a dict of spiders keyed by short name.

    A spider is included when its module path matches
    ``openrecipes.spiders.<name>_spider``; feed spiders (modules containing
    ``_feedspider``) are excluded. The key is the captured ``<name>``.

    NOTE(review): ``crawler.spiders`` / ``crawler.configure()`` are legacy
    Scrapy APIs (removed in modern Scrapy) — confirm the pinned version.
    """
    settings = get_project_settings()
    # Settings are applied in the constructor; the old post-construction
    # ``crawler.settings = settings`` assignment was redundant.
    crawler = CrawlerProcess(settings)
    crawler.configure()
    # Compile the module-name pattern once instead of on every iteration.
    module_re = re.compile(r'openrecipes\.spiders\.([a-zA-Z0-9]+)_spider')
    spiders = {}
    for spname in crawler.spiders.list():
        spider = crawler.spiders.create(spname)
        module_name = spider.__module__
        if '_feedspider' in module_name:  # exclude feed-based spiders
            continue
        match_obj = module_re.match(module_name)
        if match_obj:
            spiders[match_obj.group(1)] = spider
    return spiders
def run():
    """Run the Fallenark spider in a single crawler process.

    Settings are passed to the ``CrawlerProcess`` constructor: assigning
    ``process.settings`` after construction misses process-level
    configuration (logging, shutdown handling) applied in ``__init__``.
    """
    process = CrawlerProcess(get_project_settings())
    # process.crawl(ForumSpider)  # forum spider (currently disabled)
    process.crawl(FallenarkSpider)  # fallenark spider
    # Blocks until crawling finishes.
    process.start()