Example #1
0
def run():
    """Run all three Lianjia spiders in one Scrapy process.

    Schedules the listing, opening-info and comment spiders, then starts
    the reactor; ``process.start()`` blocks until every spider finishes.
    """
    # Pass the project settings to the constructor. Assigning to
    # process.settings AFTER construction bypasses CrawlerProcess's
    # settings initialisation, so the project settings were never applied.
    process = CrawlerProcess(get_project_settings())

    # process.crawl() schedules a spider to run once start() is called.
    process.crawl(LianjiaLoupanSpider)    # Lianjia property listings
    process.crawl(LianjiaInfoSpider)      # Lianjia property opening info
    process.crawl(LianjiaCommentSpider)   # Lianjia property comments

    # Start all of the spiders scheduled above and block until done.
    process.start()
def get_spiders():
    """Return a dict mapping short spider names to spider instances.

    Enumerates every spider registered with the project, skips feed
    spiders, and keys each remaining spider by the name captured from
    its module path (``openrecipes.spiders.<short>_spider`` -> ``<short>``).
    """
    settings = get_project_settings()
    crawler = CrawlerProcess(settings)
    crawler.settings = settings
    crawler.configure()

    # Compile once instead of re-parsing the pattern on every iteration.
    # Matches e.g. "openrecipes.spiders.foo_spider" and captures "foo".
    name_re = re.compile(r"openrecipes\.spiders\.([a-zA-Z0-9]+)_spider")

    spiders = {}
    for spname in crawler.spiders.list():
        spider = crawler.spiders.create(spname)
        module_name = spider.__module__
        # Feed spiders are excluded from the result.
        if "_feedspider" not in module_name:
            match_obj = name_re.match(module_name)
            if match_obj:
                short_name = match_obj.group(1)
                spiders[short_name] = spider

    return spiders
Example #3
0
def get_spiders():
    """Return a dict of spiders keyed by their short module-derived name.

    Skips any spider whose module name contains ``_feedspider``; all
    others are keyed by the ``<short>`` part of their module path
    ``openrecipes.spiders.<short>_spider``.
    """
    settings = get_project_settings()
    crawler = CrawlerProcess(settings)
    crawler.settings = settings
    crawler.configure()

    # Hoist the regex compile out of the loop; captures the short name.
    pattern = re.compile(r'openrecipes\.spiders\.([a-zA-Z0-9]+)_spider')

    spiders = {}
    for spname in crawler.spiders.list():
        spider = crawler.spiders.create(spname)
        module_name = spider.__module__
        # Idiomatic membership test: "x not in y" rather than "not x in y".
        if '_feedspider' not in module_name:
            match_obj = pattern.match(module_name)
            if match_obj:
                short_name = match_obj.group(1)
                spiders[short_name] = spider

    return spiders
Example #4
0
def run():
    """Run the Fallenark spider in a Scrapy process (blocks until done)."""
    # Hand the project settings to the constructor: assigning to
    # process.settings after construction does not apply them to the
    # crawler, so the project configuration was silently ignored.
    process = CrawlerProcess(get_project_settings())
    # process.crawl(ForumSpider)  # forum spider (currently disabled)
    process.crawl(FallenarkSpider)  # fallenark spider
    process.start()