Example #1
File: test.py Project: 0326/scrapy
def get_crawler(spidercls=None, settings_dict=None):
    """Return an unconfigured Crawler object. If settings_dict is given, it
    will be used to populate the crawler settings with a project level
    priority.
    """
    from scrapy.crawler import CrawlerRunner
    from scrapy.settings import Settings
    from scrapy.spider import Spider

    runner = CrawlerRunner(Settings(settings_dict))
    return runner._create_crawler(spidercls or Spider)
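
A minimal usage sketch for the helper above (DemoSpider and the DOWNLOAD_DELAY override are illustrative, not taken from the original test file):

from scrapy.spider import Spider

class DemoSpider(Spider):
    name = 'demo'

# settings_dict entries are applied with project-level priority
crawler = get_crawler(DemoSpider, settings_dict={'DOWNLOAD_DELAY': 2})
assert crawler.spidercls is DemoSpider
assert crawler.settings.getint('DOWNLOAD_DELAY') == 2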
Example #2
        # Respect the project's DNS cache settings when building the resolver.
        if self.settings.getbool('DNSCACHE_ENABLED'):
            cache_size = self.settings.getint('DNSCACHE_SIZE')
        else:
            cache_size = 0
        print("wwj debug return my_CachingThreadedResolver")
        return my_CachingThreadedResolver(
            reactor=reactor,
            cache_size=cache_size,
            timeout=self.settings.getfloat('DNS_TIMEOUT'))


# Imports for the script portion below (the file header above is not shown);
# my_CrawlerProcess and my_CachingThreadedResolver are custom subclasses
# defined elsewhere in this project.
from scrapy.crawler import CrawlerRunner
from scrapy.utils.project import get_project_settings

settings = get_project_settings()
my_process = my_CrawlerProcess(settings)

runner = CrawlerRunner(settings)
#### one runner, multiple spiders
# Load the spider class by name and build a Crawler for it via the
# runner's private helper (the public route would be runner.crawl()).
spidercls = runner.spider_loader.load('scrapy_spider')
my_crawler = runner._create_crawler(spidercls)

# Instantiate the spider and the execution engine by hand.
my_crawler.spider = my_crawler._create_spider('scrapy_spider')
my_crawler.engine = my_crawler._create_engine()

# Feed the spider's start requests to the engine; close_if_idle=False keeps
# the spider open when idle so more spiders can share the same runner.
start_requests = iter(my_crawler.spider.start_requests())
close_if_idle = False
my_crawler.engine.open_spider(my_crawler.spider, start_requests, close_if_idle)
my_crawler.engine.start()

#process.crawl('scrapy_spider')
# start() runs the Twisted reactor; stop_after_crawl=False keeps it running
# after the crawl finishes.
stop_after_crawl = False
my_process.start(stop_after_crawl)
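
For comparison, the same crawl can be driven through Scrapy's documented API instead of the private _create_crawler/_create_engine helpers; this is roughly what the commented-out process.crawl('scrapy_spider') line refers to (a sketch, assuming a spider named 'scrapy_spider' is registered in the project):

from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings

process = CrawlerProcess(get_project_settings())
process.crawl('scrapy_spider')  # the spider name is resolved via the spider loader
process.start()                 # starts the reactor and blocks until the crawl finishes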