def setUp(self):
    from scrapy import log
    import sys
    from cStringIO import StringIO
    self.f = StringIO()
    self.sflo = log.ScrapyFileLogObserver(self.f, self.level, self.encoding)
    self.sflo.start()
def setUp(self):
    self.f = BytesIO()
    self.crawler = get_crawler(Spider)
    self.spider = self.crawler.spider = self.crawler._create_spider('test')
    self.log_observer = log.ScrapyFileLogObserver(self.f, log.INFO, 'utf-8', self.crawler)
    self.log_observer.start()
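The fixture above only attaches the observer; the usual counterpart is a tearDown that detaches it and exposes the captured output for assertions. A minimal sketch assuming the same attributes (the commented assertion is illustrative, not from the original suite):

def tearDown(self):
    # detach the observer so later tests do not keep writing into this buffer
    self.log_observer.stop()
    # the captured output is then available for assertions, e.g.:
    # self.assertIn(b'INFO', self.f.getvalue())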
def crawl(cls, sites):
    stat = {"spiders": 0}

    def soft_stop_reactor():
        stat["spiders"] -= 1
        if not stat["spiders"]:
            reactor.stop()

    for site in sites:
        try:
            spider = site.parser.spider(site)
        except (NotImplementedError, ObjectDoesNotExist):
            logger.error(
                _('Spider not implemented for "%s" site') % site.label)
        else:
            stat["spiders"] += 1
            with spider_project(spider) as settings:
                crawler = Crawler(settings)
                # stop the reactor once the last spider has closed
                crawler.signals.connect(
                    soft_stop_reactor, signal=signals.spider_closed)
                crawler.configure()
                crawler.crawl(spider)
                crawler.start()

    logfile = open('crawl.log', 'w')
    log_observer = log.ScrapyFileLogObserver(logfile, level=logging.INFO)
    log_observer.start()

    # the script will block here until the spider_closed signal is sent
    reactor.run()
def setUp(self):
    from scrapy.spider import Spider
    from scrapy.utils.test import get_crawler
    from scrapy.utils.project import get_project_settings
    from scrapy.crawler import Crawler
    #self.crawler = get_crawler(self.settings_dict)
    self.crawler = Crawler(get_project_settings())
    self.spider = Spider('foo')

    from scrapy import log
    import sys
    from cStringIO import StringIO
    self.level = log.INFO
    self.level = log.DEBUG      # overrides the INFO level set above
    self.encoding = 'utf-8'
    self.f = StringIO()
    self.f = sys.stdout         # overrides the StringIO buffer set above
    self.sflo = log.ScrapyFileLogObserver(self.f, self.level, self.encoding)
    self.sflo.start()
def __init__(self, category=None, *args, **kwargs):
    super(HabrSpider, self).__init__(*args, **kwargs)
    log.ScrapyFileLogObserver(open('debug.log', 'w'), level=log.DEBUG).start()
    log.ScrapyFileLogObserver(open('error.log', 'w'), level=log.ERROR).start()
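Observers started in a spider's __init__ stay attached for the life of the process. If they should be detached when the spider finishes, one option with the old scrapy.log API is to keep references to them and stop them on spider_closed. A sketch assuming the pre-1.0 pydispatch idiom; the base class, file names, and method name are illustrative:

from scrapy import log, signals
from scrapy.spider import Spider
from scrapy.xlib.pydispatch import dispatcher


class HabrSpider(Spider):
    name = 'habr'

    def __init__(self, category=None, *args, **kwargs):
        super(HabrSpider, self).__init__(*args, **kwargs)
        # keep references so the observers can be stopped later
        self.debug_observer = log.ScrapyFileLogObserver(open('debug.log', 'w'), level=log.DEBUG)
        self.error_observer = log.ScrapyFileLogObserver(open('error.log', 'w'), level=log.ERROR)
        self.debug_observer.start()
        self.error_observer.start()
        dispatcher.connect(self._stop_observers, signals.spider_closed)

    def _stop_observers(self):
        # detach both observers once the spider has closed
        self.debug_observer.stop()
        self.error_observer.stop()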
def setUp(self):
    self.f = StringIO()
    self.log_observer = log.ScrapyFileLogObserver(self.f, self.level, self.encoding)
    self.log_observer.start()
# Crawl responsibly by identifying yourself (and your website) on the user-agent
USER_AGENT = "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36"

# LOG
# Minimum level to log. Available levels are: CRITICAL, ERROR, WARNING, INFO, DEBUG.
LOG_FILE = '/tmp/scrapy.log'
LOG_LEVEL = 'INFO'
LOG_ENABLED = True
LOG_STDOUT = True

DUPEFILTER_CLASS = 'tobey.filters.duplicate_filter.SqliteDupeFilter'

import logging
from scrapy import log

logfile = open('/tmp/observer.log', 'w+')
observer = log.ScrapyFileLogObserver(logfile, level=logging.INFO)
observer.start()

#SQLITE_PATH = '/Users/zhanghui/github/startup/spider.appcheckin.com/'
SQLITE_PATH = '/tmp/'

WP_USER = "******"
WP_PASSWORD = "******"
WP_URL = "http://www.fx-dev.com/xmlrpc.php"
WP_BLOGID = ""
WP_TRANSPORT = ""

# "Back to source" link appended to posts ("良心跳转" = "courteous redirect", "查看原文" = "view the original")
GOTO_SOURCE = u'<p><a href="%s" title="良心跳转" target="_blank">[查看原文]</a></p>'
def run(self, args, opts):
    if opts.logstderr:
        level = log._get_log_level(self.settings['LOG_LEVEL'])
        observer = log.ScrapyFileLogObserver(sys.stderr, level=level)
        observer.start()
    super(Command, self).run(args, opts)
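For completeness, the same observer can also be driven from a plain script with the old scrapy.log API; a minimal sketch (the message text and level are illustrative):

import sys
from scrapy import log

# mirror Scrapy log messages to stderr at INFO level and above
observer = log.ScrapyFileLogObserver(sys.stderr, level=log.INFO)
observer.start()

log.msg("observer attached", level=log.INFO)  # routed through Twisted to the observer

observer.stop()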