Example #1
def setUp(self):
    from scrapy import log
    from cStringIO import StringIO
    # capture log output in an in-memory buffer instead of a real file
    self.f = StringIO()
    self.sflo = log.ScrapyFileLogObserver(self.f, self.level, self.encoding)
    self.sflo.start()
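A matching tearDown typically detaches the observer so later tests start clean; a minimal sketch, assuming the attributes from the setUp above (stop() is inherited from Twisted's FileLogObserver, which ScrapyFileLogObserver extends):

def tearDown(self):
    # stop routing Twisted log messages into self.f
    self.sflo.stop()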
Example #2
def setUp(self):
    # collect log output in a bytes buffer, wired to a fresh test crawler
    self.f = BytesIO()
    self.crawler = get_crawler(Spider)
    self.spider = self.crawler.spider = self.crawler._create_spider('test')
    self.log_observer = log.ScrapyFileLogObserver(self.f, log.INFO,
                                                  'utf-8', self.crawler)
    self.log_observer.start()
Example #3
    def crawl(cls, sites):
        stat = {"spiders": 0}

        def soft_stop_reactor():
            stat["spiders"] -= 1
            if not stat["spiders"]:
                reactor.stop()

        for site in sites:
            try:
                spider = site.parser.spider(site)
            except (NotImplementedError, ObjectDoesNotExist):
                logger.error(
                    _('Spider not implemented for "%s" site'), site.label)
            else:
                stat["spiders"] += 1
                with spider_project(spider) as settings:
                    crawler = Crawler(settings)
                    crawler.signals.connect(
                        soft_stop_reactor,
                        signal=signals.spider_closed)  # last spider out stops the reactor
                    crawler.configure()
                    crawler.crawl(spider)
                    crawler.start()

        logfile = open('crawl.log', 'w')
        log_observer = log.ScrapyFileLogObserver(logfile, level=logging.INFO)
        log_observer.start()

        # the script blocks here until every spider has closed and the reactor stops
        reactor.run()
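The call site for this classmethod is not shown; a hypothetical invocation, assuming a Django-style Site model and that the method lives on some SiteCrawler class (both names invented here for illustration):

SiteCrawler.crawl(Site.objects.all())  # blocks in reactor.run() until all spiders close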
Example #4
def setUp(self):
    from scrapy.spider import Spider
    from scrapy.utils.test import get_crawler
    from scrapy.crawler import Crawler
    from scrapy.utils.project import get_project_settings
    # self.crawler = get_crawler(self.settings_dict)
    self.crawler = Crawler(get_project_settings())
    self.spider = Spider('foo')

    from scrapy import log
    import sys
    from cStringIO import StringIO
    # self.level = log.INFO
    self.level = log.DEBUG
    self.encoding = 'utf-8'
    # self.f = StringIO()  # swap in to capture output instead of printing
    self.f = sys.stdout
    self.sflo = log.ScrapyFileLogObserver(self.f, self.level, self.encoding)
    self.sflo.start()
Example #5
def __init__(self, category=None, *args, **kwargs):
    super(HabrSpider, self).__init__(*args, **kwargs)
    # split logging by level: debug.log gets everything, error.log only errors
    log.ScrapyFileLogObserver(open('debug.log', 'w'), level=log.DEBUG).start()
    log.ScrapyFileLogObserver(open('error.log', 'w'), level=log.ERROR).start()
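Each observer filters independently by its level, so debug.log receives every message at DEBUG and above while error.log receives only ERROR and above; note that both files are opened in 'w' mode and are therefore truncated each time the spider is instantiated.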
Example #6
def setUp(self):
    # capture log output so the tests can make assertions on it
    self.f = StringIO()
    self.log_observer = log.ScrapyFileLogObserver(self.f, self.level,
                                                  self.encoding)
    self.log_observer.start()
Example #7
# Crawl responsibly by identifying yourself (and your website) on the user-agent
USER_AGENT = "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36"

# LOG
# Minimum level to log. Available levels: CRITICAL, ERROR, WARNING, INFO, DEBUG.
LOG_FILE = '/tmp/scrapy.log'
LOG_LEVEL = 'INFO'
LOG_ENABLED = True
LOG_STDOUT = True

DUPEFILTER_CLASS = 'tobey.filters.duplicate_filter.SqliteDupeFilter'


import logging
from scrapy import log
logfile = open('/tmp/observer.log', 'w+')
observer = log.ScrapyFileLogObserver(logfile, level=logging.INFO)
observer.start()

#SQLITE_PATH='/Users/zhanghui/github/startup/spider.appcheckin.com/'
SQLITE_PATH='/tmp/'
WP_USER = "******"
WP_PASSWORD = "******"
WP_URL = "http://www.fx-dev.com/xmlrpc.php"
WP_BLOGID = ""
WP_TRANSPORT = ""
# link appended to posts, pointing back to the source article ("查看原文" = "view original")
GOTO_SOURCE = u'<p><a href="%s" title="良心跳转" target="_blank">[查看原文]</a></p>'
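
Note that this settings module sets up two independent log destinations: Scrapy's own LOG_FILE handler writes to /tmp/scrapy.log, while the manually started observer copies every message at INFO and above into /tmp/observer.log as soon as the module is imported; LOG_STDOUT = True additionally redirects anything printed to stdout into the log.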


Example #8
def run(self, args, opts):
    if opts.logstderr:
        # mirror the log to stderr at the configured LOG_LEVEL
        level = log._get_log_level(self.settings['LOG_LEVEL'])
        observer = log.ScrapyFileLogObserver(sys.stderr, level=level)
        observer.start()
    super(Command, self).run(args, opts)
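--logstderr is not a built-in Scrapy option, so the command presumably registers it itself; a minimal sketch of what that override might look like, using the optparse-based add_options hook of the same command API (the flag name comes from the snippet above, everything else is an assumption):

def add_options(self, parser):
    super(Command, self).add_options(parser)
    # hypothetical flag backing opts.logstderr above
    parser.add_option("--logstderr", dest="logstderr", action="store_true",
                      default=False, help="also echo the log to stderr")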