Beispiel #1
0
    def spider_closed(self, spider, reason):
        """Finalize the crawl statistics and store them at ``spider.logger``.

        Nothing happens unless ``self.enabled`` is truthy and the spider has
        a non-empty ``logger`` attribute; that value is handed to
        ``utils.connect_uri`` as the destination.  Failures are logged and
        never raised to the caller.

        NOTE(review): Scrapy >= 1.0 exposes ``Spider.logger`` as a logging
        adapter -- confirm this attribute is still meant to carry a URI.

        :param spider: spider being closed; its ``name`` and ``config``
            attributes are recorded alongside the other stats.
        :param reason: value recorded under the ``finish_reason`` stat.
        """
        if not (self.enabled and hasattr(spider, 'logger')):
            return

        try:
            uri = spider.logger
            if not uri:
                return

            log.msg('post bot stats to <{}>'.format(uri))
            cnn, _, tbl = utils.connect_uri(uri)
            try:
                ago = self.stats.get_value('start_time', datetime.utcnow())
                now = datetime.utcnow()

                self.stats.set_value('finish_time', now, spider=spider)
                self.stats.set_value('elapsed_time',
                                     (now - ago).total_seconds(),
                                     spider=spider)
                self.stats.set_value('finish_reason', reason, spider=spider)
                self.stats.set_value('bot_ip', utils.get_ipaddr())
                self.stats.set_value(
                    'bot_name',
                    self.crawler.settings.get('BOT_NAME', 'unknown'))
                self.stats.set_value('spider_name', spider.name)
                self.stats.set_value('config_path', spider.config)
                self.stats.set_value('job_id', os.getenv('SCRAPY_JOB', None))

                # Stat keys have their dots replaced before insertion
                # (presumably the store rejects dotted field names).
                # ``items()`` works on both Python 2 and Python 3.
                tbl.insert({
                    k.replace('.', '_'): v
                    for k, v in self.stats.get_stats().items()
                })
            finally:
                # Release the connection even when collecting or inserting
                # the stats failed.
                cnn.close()
        except Exception as ex:
            # Best effort: posting stats must not break spider shutdown,
            # but keep the cause in the log.
            log.err('cannot post bot stats: {}'.format(ex))
Beispiel #2
0
    def spider_closed(self, spider, reason):
        """Finalize the crawl statistics and store them at ``spider.logger``.

        Nothing happens unless ``self.enabled`` is truthy and the spider has
        a non-empty ``logger`` attribute; that value is handed to
        ``utils.connect_uri`` as the destination.  Failures are logged and
        never raised to the caller.

        NOTE(review): Scrapy >= 1.0 exposes ``Spider.logger`` as a logging
        adapter -- confirm this attribute is still meant to carry a URI.

        :param spider: spider being closed; its ``name`` and ``config``
            attributes are recorded alongside the other stats.
        :param reason: value recorded under the ``finish_reason`` stat.
        """
        if not (self.enabled and hasattr(spider, 'logger')):
            return

        try:
            uri = spider.logger
            if not uri:
                return

            log.msg('post bot stats to <{}>'.format(uri))
            cnn, _, tbl = utils.connect_uri(uri)
            try:
                ago = self.stats.get_value('start_time', datetime.utcnow())
                now = datetime.utcnow()
                elapsed = (now - ago).total_seconds()
                bot_name = self.crawler.settings.get('BOT_NAME', 'unknown')

                self.stats.set_value('finish_time', now, spider=spider)
                self.stats.set_value('elapsed_time', elapsed, spider=spider)
                self.stats.set_value('finish_reason', reason, spider=spider)
                self.stats.set_value('bot_ip', utils.get_ipaddr())
                self.stats.set_value('bot_name', bot_name)
                self.stats.set_value('spider_name', spider.name)
                self.stats.set_value('config_path', spider.config)
                self.stats.set_value('job_id', os.getenv('SCRAPY_JOB', None))

                # Stat keys have their dots replaced before insertion
                # (presumably the store rejects dotted field names).
                # ``items()`` works on both Python 2 and Python 3.
                stats = self.stats.get_stats()
                tbl.insert({k.replace('.', '_'): v for k, v in stats.items()})
            finally:
                # Release the connection even when collecting or inserting
                # the stats failed.
                cnn.close()
        except Exception as ex:
            # Best effort: posting stats must not break spider shutdown,
            # but keep the cause in the log.
            log.err('cannot post bot stats: {}'.format(ex))
Beispiel #3
0
    def open_spider(self, spider):
        """Open the connection described by ``spider.mongo``, if present.

        On success ``self.cnn``, ``self.db`` and ``self.tbl`` hold the values
        returned by ``utils.connect_uri`` and ``self.upsert_keys`` holds the
        result of ``self.get_upsert_keys()``.  When the spider has no
        ``mongo`` attribute, or connecting fails (the error is logged, not
        raised), all three connection attributes are ``None``.

        :param spider: spider being opened.
        """
        # Reset every handle up front so a skipped or failed connection never
        # leaves ``self.tbl`` undefined or pointing at a stale table.
        self.cnn = self.db = self.tbl = None
        if not hasattr(spider, 'mongo'):
            return

        try:
            self.upsert_keys = self.get_upsert_keys()
            uri = spider.mongo
            log.msg('connect <{}>'.format(uri))
            self.cnn, self.db, self.tbl = utils.connect_uri(uri)
        except Exception as ex:
            log.err('cannot connect to mongodb: {}'.format(ex))
Beispiel #4
0
    def open_spider(self, spider):
        """Open the connection described by ``spider.mongo``, if present.

        On success ``self.cnn``, ``self.db`` and ``self.tbl`` hold the values
        returned by ``utils.connect_uri`` and ``self.upsert_keys`` holds the
        result of ``self.get_upsert_keys()``.  When the spider has no
        ``mongo`` attribute, or connecting fails (the error is logged, not
        raised), all three connection attributes are ``None``.

        :param spider: spider being opened.
        """
        # Reset every handle up front so a skipped or failed connection never
        # leaves ``self.tbl`` undefined or pointing at a stale table.
        self.cnn = self.db = self.tbl = None
        if not hasattr(spider, 'mongo'):
            return

        try:
            self.upsert_keys = self.get_upsert_keys()
            uri = spider.mongo
            log.msg('connect <{}>'.format(uri))
            self.cnn, self.db, self.tbl = utils.connect_uri(uri)
        except Exception as ex:
            log.err('cannot connect to mongodb: {}'.format(ex))
Beispiel #5
0
    def open_spider(self, spider):
        """Open the connection described by ``spider.mysql``, if present.

        On success ``self.cnn`` and ``self.tbl`` hold the first and third
        values returned by ``utils.connect_uri`` and ``self.cur`` holds a
        cursor created from the connection.  When the spider has no
        ``mysql`` attribute, or any step fails (the error is printed and
        logged, not raised), all three attributes are ``None``.

        :param spider: spider being opened.
        """
        # Reset every handle up front so a skipped or failed connection never
        # leaves ``self.tbl`` undefined or pointing at a stale table.
        self.cnn = self.cur = self.tbl = None
        if not hasattr(spider, 'mysql'):
            return

        cnn = None
        try:
            uri = spider.mysql
            log.msg('connect <{}>'.format(uri))
            cnn, _, tbl = utils.connect_uri(uri)
            cur = cnn.cursor()
        except Exception as ex:
            traceback.print_exc()
            log.err('cannot connect to mysql: {}'.format(ex))
            # The connection may already be open when cursor() fails; close
            # it instead of silently dropping the only reference to it.
            if cnn is not None:
                try:
                    cnn.close()
                except Exception as close_ex:
                    log.err('cannot close mysql connection: {}'.format(
                        close_ex))
            return

        # Publish the handles only once every step has succeeded.
        self.cnn, self.cur, self.tbl = cnn, cur, tbl
Beispiel #6
0
    def open_spider(self, spider):
        """Open the connection described by ``spider.mysql``, if present.

        On success ``self.cnn`` and ``self.tbl`` hold the first and third
        values returned by ``utils.connect_uri`` and ``self.cur`` holds a
        cursor created from the connection.  When the spider has no
        ``mysql`` attribute, or any step fails (the error is printed and
        logged, not raised), all three attributes are ``None``.

        :param spider: spider being opened.
        """
        # Reset every handle up front so a skipped or failed connection never
        # leaves ``self.tbl`` undefined or pointing at a stale table.
        self.cnn = self.cur = self.tbl = None
        if not hasattr(spider, 'mysql'):
            return

        cnn = None
        try:
            uri = spider.mysql
            log.msg('connect <{}>'.format(uri))
            cnn, _, tbl = utils.connect_uri(uri)
            cur = cnn.cursor()
        except Exception as ex:
            traceback.print_exc()
            log.err('cannot connect to mysql: {}'.format(ex))
            # The connection may already be open when cursor() fails; close
            # it instead of silently dropping the only reference to it.
            if cnn is not None:
                try:
                    cnn.close()
                except Exception as close_ex:
                    log.err('cannot close mysql connection: {}'.format(
                        close_ex))
            return

        # Publish the handles only once every step has succeeded.
        self.cnn, self.cur, self.tbl = cnn, cur, tbl