def start(self):
    """Start this worker.

    On start, the spider's start_tasks are moved onto the pending-crawl
    queue and the first fetch iteration is scheduled on the IOLoop.
    Calling start() on an already-started worker is a no-op (a warning
    is logged instead).
    """
    if self.is_started:
        self.logger.warning("duplicate start")
    else:
        self.is_started = True
        self.worker_statistic.start_time = datetime.datetime.now()
        try:
            # Recording is best-effort: a failure here must not prevent
            # the worker from starting.
            RecorderManager.instance().record_doing(
                record(
                    self._worker_name,
                    self.worker_statistic.start_time.strftime(
                        "%Y-%m-%d %H:%M:%S"),
                    get_class_path(self.spider.crawl_schedule.__class__),
                    self.spider.crawl_schedule.schedule_kwargs,
                    get_class_path(self.spider.__class__),
                    self.spider.spider_kwargs))
        except Exception as e:
            # `except ... as e` is valid on Python 2.6+ and 3.x, unlike
            # the Python-2-only `except Exception, e` form it replaces.
            self.logger.warning("record worker failed:%s" % e)
        _move_start_tasks_to_crawl_schedule(self.spider.start_tasks,
                                            self.spider.crawl_schedule)
        ioloop.IOLoop.instance().add_timeout(
            datetime.timedelta(
                milliseconds=self.spider.crawl_schedule.interval),
            self.loop_get_and_execute)
        self.logger.info("start worker")
def start(self):
    """Start this worker.

    On start, the spider's start_tasks are moved onto the pending-crawl
    queue and the first fetch iteration is scheduled on the IOLoop.
    Calling start() on an already-started worker is a no-op (a warning
    is logged instead).
    """
    if self.is_started:
        self.logger.warning("duplicate start")
    else:
        self.is_started = True
        self.worker_statistic.start_time = datetime.datetime.now()
        try:
            # Recording is best-effort: a failure here must not prevent
            # the worker from starting.
            RecorderManager.instance().record_doing(
                record(
                    self._worker_name,
                    self.worker_statistic.start_time.strftime(
                        "%Y-%m-%d %H:%M:%S"),
                    get_class_path(self.spider.crawl_schedule.__class__),
                    self.spider.crawl_schedule.schedule_kwargs,
                    get_class_path(self.spider.__class__),
                    self.spider.spider_kwargs))
        except Exception as e:
            # `except ... as e` is valid on Python 2.6+ and 3.x, unlike
            # the Python-2-only `except Exception, e` form it replaces.
            self.logger.warning("record worker failed:%s" % e)
        _move_start_tasks_to_crawl_schedule(self.spider.start_tasks,
                                            self.spider.crawl_schedule)
        ioloop.IOLoop.instance().add_timeout(
            datetime.timedelta(
                milliseconds=self.spider.crawl_schedule.interval),
            self.loop_get_and_execute)
        self.logger.info("start worker")
def recover(self):
    """Restart this worker in recovery mode.

    Unlike start(), the spider's start_tasks are NOT re-queued; the
    worker resumes from whatever is already in the crawl schedule.
    Calling recover() on an already-started worker is a no-op (a
    warning is logged instead).
    """
    if self.is_started:
        self.logger.warning("duplicate start")
    else:
        self.worker_statistic.start_time = datetime.datetime.now()
        try:
            # Guarded for consistency with start(): recording is
            # best-effort and a failure must not abort recovery.
            RecorderManager.instance().record_doing(
                record(
                    self._worker_name,
                    self.worker_statistic.start_time.strftime(
                        "%Y-%m-%d %H:%M:%S"),
                    get_class_path(self.spider.crawl_schedule.__class__),
                    self.spider.crawl_schedule.schedule_kwargs,
                    get_class_path(self.spider.__class__),
                    self.spider.spider_kwargs))
        except Exception as e:
            self.logger.warning("record worker failed:%s" % e)
        self.is_started = True
        ioloop.IOLoop.instance().add_timeout(
            datetime.timedelta(
                milliseconds=self.spider.crawl_schedule.interval),
            self.loop_get_and_execute)
        self.logger.info("recover worker")
def recover(self):
    """Restart this worker in recovery mode.

    Unlike start(), the spider's start_tasks are NOT re-queued; the
    worker resumes from whatever is already in the crawl schedule.
    Calling recover() on an already-started worker is a no-op (a
    warning is logged instead).
    """
    if self.is_started:
        self.logger.warning("duplicate start")
    else:
        self.worker_statistic.start_time = datetime.datetime.now()
        try:
            # Guarded for consistency with start(): recording is
            # best-effort and a failure must not abort recovery.
            RecorderManager.instance().record_doing(
                record(
                    self._worker_name,
                    self.worker_statistic.start_time.strftime(
                        "%Y-%m-%d %H:%M:%S"),
                    get_class_path(self.spider.crawl_schedule.__class__),
                    self.spider.crawl_schedule.schedule_kwargs,
                    get_class_path(self.spider.__class__),
                    self.spider.spider_kwargs))
        except Exception as e:
            self.logger.warning("record worker failed:%s" % e)
        self.is_started = True
        ioloop.IOLoop.instance().add_timeout(
            datetime.timedelta(
                milliseconds=self.spider.crawl_schedule.interval),
            self.loop_get_and_execute)
        self.logger.info("recover worker")
import shutil
import hashlib
import logging
import socketserver

# Project root (two levels up from this file), added to sys.path so the
# "core" and "data" packages below can be imported.
# NOTE(review): `os`, `sys`, `time` and `json` are used in this chunk but
# not imported here — presumably imported earlier in the file; confirm.
base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(base_dir)

from core.record import record
from data.conf.configure import HomeDocs, username, password
from data.dict.dict import Stand_msg

'''
# Build the daily log file name, format: %Y-%m-%d.txt
'''
timer = time.strftime("%Y-%m-%d")
# Windows-style path separators are hard-coded here; `record` is used as a
# logger factory for the per-day log file.
logger = record(base_dir + "\\data\\log\\" + timer + ".txt")


class MyTCPHandler(socketserver.BaseRequestHandler):
    '''
    # Parse the parameters of an incoming connection request
    '''

    def setup(self):
        # Called once per connection before handle(): read the initial
        # authentication message and parse it as JSON.
        # NOTE(review): assumes the whole JSON message arrives in a single
        # recv of at most 1024 bytes — a larger or fragmented message would
        # make json.loads fail; confirm against the client protocol.
        auth_msg_source = self.request.recv(1024).strip()
        self.auth_msg = json.loads(auth_msg_source)
        # NOTE(review): the attributes below are only set when the message
        # type is 'auth'; code that reads them after a non-auth first
        # message would raise AttributeError — verify against handle().
        if (self.auth_msg.get('type')) == 'auth':
            self.username = self.auth_msg['username']
            self.md5_password = self.auth_msg['password']
            self.ipaddr = self.auth_msg['ipaddr']
            self.role = self.auth_msg['auth_tag']
            logger.info("用户名: %s尝试从终端 %s 登录服务器" % (self.username, self.ipaddr))