Beispiel #1
0
def process_mq_cleaning(conf):
    """Start the entity-extractor workers, heartbeat and thrift server.

    Registers ``exit_isr`` for SIGTERM/SIGINT, stores an ``EntityExtractor``
    and an ``EEWorkerPool`` in ``singletons``, starts the pool and a
    heartbeat thread, then serves the thrift interface until ``serve()``
    returns. Any ``Exception`` raised while building or running the thrift
    server is logged (message plus traceback) and not re-raised.

    Args:
        conf: configuration mapping; ``conf['server']['server_process_num']``
            and ``conf['server']['port']`` are read here.
    """
    global running
    signal.signal(signal.SIGTERM, exit_isr)
    signal.signal(signal.SIGINT, exit_isr)

    # This is only used by thrift, each process has its own EntityExtractor
    # instance.
    singletons[EntityExtractor.__name__] = EntityExtractor(conf)
    singletons[EEWorkerPool.__name__] = EEWorkerPool(
        init_count=conf['server']['server_process_num'], conf=conf)

    worker_pool = singletons[EEWorkerPool.__name__]
    worker_pool.start_all()
    heartbeat_thread = HeartbeatThread("entity_extractor", conf)
    heartbeat_thread.start()

    try:
        handler = EntityExtractorHandler(conf, worker_pool)
        processor = EntityExtractorService.Processor(handler)
        transport = TSocket.TServerSocket(port=conf['server']['port'])
        tfactory = TTransport.TBufferedTransportFactory()
        pfactory = TBinaryProtocol.TBinaryProtocolFactory()
        server = InterruptableThreadPoolServer(processor, transport, tfactory,
                                               pfactory)
        # A single thrift thread; the work itself goes through worker_pool.
        server.setNumThreads(1)
        server.serve()
    except Exception as e:  # "as" form is valid on Python 2.6+ and Python 3
        log.error(str(e))
        log.error(traceback.format_exc())
Beispiel #2
0
def main(conf):
    """Run the crawl-merge service until the thrift server stops.

    Registers ``exit_isr`` for SIGTERM/SIGINT, creates the pid file, starts
    the ``CrawlerMergeWorkerPool`` and a heartbeat thread, then serves the
    thrift interface. The pid file is always removed afterwards, and the
    worker pool is stopped if it was created and ``running`` is still true.

    Args:
        conf: configuration mapping; reads ``conf['server']`` keys
            ``server_process_num``, ``port`` and ``server_thread_num``, and
            the logger stored under ``conf['log']``.
    """
    global running
    signal.signal(signal.SIGTERM, exit_isr)
    signal.signal(signal.SIGINT, exit_isr)

    # Bound up front so the shutdown check below cannot hit an unbound name
    # when setup fails before the pool is created.
    worker_pool = None
    try:
        make_pid_file(sys.path[0])
        try:
            converter = ConvertProccessor(conf)
            selecter = SelectProcessor(conf)
            worker_pool = CrawlerMergeWorkerPool(
                init_count=conf['server']['server_process_num'], conf=conf)
            singletons[CrawlerMergeWorkerPool.__name__] = worker_pool
            worker_pool.start_all()
            running = True
            heartbeat_thread = HeartbeatThread("crawl_merge", conf)
            heartbeat_thread.start()

            handler = CrawlerMergeHandler(conf, converter, selecter)
            processor = CrawlerMergeService.Processor(handler)
            transport = TSocket.TServerSocket(port=conf['server'].get('port'))
            tfactory = TTransport.TBufferedTransportFactory()
            pfactory = TBinaryProtocol.TBinaryProtocolFactory()
            server = InterruptableThreadPoolServer(processor, transport,
                                                   tfactory, pfactory)
            server.setNumThreads(conf['server']['server_thread_num'])
            server.serve()
        except Exception:
            conf.get('log').error(traceback.format_exc())
    except:
        # Deliberately best-effort: nothing raised above (including a failure
        # of the logging call itself) may prevent the cleanup that follows.
        pass
    finally:
        remove_pid_file(sys.path[0])
    if running and worker_pool is not None:
        worker_pool.stop_all()
Beispiel #3
0
def main(conf):
    """Run the datasaver service until the thrift server stops.

    Registers ``exit_isr`` for SIGTERM/SIGINT, creates the pid file, stores a
    ``DataSaver`` and a ``DatasaverWorkerPool`` in ``singletons``, starts the
    pool and a heartbeat thread, then serves the thrift interface. When
    ``serve()`` returns or raises, the worker pool is stopped if ``running``
    is still true.

    Args:
        conf: configuration mapping; reads ``conf['server']`` keys
            ``server_process_num``, ``port`` and ``server_thread_num``.
    """
    global running
    signal.signal(signal.SIGTERM, exit_isr)
    signal.signal(signal.SIGINT, exit_isr)
    # NOTE(review): the except/finally clause of this outer try is not
    # visible in this excerpt -- confirm against the full file.
    try:
        pid_file = make_pid_file(sys.path[0])
        try:
            datasaver = DataSaver(conf)
            singletons[DataSaver.__name__] = datasaver
            worker_pool = DatasaverWorkerPool(
                init_count=conf['server']['server_process_num'], conf=conf)
            singletons[DatasaverWorkerPool.__name__] = worker_pool
            worker_pool.start_all()
            # Marks that the pool was started, so it gets stopped below.
            running = True
            heartbeat_thread = HeartbeatThread("datasaver", conf)
            heartbeat_thread.start()

            # Thrift interface.
            handler = DataSaverHandler()
            processor = DataSaverService.Processor(handler)
            transport = TSocket.TServerSocket(port=conf['server']['port'])
            tfactory = TTransport.TBufferedTransportFactory()
            pfactory = TBinaryProtocol.TBinaryProtocolFactory()
            server = InterruptableThreadPoolServer(processor, transport,
                                                   tfactory, pfactory)
            server.setNumThreads(conf['server']['server_thread_num'])
            log.debug("start main thrift")
            server.serve()
        except Exception, e:
            # Failures are only logged at debug level and not re-raised.
            log.debug(traceback.format_exc())
        if running:
            worker_pool.stop_all()
Beispiel #4
0
def main(conf):
    """Run the downloader service, then hard-kill the current process.

    Registers ``exit_isr`` for SIGTERM/SIGINT, creates the pid file, starts
    the ``DownloaderWorkerPool`` and a heartbeat thread, and serves the
    thrift interface. Once the server stops (or setup fails), the worker
    pool and the download handler are shut down, the pid file is removed and
    the process sends itself SIGKILL.

    Args:
        conf: configuration mapping; reads ``conf['server']`` keys
            ``server_process_num``, ``port`` and ``server_thread_num``, and
            the logger stored under ``conf['log']``.
    """
    global running
    signal.signal(signal.SIGTERM, exit_isr)
    signal.signal(signal.SIGINT, exit_isr)
    handler = None
    # Bound up front so the shutdown code cannot hit an unbound name when
    # setup fails before the pool is created.
    worker_pool = None
    try:
        make_pid_file(sys.path[0])
        try:
            handler = DownloadHandler(conf)
            # Start the worker processes.
            worker_pool = DownloaderWorkerPool(
                init_count=conf['server']['server_process_num'], conf=conf)
            singletons[DownloaderWorkerPool.__name__] = worker_pool
            worker_pool.start_all()
            running = True
            # Start the heartbeat.
            heartbeat_thread = HeartbeatThread("downloader", conf)
            heartbeat_thread.start()
            # Thrift interface.
            processor = DownloadService.Processor(handler)
            transport = TSocket.TServerSocket(port=conf['server'].get('port'))
            tfactory = TTransport.TBufferedTransportFactory()
            pfactory = TBinaryProtocol.TBinaryProtocolFactory()
            server = InterruptableThreadPoolServer(processor, transport,
                                                   tfactory, pfactory)
            server.setNumThreads(conf['server'].get('server_thread_num'))
            server.serve()
        except Exception:
            import traceback
            # Log the full traceback: ``e.message`` is deprecated, is often
            # empty, and carries no stack information.
            conf.get('log').error(traceback.format_exc())
        try:
            if running and worker_pool is not None:
                worker_pool.stop_all()
        finally:
            if handler:
                # handler.stop() must only be called after the blocking
                # server has finished, so that phantomjs is reclaimed; the
                # finally makes sure a failing stop_all() cannot skip it.
                handler.stop()
    except:
        # Deliberately best-effort: nothing raised above may prevent the
        # pid-file removal and the final kill.
        pass
    finally:
        remove_pid_file(sys.path[0])
    # Hard-kill this process once cleanup is done.
    os.kill(os.getpid(), signal.SIGKILL)
Beispiel #5
0
class SchedulerServer(object):
    """Wires the scheduler's processing pool, input thread and heartbeat."""

    def __init__(self, conf, scheduler):
        """Build the thread pool, input thread and heartbeat thread.

        Args:
            conf: configuration mapping; reads ``conf['log']``,
                ``conf['server']['process_thread_num']`` and
                ``conf['beanstalk_conf']``.
            scheduler: scheduler object handed to each ``SchedulerProccessor``.
        """
        self.conf = conf
        self.log = conf['log']
        self.scheduler = scheduler
        self.process_thread_num = conf['server']['process_thread_num']
        # Per-thread factories: name -> (callable, constructor args).
        thread_locals = {'processor': (SchedulerProccessor, (conf['log'], self.scheduler)),
                         'profiler': (profiler_creator, ())}
        self.process_pool = ThreadPool(self.process_thread_num, thread_locals)
        self.input_thread = InputThread(conf['beanstalk_conf'], conf['log'], self.process_pool)
        self.heartbeat_thread = HeartbeatThread('scheduler', self.conf)

    def start(self):
        """Start the input thread and the heartbeat thread."""
        self.input_thread.start()
        self.heartbeat_thread.start()
        self.log.info("start_server\tSchedulerServer!")

    def stop(self, message):
        """Stop both threads, log ``message`` and exit with status 1.

        Raises:
            SystemExit: always, with exit code 1.
        """
        # Local import keeps this block self-contained. sys.exit replaces the
        # ``exit`` helper injected by the ``site`` module, which is meant for
        # interactive use and is missing under ``python -S`` or frozen builds.
        import sys

        self.input_thread.stop()
        self.heartbeat_thread.stop()
        self.log.info("stop_server\tstatus:%s!" % message)
        sys.exit(1)
Beispiel #6
0
class DownloaderServer(object):
    """Wires the downloader's processing pool, input thread and heartbeat."""

    def __init__(self, conf):
        """Build the thread pool, input thread and heartbeat thread.

        Args:
            conf: configuration mapping; reads ``conf['log']``,
                ``conf['local_server']['process_thread_num']`` and
                ``conf['beanstalk_conf']``.
        """
        self.log = conf.get('log')
        # Per-thread factories: name -> (callable, constructor args).
        thread_locals = {
            'processor': (DownloaderProccessor, (self.log, conf)),
            'profiler': (profiler_creator, ())
        }
        self.process_pool = ThreadPool(
            conf['local_server'].get('process_thread_num'), thread_locals)
        self.input_thread = InputThread(conf.get('beanstalk_conf'), self.log,
                                        self.process_pool)
        self.heartbeat_thread = HeartbeatThread('download', conf)
        self.log.info('初始化线程信息完成..')

    def start(self):
        """Start the input thread and the heartbeat thread."""
        self.input_thread.start()
        self.heartbeat_thread.start()
        self.log.info("start DownloadServer!")

    def stop(self, message):
        """Log ``message`` and stop the threads started by ``start()``."""
        self.log.info("stop DownloadServer %s!" % message)
        self.input_thread.stop()
        # start() launches the heartbeat thread as well, so stop it here too;
        # otherwise it keeps running after the server has been stopped.
        self.heartbeat_thread.stop()
Beispiel #7
0
def main(conf):
    """Run the single-source-merge service until the thrift server stops.

    Registers ``exit_isr`` for SIGTERM/SIGINT, creates the pid file, stores a
    ``SingleSourceMerger``, an ``SMWorkerPool`` and a ``RecordLockManager``
    in ``singletons``, starts the pool and a heartbeat thread, then serves
    the thrift interface. When ``serve()`` returns or raises, the worker pool
    is stopped if ``running`` is true.

    Args:
        conf: configuration mapping; reads
            ``conf['server']['server_process_num']`` and
            ``conf['server']['port']``, and has ``conf['log']`` overwritten
            with the module-level ``log``.
    """
    global running
    signal.signal(signal.SIGTERM, exit_isr)
    signal.signal(signal.SIGINT, exit_isr)
    pid_file = make_pid_file(sys.path[0])
    # NOTE(review): the except/finally clause of this outer try is not
    # visible in this excerpt -- confirm against the full file.
    try:

        # This is only used by thrift, each process has its own instance.
        # NOTE(review): this comment originally named EntityExtractor, which
        # looks copy-pasted; the object built here is a SingleSourceMerger.
        singletons[SingleSourceMerger.__name__] = SingleSourceMerger(conf)
        singletons[SMWorkerPool.__name__] = SMWorkerPool(
            init_count=conf['server']['server_process_num'], conf=conf)
        singletons[RecordLockManager.__name__] = RecordLockManager(
            conf,
            prefix='single-src',
            backlog_path=os.path.join(crawler_basic_path, 'ss_backlog'))

        worker_pool = singletons[SMWorkerPool.__name__]
        worker_pool.start_all()
        # Store the module-level logger in conf before building the heartbeat
        # thread, which receives conf.
        conf['log'] = log
        heartbeat_thread = HeartbeatThread("single_src_merge", conf)
        heartbeat_thread.start()
        try:
            # Thrift interface, served by a single thread.
            handler = SMThriftHandler(conf, worker_pool)
            processor = SingleSourceMergeService.Processor(handler)
            transport = TSocket.TServerSocket(port=conf['server']['port'])
            tfactory = TTransport.TBufferedTransportFactory()
            pfactory = TBinaryProtocol.TBinaryProtocolFactory()
            server = InterruptableThreadPoolServer(processor, transport,
                                                   tfactory, pfactory)
            server.setNumThreads(1)
            server.serve()
        except Exception, e:
            # Log the message and the traceback; the error is not re-raised.
            log.error(str(e))
            log.error(traceback.format_exc())
        # NOTE(review): ``running`` is never assigned in the visible part of
        # this function -- presumably set elsewhere; confirm.
        if running:
            worker_pool.stop_all()