Ejemplo n.º 1
0
class Test(unittest.TestCase):
    """Smoke test that emits log records while a log receiver is running."""

    def setUp(self):
        """Create the two loggers and start the receiver in a thread."""
        self.client_logger = get_logger(name='cola_test_client', server='localhost')
        self.server_logger = get_logger(name='cola_test_server')

        # The receiver is handed the server-side logger; serve_forever runs
        # in a background thread until tearDown shuts it down.
        self.log_server = LogRecordSocketReceiver(logger=self.server_logger)
        receiver_thread = threading.Thread(target=self.log_server.serve_forever)
        receiver_thread.start()

    def tearDown(self):
        """Shut the receiver down, then stop it."""
        receiver = self.log_server
        receiver.shutdown()
        receiver.stop()

    def testLog(self):
        """Emit one error record and one info record via the client logger."""
        log = self.client_logger
        log.error('Sth happens here')
        log.info('sth info here')
Ejemplo n.º 2
0
Archivo: loader.py Proyecto: Ganer/cola
class MasterJobLoader(LimitionJobLoader, JobLoader):
    """Master-side job loader that coordinates a list of worker nodes.

    Construction initialises both base loaders, verifies the environment
    through ``check``, creates the message-queue client and the job logger,
    starts the RPC, rate-clear and log-receiver services, registers the
    RPC-callable methods and installs SIGINT/SIGTERM handlers.

    The instance can be used as a context manager; ``finish`` is called on
    exit.
    """

    def __init__(self, job, data_dir, nodes, local_ip=None, client=None,
                 context=None, copies=1, force=False):
        """Set up the master loader and start its background services.

        :param job: job object; ``job.context`` is used when ``context`` is
            not given.
        :param data_dir: passed through to ``JobLoader.__init__``.
        :param nodes: list of worker node addresses handed to
            ``client_call`` and ``MessageQueueClient``.
        :param local_ip: IP to bind to; must be one of ``get_ips()`` when
            given, otherwise ``get_ip()`` is used.
        :param client: optional client address; when set, a log client
            handler is attached and the client is told to stop in
            ``finish``.
        :param context: optional context overriding ``job.context``.
        :param copies: passed to ``JobLoader`` and ``MessageQueueClient``.
        :param force: passed to ``JobLoader``.
        :raises ValueError: if ``local_ip`` is not one of ``get_ips()``.
        :raises JobMasterRunning: if ``check`` finds the environment illegal.
        """
        ctx = context or job.context
        master_port = ctx.job.master_port
        if local_ip is None:
            local_ip = get_ip()
        else:
            choices_ips = get_ips()
            if local_ip not in choices_ips:
                raise ValueError('IP address must be one of (%s)' % ','.join(choices_ips))
        local = '%s:%s' % (local_ip, master_port)

        JobLoader.__init__(self, job, data_dir, local,
                           context=ctx, copies=copies, force=force)
        LimitionJobLoader.__init__(self, job, context=ctx)

        # check
        self.check()

        self.nodes = nodes
        # Independent copies: nodes are removed from these as they report in.
        self.not_registered = self.nodes[:]
        self.not_finished = self.nodes[:]

        # mq
        self.mq_client = MessageQueueClient(self.nodes, copies=copies)

        # lock
        # Both locks start out held so that run() blocks on them until
        # ready()/worker_finish() (or finish()) releases them.
        self.ready_lock = threading.Lock()
        self.ready_lock.acquire()
        self.finish_lock = threading.Lock()
        self.finish_lock.acquire()

        # logger
        self.logger = get_logger(
            name='cola_master_%s'%self.job.real_name,
            filename=os.path.join(self.root, 'job.log'),
            is_master=True)
        self.client = client
        self.client_handler = None
        if self.client is not None:
            self.client_handler = add_log_client(self.logger, self.client)

        self.init_rpc_server()
        self.init_rate_clear()
        self.init_logger_server(self.logger)

        # register rpc server
        # Each method is exposed under its own name, in this order.
        rpc_names = (
            'client_stop', 'ready', 'worker_finish', 'complete', 'error',
            'get_nodes', 'apply', 'require', 'stop', 'add_node',
            'remove_node',
        )
        for rpc_name in rpc_names:
            self.rpc_server.register_function(getattr(self, rpc_name), rpc_name)

        # register signal
        signal.signal(signal.SIGINT, self.signal_handler)
        signal.signal(signal.SIGTERM, self.signal_handler)

    def init_logger_server(self, logger):
        """Create the log receiver on ``get_ip()`` and serve it in a thread."""
        self.log_server = LogRecordSocketReceiver(host=get_ip(), logger=logger)
        threading.Thread(target=self.log_server.serve_forever).start()

    def stop_logger_server(self):
        """Shut down and stop the log receiver if it was ever created."""
        if hasattr(self, 'log_server'):
            self.log_server.shutdown()
            self.log_server.stop()

    def client_stop(self):
        """Detach the client log handler from the job logger, if any."""
        if self.client_handler is not None:
            self.logger.removeHandler(self.client_handler)

    def check(self):
        """Raise ``JobMasterRunning`` when ``check_env`` reports a conflict."""
        env_legal = self.check_env(force=self.force)
        if not env_legal:
            raise JobMasterRunning('There has been a running job master.')

    def release_lock(self, lock):
        """Release ``lock``, ignoring the error raised if it is not held."""
        try:
            lock.release()
        except threading.ThreadError:
            # Releasing an unlocked lock is expected here (finish() may run
            # after ready()/worker_finish() already released it).
            pass

    def finish(self):
        """Unblock ``run``, tear down services and mark the loader stopped."""
        self.release_lock(self.ready_lock)
        self.release_lock(self.finish_lock)

        LimitionJobLoader.finish(self)
        JobLoader.finish(self)
        self.stop_logger_server()

        # Close every handler independently so one failing handler does not
        # prevent the remaining ones from being closed. Failures are ignored
        # on purpose: this is best-effort cleanup of the logger itself.
        for handler in list(self.logger.handlers):
            try:
                handler.close()
            except Exception:
                pass

        if self.client is not None:
            # Only the host part of the client address is reused; the port
            # comes from the main configuration.
            rpc_client = '%s:%s' % (
                self.client.split(':')[0],
                main_conf.client.port
            )
            client_call(rpc_client, 'stop', ignore=True)

        self.stopped = True

    def stop(self):
        """Ask every node to stop (ignoring socket errors), then finish."""
        for node in self.nodes:
            try:
                client_call(node, 'stop')
            except socket.error:
                pass
        self.finish()

    def signal_handler(self, signum, frame):
        """Signal callback for SIGINT/SIGTERM: stop the job."""
        self.stop()

    def get_nodes(self):
        """Return the current list of nodes."""
        return self.nodes

    def ready(self, node):
        """Mark ``node`` as registered; release ``ready_lock`` on the last one."""
        if node in self.not_registered:
            self.not_registered.remove(node)
            if not self.not_registered:
                self.ready_lock.release()

    def worker_finish(self, node):
        """Mark ``node`` as finished; release ``finish_lock`` on the last one."""
        if node in self.not_finished:
            self.not_finished.remove(node)
            if not self.not_finished:
                self.finish_lock.release()

    def add_node(self, node):
        """Announce ``node`` to every existing node, add it and start it.

        The loop variable is deliberately named differently from the
        ``node`` parameter: reusing the name would overwrite the parameter,
        so the wrong node would be announced, appended and started.
        """
        for peer in self.nodes:
            client_call(peer, 'add_node', node)
        self.nodes.append(node)
        client_call(node, 'run')

    def remove_node(self, node):
        """Tell every current node to drop ``node`` and forget it locally.

        Unknown nodes are ignored locally rather than raising ``ValueError``.
        """
        for peer in self.nodes:
            client_call(peer, 'remove_node', node)
        if node in self.nodes:
            self.nodes.remove(node)

    def run(self):
        """Start the job once all nodes are ready and wait for completion.

        Blocks on ``ready_lock``; if the loader has not been stopped and
        every node registered, the job's start items are queued and each
        node is told to run. Then blocks on ``finish_lock`` and finally
        reports ``finish_job`` to the master watcher, ignoring socket
        errors.
        """
        self.ready_lock.acquire()

        if not self.stopped and not self.not_registered:
            self.mq_client.put(self.job.starts)
            for node in self.nodes:
                client_call(node, 'run')

        self.finish_lock.acquire()

        try:
            master_watcher = '%s:%s' % (get_ip(), main_conf.master.port)
            client_call(master_watcher, 'finish_job', self.job.real_name)
        except socket.error:
            pass

    def __enter__(self):
        """Return the loader itself for use in a ``with`` statement."""
        return self

    def __exit__(self, type_, value, traceback):
        """Always finish the loader when the ``with`` block exits."""
        self.finish()
Ejemplo n.º 3
0
Archivo: loader.py Proyecto: xren/cola
class MasterJobLoader(LimitionJobLoader, JobLoader):
    """Master-side job loader that coordinates a list of worker nodes.

    Construction initialises both base loaders, verifies the environment
    through ``check``, creates the message-queue client and the job logger,
    starts the RPC, rate-clear and log-receiver services, registers the
    RPC-callable methods and installs SIGINT/SIGTERM handlers.

    The instance can be used as a context manager; ``finish`` is called on
    exit.
    """

    def __init__(self, job, data_dir, nodes, local_ip=None, client=None,
                 context=None, copies=1, force=False):
        """Set up the master loader and start its background services.

        :param job: job object; ``job.context`` is used when ``context`` is
            not given.
        :param data_dir: passed through to ``JobLoader.__init__``.
        :param nodes: list of worker node addresses handed to
            ``client_call`` and ``MessageQueueClient``.
        :param local_ip: IP to bind to; must be one of ``get_ips()`` when
            given, otherwise ``get_ip()`` is used.
        :param client: optional client address; when set, a log client
            handler is attached and the client is told to stop in
            ``finish``.
        :param context: optional context overriding ``job.context``.
        :param copies: passed to ``JobLoader`` and ``MessageQueueClient``.
        :param force: passed to ``JobLoader``.
        :raises ValueError: if ``local_ip`` is not one of ``get_ips()``.
        :raises JobMasterRunning: if ``check`` finds the environment illegal.
        """
        ctx = context or job.context
        master_port = ctx.job.master_port
        if local_ip is None:
            local_ip = get_ip()
        else:
            choices_ips = get_ips()
            if local_ip not in choices_ips:
                raise ValueError('IP address must be one of (%s)' % ','.join(choices_ips))
        local = '%s:%s' % (local_ip, master_port)

        JobLoader.__init__(self, job, data_dir, local,
                           context=ctx, copies=copies, force=force)
        LimitionJobLoader.__init__(self, job, context=ctx)

        # check
        self.check()

        self.nodes = nodes
        # Independent copies: nodes are removed from these as they report in.
        self.not_registered = self.nodes[:]
        self.not_finished = self.nodes[:]

        # mq
        self.mq_client = MessageQueueClient(self.nodes, copies=copies)

        # lock
        # Both locks start out held so that run() blocks on them until
        # ready()/worker_finish() (or finish()) releases them.
        self.ready_lock = threading.Lock()
        self.ready_lock.acquire()
        self.finish_lock = threading.Lock()
        self.finish_lock.acquire()

        # logger
        self.logger = get_logger(
            name='cola_master_%s'%self.job.real_name,
            filename=os.path.join(self.root, 'job.log'),
            is_master=True)
        self.client = client
        self.client_handler = None
        if self.client is not None:
            self.client_handler = add_log_client(self.logger, self.client)

        self.init_rpc_server()
        self.init_rate_clear()
        self.init_logger_server(self.logger)

        # register rpc server
        # Each method is exposed under its own name, in this order.
        rpc_names = (
            'client_stop', 'ready', 'worker_finish', 'complete', 'error',
            'get_nodes', 'apply', 'require', 'stop', 'add_node',
            'remove_node',
        )
        for rpc_name in rpc_names:
            self.rpc_server.register_function(getattr(self, rpc_name), rpc_name)

        # register signal
        signal.signal(signal.SIGINT, self.signal_handler)
        signal.signal(signal.SIGTERM, self.signal_handler)

    def init_logger_server(self, logger):
        """Create the log receiver on ``get_ip()`` and serve it in a thread."""
        self.log_server = LogRecordSocketReceiver(host=get_ip(), logger=logger)
        threading.Thread(target=self.log_server.serve_forever).start()

    def stop_logger_server(self):
        """Shut down and stop the log receiver if it was ever created."""
        if hasattr(self, 'log_server'):
            self.log_server.shutdown()
            self.log_server.stop()

    def client_stop(self):
        """Detach the client log handler from the job logger, if any."""
        if self.client_handler is not None:
            self.logger.removeHandler(self.client_handler)

    def check(self):
        """Raise ``JobMasterRunning`` when ``check_env`` reports a conflict."""
        env_legal = self.check_env(force=self.force)
        if not env_legal:
            raise JobMasterRunning('There has been a running job master.')

    def release_lock(self, lock):
        """Release ``lock``, ignoring the error raised if it is not held."""
        try:
            lock.release()
        except threading.ThreadError:
            # Releasing an unlocked lock is expected here (finish() may run
            # after ready()/worker_finish() already released it).
            pass

    def finish(self):
        """Unblock ``run``, tear down services and mark the loader stopped."""
        self.release_lock(self.ready_lock)
        self.release_lock(self.finish_lock)

        LimitionJobLoader.finish(self)
        JobLoader.finish(self)
        self.stop_logger_server()

        # Close every handler independently so one failing handler does not
        # prevent the remaining ones from being closed. Failures are ignored
        # on purpose: this is best-effort cleanup of the logger itself.
        for handler in list(self.logger.handlers):
            try:
                handler.close()
            except Exception:
                pass

        if self.client is not None:
            # Only the host part of the client address is reused; the port
            # comes from the main configuration.
            rpc_client = '%s:%s' % (
                self.client.split(':')[0],
                main_conf.client.port
            )
            client_call(rpc_client, 'stop', ignore=True)

        self.stopped = True

    def stop(self):
        """Ask every node to stop (errors ignored), then finish."""
        for node in self.nodes:
            client_call(node, 'stop', ignore=True)
        self.finish()

    def signal_handler(self, signum, frame):
        """Signal callback for SIGINT/SIGTERM: stop the job."""
        self.stop()

    def get_nodes(self):
        """Return the current list of nodes."""
        return self.nodes

    def ready(self, node):
        """Mark ``node`` as registered; release ``ready_lock`` on the last one."""
        if node in self.not_registered:
            self.not_registered.remove(node)
            if not self.not_registered:
                self.ready_lock.release()

    def worker_finish(self, node):
        """Mark ``node`` as finished; release ``finish_lock`` on the last one."""
        if node in self.not_finished:
            self.not_finished.remove(node)
            if not self.not_finished:
                self.finish_lock.release()

    def add_node(self, node):
        """Announce ``node`` to every existing node, add it and start it.

        The loop variable is deliberately named differently from the
        ``node`` parameter: reusing the name would overwrite the parameter,
        so the wrong node would be announced, appended and started.
        """
        for peer in self.nodes:
            client_call(peer, 'add_node', node, ignore=True)
        self.nodes.append(node)
        client_call(node, 'run', ignore=True)

    def remove_node(self, node):
        """Tell every current node to drop ``node`` and forget it locally.

        The loop variable must not reuse the name ``node``; otherwise the
        last listed node would be removed instead of the requested one.
        """
        for peer in self.nodes:
            client_call(peer, 'remove_node', node, ignore=True)
        if node in self.nodes:
            self.nodes.remove(node)

    def run(self):
        """Start the job once all nodes are ready and wait for completion.

        Blocks on ``ready_lock``; if the loader has not been stopped and
        every node registered, the job's start items are queued and each
        node is told to run. Then blocks on ``finish_lock`` and finally
        reports ``finish_job`` to the master watcher with errors ignored.
        """
        self.ready_lock.acquire()

        if not self.stopped and not self.not_registered:
            self.mq_client.put(self.job.starts)
            for node in self.nodes:
                client_call(node, 'run')

        self.finish_lock.acquire()

        master_watcher = '%s:%s' % (get_ip(), main_conf.master.port)
        client_call(master_watcher, 'finish_job', self.job.real_name, ignore=True)

    def __enter__(self):
        """Return the loader itself for use in a ``with`` statement."""
        return self

    def __exit__(self, type_, value, traceback):
        """Always finish the loader when the ``with`` block exits."""
        self.finish()