Example #1
0
    def __init__(self, root, zip_dir, job_dir, ip_address=None, data_path=None, force=False):
        """Store the configuration, resolve the local IP and expose the RPC API.

        If ``ip_address`` is None the value returned by ``get_ip()`` is used.
        Otherwise the given address must be one of the addresses returned by
        ``get_ips()``. The port is read from ``main_conf.master.port``.

        After the attributes are set, ``check`` is run with ``force``, the RPC
        server is initialised, the handlers are registered and
        ``set_receiver`` is called with ``zip_dir``.

        Raises:
            ValueError: if ``ip_address`` is given but is not in ``get_ips()``.
        """
        self.root = root
        self.zip_dir = zip_dir
        self.job_dir = job_dir
        self.data_path = data_path
        self.force = force

        self.nodes_watchers = {}
        self.running_jobs = {}
        self.black_list = []

        if ip_address is not None:
            # An explicit address is only accepted if get_ips() reports it.
            local_ips = get_ips()
            if ip_address not in local_ips:
                raise ValueError("IP address must be one of (%s)" % ",".join(local_ips))
        else:
            ip_address = get_ip()
        self.ip_address = ip_address
        self.port = main_conf.master.port

        self.stopped = False

        self.check(force=force)
        self.init_rpc_server()

        # (handler, exposed RPC name) pairs, registered in this order.
        # Note that the heartbeat handler is exposed under a shorter name.
        exposed = (
            (self.register_watcher_heartbeat, "register_heartbeat"),
            (self.stop, "stop"),
            (self.list_jobs, "list_jobs"),
            (self.start_job, "start_job"),
            (self.stop_job, "stop_job"),
            (self.finish_job, "finish_job"),
            (self.clear_job, "clear_job"),
            (self.list_job_dirs, "list_job_dirs"),
            (self.list_workers, "list_workers"),
        )
        for handler, rpc_name in exposed:
            self.rpc_server.register_function(handler, rpc_name)

        self.set_receiver(zip_dir)
    def __init__(self,
                 root,
                 zip_dir,
                 job_dir,
                 ip_address=None,
                 data_path=None,
                 force=False):
        """Initialise instance state, pick the local address and wire up RPC.

        ``ip_address`` defaults to the result of ``get_ip()``. A caller-supplied
        address is validated against ``get_ips()``. The port comes from
        ``main_conf.master.port``.

        The method then runs ``check(force=force)``, initialises the RPC
        server, registers the RPC handlers and finally calls
        ``set_receiver(zip_dir)``.

        Raises:
            ValueError: when ``ip_address`` is not None and is not one of the
                addresses returned by ``get_ips()``.
        """
        # Plain configuration passed in by the caller.
        self.root = root
        self.zip_dir = zip_dir
        self.job_dir = job_dir
        self.data_path = data_path
        self.force = force

        # Bookkeeping containers, all empty at start-up.
        self.nodes_watchers = {}
        self.running_jobs = {}
        self.black_list = []

        if ip_address is None:
            ip_address = get_ip()
        else:
            candidates = get_ips()
            if ip_address not in candidates:
                message = 'IP address must be one of (%s)' % ','.join(candidates)
                raise ValueError(message)
        self.ip_address = ip_address
        self.port = main_conf.master.port

        self.stopped = False

        self.check(force=force)
        self.init_rpc_server()

        register = self.rpc_server.register_function
        # The heartbeat handler is the only one whose RPC name differs from
        # the method name, so it is registered explicitly.
        register(self.register_watcher_heartbeat, 'register_heartbeat')
        # The remaining handlers are exposed under their own method names.
        same_named = ('stop', 'list_jobs', 'start_job', 'stop_job',
                      'finish_job', 'clear_job', 'list_job_dirs',
                      'list_workers')
        for rpc_name in same_named:
            register(getattr(self, rpc_name), rpc_name)

        self.set_receiver(zip_dir)
Example #3
0
File: loader.py Project: Ganer/cola
 def __init__(self, job, data_dir, nodes, local_ip=None, client=None,
              context=None, copies=1, force=False):
     """Build the loader: resolve the local address, initialise both base
     classes, then set up messaging, locks, logging, RPC and signal handlers.

     ``context`` falls back to ``job.context`` when it is falsy. ``local_ip``
     defaults to ``get_ip()``; an explicit value must be one of the addresses
     returned by ``get_ips()``. The local endpoint is formatted as
     ``"<ip>:<master_port>"`` using ``ctx.job.master_port``.

     Raises:
         ValueError: if ``local_ip`` is given but is not in ``get_ips()``.
     """
     ctx = context if context else job.context
     master_port = ctx.job.master_port

     if local_ip is not None:
         local_ips = get_ips()
         if local_ip not in local_ips:
             raise ValueError('IP address must be one of (%s)' % ','.join(local_ips))
     else:
         local_ip = get_ip()
     local = '%s:%s' % (local_ip, master_port)

     # Both base-class initialisers are invoked explicitly, in this order.
     JobLoader.__init__(self, job, data_dir, local,
                        context=ctx, copies=copies, force=force)
     LimitionJobLoader.__init__(self, job, context=ctx)

     self.check()

     # Two independent shallow copies of the node list are kept alongside it.
     self.nodes = nodes
     self.not_registered = self.nodes[:]
     self.not_finished = self.nodes[:]

     self.mq_client = MessageQueueClient(self.nodes, copies=copies)

     # Both locks are acquired right after creation, so they start out held;
     # nothing in this method releases them.
     self.ready_lock = threading.Lock()
     self.ready_lock.acquire()
     self.finish_lock = threading.Lock()
     self.finish_lock.acquire()

     # The job log is written to 'job.log' under self.root.
     logger_name = 'cola_master_%s' % self.job.real_name
     log_path = os.path.join(self.root, 'job.log')
     self.logger = get_logger(name=logger_name, filename=log_path,
                              is_master=True)
     self.client = client
     self.client_handler = None
     if self.client is not None:
         # Only attach a client log handler when a client was supplied.
         self.client_handler = add_log_client(self.logger, self.client)

     self.init_rpc_server()
     self.init_rate_clear()
     self.init_logger_server(self.logger)

     # (handler, exposed RPC name) pairs, registered in this order.
     exposed = (
         (self.client_stop, 'client_stop'),
         (self.ready, 'ready'),
         (self.worker_finish, 'worker_finish'),
         (self.complete, 'complete'),
         (self.error, 'error'),
         (self.get_nodes, 'get_nodes'),
         (self.apply, 'apply'),
         (self.require, 'require'),
         (self.stop, 'stop'),
         (self.add_node, 'add_node'),
         (self.remove_node, 'remove_node'),
     )
     for handler, rpc_name in exposed:
         self.rpc_server.register_function(handler, rpc_name)

     # SIGINT and SIGTERM are both routed to the same handler.
     for signum in (signal.SIGINT, signal.SIGTERM):
         signal.signal(signum, self.signal_handler)
Example #4
0
    def __init__(self,
                 job,
                 data_dir,
                 nodes,
                 local_ip=None,
                 client=None,
                 context=None,
                 copies=1,
                 force=False):
        """Construct the loader and bring up its messaging, logging and RPC.

        The effective context is ``context`` or, when that is falsy,
        ``job.context``. ``local_ip`` defaults to ``get_ip()``; a value passed
        by the caller must appear in ``get_ips()``. The local endpoint handed
        to ``JobLoader.__init__`` is ``"<local_ip>:<ctx.job.master_port>"``.

        Raises:
            ValueError: when ``local_ip`` is not None and is not one of the
                addresses returned by ``get_ips()``.
        """
        ctx = context or job.context
        master_port = ctx.job.master_port

        if local_ip is None:
            local_ip = get_ip()
        else:
            candidates = get_ips()
            if local_ip not in candidates:
                message = 'IP address must be one of (%s)' % ','.join(candidates)
                raise ValueError(message)
        local = '%s:%s' % (local_ip, master_port)

        # Initialise both bases explicitly; JobLoader first.
        JobLoader.__init__(self, job, data_dir, local,
                           context=ctx, copies=copies, force=force)
        LimitionJobLoader.__init__(self, job, context=ctx)

        self.check()

        # Keep the node list plus two separate shallow copies of it.
        self.nodes = nodes
        self.not_registered = self.nodes[:]
        self.not_finished = self.nodes[:]

        # Message queue client over the given nodes.
        self.mq_client = MessageQueueClient(self.nodes, copies=copies)

        # Each lock is taken immediately after it is created, so both are
        # held when this method returns.
        self.ready_lock = threading.Lock()
        self.ready_lock.acquire()
        self.finish_lock = threading.Lock()
        self.finish_lock.acquire()

        # Logger writing to 'job.log' under self.root.
        logger_name = 'cola_master_%s' % self.job.real_name
        log_file = os.path.join(self.root, 'job.log')
        self.logger = get_logger(name=logger_name,
                                 filename=log_file,
                                 is_master=True)
        self.client = client
        self.client_handler = None
        if self.client is not None:
            self.client_handler = add_log_client(self.logger, self.client)

        self.init_rpc_server()
        self.init_rate_clear()
        self.init_logger_server(self.logger)

        # Every handler is exposed under its own method name, so the names
        # alone drive the registration (order preserved).
        register = self.rpc_server.register_function
        rpc_names = ('client_stop', 'ready', 'worker_finish', 'complete',
                     'error', 'get_nodes', 'apply', 'require', 'stop',
                     'add_node', 'remove_node')
        for rpc_name in rpc_names:
            register(getattr(self, rpc_name), rpc_name)

        # Route SIGINT and SIGTERM to the same handler.
        for signum in (signal.SIGINT, signal.SIGTERM):
            signal.signal(signum, self.signal_handler)