def __init__(self, root, zip_dir, job_dir, ip_address=None,
             data_path=None, force=False):
    """Initialise instance state, resolve the IP address and register RPC functions.

    Args:
        root: Stored as ``self.root``.
        zip_dir: Stored as ``self.zip_dir`` and later passed to
            ``self.set_receiver``.
        job_dir: Stored as ``self.job_dir``.
        ip_address: Address to use. When ``None`` the value returned by
            ``get_ip()`` is used; otherwise it must be one of the values
            returned by ``get_ips()``.
        data_path: Stored as ``self.data_path``.
        force: Stored as ``self.force`` and forwarded to ``self.check``.

    Raises:
        ValueError: If ``ip_address`` is given but is not in ``get_ips()``.
    """
    # Plain constructor arguments.
    self.root = root
    self.zip_dir = zip_dir
    self.job_dir = job_dir
    self.data_path = data_path
    self.force = force

    # Bookkeeping containers, all empty at start-up.
    self.nodes_watchers = {}
    self.running_jobs = {}
    self.black_list = []

    # An explicitly supplied address has to be one of the known addresses;
    # without one, fall back to whatever get_ip() reports.
    if ip_address is not None:
        known_ips = get_ips()
        if ip_address not in known_ips:
            raise ValueError("IP address must be one of (%s)" % ",".join(known_ips))
    else:
        ip_address = get_ip()
    self.ip_address = ip_address
    self.port = main_conf.master.port

    self.stopped = False

    self.check(force=force)
    self.init_rpc_server()

    # (callable, exposed name) pairs, registered in this exact order.
    rpc_functions = (
        (self.register_watcher_heartbeat, "register_heartbeat"),
        (self.stop, "stop"),
        (self.list_jobs, "list_jobs"),
        (self.start_job, "start_job"),
        (self.stop_job, "stop_job"),
        (self.finish_job, "finish_job"),
        (self.clear_job, "clear_job"),
        (self.list_job_dirs, "list_job_dirs"),
        (self.list_workers, "list_workers"),
    )
    for handler, rpc_name in rpc_functions:
        self.rpc_server.register_function(handler, rpc_name)

    self.set_receiver(zip_dir)
def __init__(self, root, zip_dir, job_dir, ip_address=None,
             data_path=None, force=False):
    """Store the constructor arguments, pick an IP address and expose RPC calls.

    Args:
        root: Stored as ``self.root``.
        zip_dir: Stored as ``self.zip_dir`` and later passed to
            ``self.set_receiver``.
        job_dir: Stored as ``self.job_dir``.
        ip_address: Address to use. ``None`` means "use ``get_ip()``";
            any other value must appear in the list returned by
            ``get_ips()``.
        data_path: Stored as ``self.data_path``.
        force: Stored as ``self.force`` and forwarded to ``self.check``.

    Raises:
        ValueError: If ``ip_address`` is given but is not in ``get_ips()``.
    """
    # Constructor arguments kept verbatim on the instance.
    self.root = root
    self.zip_dir = zip_dir
    self.job_dir = job_dir
    self.data_path = data_path
    self.force = force

    # Runtime registries start out empty.
    self.nodes_watchers = {}
    self.running_jobs = {}
    self.black_list = []

    # Validate a caller-supplied address; otherwise ask get_ip() for one.
    if ip_address is not None:
        allowed_ips = get_ips()
        if ip_address not in allowed_ips:
            raise ValueError('IP address must be one of (%s)' % ','.join(allowed_ips))
    else:
        ip_address = get_ip()
    self.ip_address = ip_address
    self.port = main_conf.master.port

    self.stopped = False

    self.check(force=force)
    self.init_rpc_server()

    # Registration order is the same as listing the calls one by one.
    exposed = (
        (self.register_watcher_heartbeat, 'register_heartbeat'),
        (self.stop, 'stop'),
        (self.list_jobs, 'list_jobs'),
        (self.start_job, 'start_job'),
        (self.stop_job, 'stop_job'),
        (self.finish_job, 'finish_job'),
        (self.clear_job, 'clear_job'),
        (self.list_job_dirs, 'list_job_dirs'),
        (self.list_workers, 'list_workers'),
    )
    for func, public_name in exposed:
        self.rpc_server.register_function(func, public_name)

    self.set_receiver(zip_dir)
def __init__(self, job, data_dir, nodes, local_ip=None, client=None,
             context=None, copies=1, force=False):
    """Initialise both loader bases, then set up nodes, locks, logging, RPC and signals.

    Args:
        job: Job object; ``job.context`` is used when ``context`` is falsy.
        data_dir: Forwarded to ``JobLoader.__init__``.
        nodes: Stored as ``self.nodes``; slice copies of it seed
            ``self.not_registered`` and ``self.not_finished``.
        local_ip: Address to use. ``None`` means "use ``get_ip()``";
            any other value must appear in ``get_ips()``.
        client: Stored as ``self.client``; when not ``None`` it is attached
            to the logger through ``add_log_client``.
        context: Overrides ``job.context`` when truthy.
        copies: Forwarded to ``JobLoader.__init__`` and
            ``MessageQueueClient``.
        force: Forwarded to ``JobLoader.__init__``.

    Raises:
        ValueError: If ``local_ip`` is given but is not in ``get_ips()``.
    """
    effective_ctx = context or job.context
    master_port = effective_ctx.job.master_port

    # Validate a caller-supplied address; otherwise ask get_ip() for one.
    if local_ip is not None:
        known_ips = get_ips()
        if local_ip not in known_ips:
            raise ValueError('IP address must be one of (%s)' % ','.join(known_ips))
    else:
        local_ip = get_ip()
    local_address = '%s:%s' % (local_ip, master_port)

    # Both base initialisers are invoked explicitly, in this order.
    JobLoader.__init__(self, job, data_dir, local_address,
                       context=effective_ctx, copies=copies, force=force)
    LimitionJobLoader.__init__(self, job, context=effective_ctx)

    self.check()

    # Independent copies so the two pending lists can change separately.
    self.nodes = nodes
    self.not_registered = self.nodes[:]
    self.not_finished = self.nodes[:]

    # Message-queue client over the same nodes.
    self.mq_client = MessageQueueClient(self.nodes, copies=copies)

    # Both locks are created already held.
    # NOTE(review): presumably released elsewhere to signal "ready" and
    # "finished" -- confirm against the methods that use them.
    self.ready_lock = threading.Lock()
    self.ready_lock.acquire()
    self.finish_lock = threading.Lock()
    self.finish_lock.acquire()

    # Logger writing to job.log under self.root.
    # NOTE(review): self.root is not assigned in this method; presumably
    # set by one of the base initialisers above -- confirm.
    logger_name = 'cola_master_%s' % self.job.real_name
    log_file = os.path.join(self.root, 'job.log')
    self.logger = get_logger(name=logger_name, filename=log_file, is_master=True)

    self.client = client
    self.client_handler = None
    if self.client is not None:
        self.client_handler = add_log_client(self.logger, self.client)

    self.init_rpc_server()
    self.init_rate_clear()
    self.init_logger_server(self.logger)

    # (callable, exposed name) pairs, registered in this exact order.
    rpc_functions = (
        (self.client_stop, 'client_stop'),
        (self.ready, 'ready'),
        (self.worker_finish, 'worker_finish'),
        (self.complete, 'complete'),
        (self.error, 'error'),
        (self.get_nodes, 'get_nodes'),
        (self.apply, 'apply'),
        (self.require, 'require'),
        (self.stop, 'stop'),
        (self.add_node, 'add_node'),
        (self.remove_node, 'remove_node'),
    )
    for handler, rpc_name in rpc_functions:
        self.rpc_server.register_function(handler, rpc_name)

    # Route SIGINT and SIGTERM to the same handler.
    for signum in (signal.SIGINT, signal.SIGTERM):
        signal.signal(signum, self.signal_handler)
def __init__(self, job, data_dir, nodes, local_ip=None, client=None,
             context=None, copies=1, force=False):
    """Run both loader initialisers and wire up nodes, locks, logging, RPC and signals.

    Args:
        job: Job object; ``job.context`` is used when ``context`` is falsy.
        data_dir: Forwarded to ``JobLoader.__init__``.
        nodes: Stored as ``self.nodes``; slice copies of it seed
            ``self.not_registered`` and ``self.not_finished``.
        local_ip: Address to use. When ``None`` the value returned by
            ``get_ip()`` is used; otherwise it must be one of the values
            returned by ``get_ips()``.
        client: Stored as ``self.client``; when not ``None`` it is attached
            to the logger through ``add_log_client``.
        context: Overrides ``job.context`` when truthy.
        copies: Forwarded to ``JobLoader.__init__`` and
            ``MessageQueueClient``.
        force: Forwarded to ``JobLoader.__init__``.

    Raises:
        ValueError: If ``local_ip`` is given but is not in ``get_ips()``.
    """
    active_context = context or job.context
    port = active_context.job.master_port

    # An explicit address must be one of the known addresses.
    if local_ip is not None:
        allowed_ips = get_ips()
        if local_ip not in allowed_ips:
            raise ValueError('IP address must be one of (%s)' % ','.join(allowed_ips))
    else:
        local_ip = get_ip()
    endpoint = '%s:%s' % (local_ip, port)

    # Base initialisers are called explicitly and in this order.
    JobLoader.__init__(self, job, data_dir, endpoint,
                       context=active_context, copies=copies, force=force)
    LimitionJobLoader.__init__(self, job, context=active_context)

    self.check()

    # Separate copies so each pending list can be modified on its own.
    self.nodes = nodes
    self.not_registered = self.nodes[:]
    self.not_finished = self.nodes[:]

    # Message-queue client over the same nodes.
    self.mq_client = MessageQueueClient(self.nodes, copies=copies)

    # Both locks start out acquired.
    # NOTE(review): presumably released by other methods to signal the
    # "ready" and "finished" states -- confirm where they are released.
    self.ready_lock = threading.Lock()
    self.ready_lock.acquire()
    self.finish_lock = threading.Lock()
    self.finish_lock.acquire()

    # Logger writing to job.log under self.root.
    # NOTE(review): self.root is not assigned in this method; presumably
    # provided by one of the base initialisers -- confirm.
    master_log_name = 'cola_master_%s' % self.job.real_name
    master_log_path = os.path.join(self.root, 'job.log')
    self.logger = get_logger(name=master_log_name,
                             filename=master_log_path,
                             is_master=True)

    self.client = client
    self.client_handler = None
    if self.client is not None:
        self.client_handler = add_log_client(self.logger, self.client)

    self.init_rpc_server()
    self.init_rate_clear()
    self.init_logger_server(self.logger)

    # Registration order is the same as listing the calls one by one.
    exposed = (
        (self.client_stop, 'client_stop'),
        (self.ready, 'ready'),
        (self.worker_finish, 'worker_finish'),
        (self.complete, 'complete'),
        (self.error, 'error'),
        (self.get_nodes, 'get_nodes'),
        (self.apply, 'apply'),
        (self.require, 'require'),
        (self.stop, 'stop'),
        (self.add_node, 'add_node'),
        (self.remove_node, 'remove_node'),
    )
    for func, public_name in exposed:
        self.rpc_server.register_function(func, public_name)

    # SIGINT and SIGTERM share one handler.
    for sig in (signal.SIGINT, signal.SIGTERM):
        signal.signal(sig, self.signal_handler)