def recover(job_path):
    """Clean up a previously-interrupted worker job so it can be restarted.

    Removes the job-level lock file, then sweeps the message-queue store
    and backup directories: stale ``.old`` snapshots and per-directory
    ``lock`` files are deleted, and every remaining regular file is
    renamed to ``<name>.old``.
    """
    job = import_job(job_path)
    data_path = os.path.join(root_dir(), 'data')
    root = os.path.join(data_path, 'worker', 'jobs', job.real_name)

    if os.path.exists(root):
        lock_path = os.path.join(root, 'lock')
        if os.path.exists(lock_path):
            os.remove(lock_path)

        def _recover_dir(dir_):
            # Drop snapshots left over from an earlier recovery pass.
            for entry in os.listdir(dir_):
                if entry.endswith('.old'):
                    os.remove(os.path.join(dir_, entry))
            # Remove stray lock files, then snapshot the live files.
            for entry in os.listdir(dir_):
                full = os.path.join(dir_, entry)
                if entry == 'lock':
                    os.remove(full)
                if os.path.isfile(full) and not entry.endswith('.old'):
                    os.rename(full, full + '.old')

        for mq_dir in (os.path.join(root, 'store'),
                       os.path.join(root, 'backup')):
            if os.path.exists(mq_dir):
                _recover_dir(mq_dir)
def start_master(): path = os.path.join(root_dir(), 'cola', 'master', 'watcher.py') print 'Start master at %s:%s' % (get_ip(), main_conf.master.port) print 'Master will run in background.' subprocess.Popen(['python', path])
def recover(job_path):
    """Reset on-disk state of a worker job after a crash so it can rerun.

    Deletes the job's top-level lock, then for each message-queue
    directory ('store' and 'backup'): purges old ``.old`` snapshots,
    removes any ``lock`` file, and renames surviving regular files to
    ``<name>.old``.
    """
    job = import_job(job_path)
    root = os.path.join(root_dir(), 'data', 'worker', 'jobs', job.real_name)
    if not os.path.exists(root):
        return

    lock_path = os.path.join(root, 'lock')
    if os.path.exists(lock_path):
        os.remove(lock_path)

    def _recover_dir(dir_):
        # Pass 1: discard snapshots from a previous recovery.
        for name in os.listdir(dir_):
            if name.endswith('.old'):
                os.remove(os.path.join(dir_, name))
        # Pass 2: drop lock files and snapshot the remaining files.
        for name in os.listdir(dir_):
            path = os.path.join(dir_, name)
            if name == 'lock':
                os.remove(path)
            if os.path.isfile(path) and not name.endswith('.old'):
                os.rename(path, path + '.old')

    mq_store_dir = os.path.join(root, 'store')
    mq_backup_dir = os.path.join(root, 'backup')
    if os.path.exists(mq_store_dir):
        _recover_dir(mq_store_dir)
    if os.path.exists(mq_backup_dir):
        _recover_dir(mq_backup_dir)
def load_job(path, nodes, context=None):
    """Run a job as master, guarded by an on-disk lock file.

    :param path: path to the job definition
    :param nodes: worker nodes participating in the job
    :param context: optional context overriding the job's own
    :raises ValueError: if ``path`` does not exist
    :raises JobMasterRunning: if another master already holds the lock
    """
    if not os.path.exists(path):
        raise ValueError('Job definition does not exist.')

    job = import_job(path)

    job_name = job.name.replace(' ', '_')
    if job.debug:
        job_name += '_debug'
    holder = os.path.join(root_dir(), 'data', 'master', 'jobs', job_name)
    if not os.path.exists(holder):
        os.makedirs(holder)

    # Create the lock atomically (O_CREAT|O_EXCL) instead of the racy
    # exists()-then-open() pair, which could let two masters both pass
    # the check before either had created the file.
    lock_f = os.path.join(holder, 'lock')
    try:
        os.close(os.open(lock_f, os.O_CREAT | os.O_EXCL | os.O_WRONLY))
    except OSError:
        raise JobMasterRunning('There has been a running job master')

    rpc_server = create_rpc_server(job)
    try:
        loader = JobLoader(job, nodes, rpc_server, context=context)
        loader.run()

        # notify master watcher that the job finished
        master_watcher = '%s:%s' % (get_ip(), main_conf.master.port)
        client_call(master_watcher, 'finish_job', job.real_name)
    finally:
        try:
            os.remove(lock_f)
        finally:
            # Shut down the RPC server even if removing the lock fails.
            rpc_server.shutdown()
def load_job(job_path, nodes, ip_address=None, data_path=None,
             client=None, context=None, force=False):
    """Load a job definition and drive it with a ``MasterJobLoader``."""
    if not os.path.exists(job_path):
        raise ValueError('Job definition does not exist.')

    job = import_job(job_path)

    base = data_path if data_path is not None \
        else os.path.join(root_dir(), 'data')
    root = os.path.join(base, 'master', 'jobs', job.real_name)
    if not os.path.exists(root):
        os.makedirs(root)

    with MasterJobLoader(job, root, nodes, local_ip=ip_address,
                         client=client, context=context,
                         force=force) as job_loader:
        job_loader.run()
def start_master(data_path=None, force=False): path = os.path.join(root_dir(), "cola", "master", "watcher.py") print "Start master at %s:%s" % (get_ip(), main_conf.master.port) print "Master will run in background. Please do not shut down the terminal." cmds = ["python", path] if data_path is not None: cmds.extend(["-d", data_path]) if force is True: cmds.append("-f") subprocess.Popen(cmds)
def start_worker(master, data_path=None, force=False): path = os.path.join(root_dir(), 'cola', 'worker', 'watcher.py') print 'Start worker at %s:%s' % (get_ip(), main_conf.worker.port) print 'Worker will run in background. Please do not shut down the terminal.' cmds = ['python', path, '-m', master] if data_path is not None: cmds.extend(['-d', data_path]) if force is True: cmds.append('-f') subprocess.Popen(cmds)
def load_job(path, master=None):
    """Load and run a job on this worker node.

    When *master* is None the job runs standalone: start URLs are put on
    the local message queue and the loader runs to completion. Otherwise
    this node fetches the node list from the master, registers as ready,
    and waits to be driven.

    :param path: path to the job definition
    :param master: ``ip:port`` of the master, or None for standalone mode
    :raises ValueError: if *path* does not exist
    """
    if not os.path.exists(path):
        raise ValueError('Job definition does not exist.')

    job = import_job(path)

    holder = os.path.join(root_dir(), 'data', 'worker', 'jobs', job.real_name)
    mq_holder = os.path.join(holder, 'mq')
    if not os.path.exists(mq_holder):
        os.makedirs(mq_holder)

    # Logger
    logger = get_logger(os.path.join(holder, 'job.log'))

    local_node = '%s:%s' % (get_ip(), job.context.job.port)
    nodes = [local_node]
    if master is not None:
        nodes = client_call(master, 'get_nodes')

    # Bloom filter hook, used by the MQ to skip already-seen entries.
    bloom_filter_file = os.path.join(holder, 'bloomfilter')
    bloom_filter_hook = create_bloom_filter_hook(bloom_filter_file, job)

    rpc_server = create_rpc_server(job)
    loader = JobLoader(job, rpc_server, logger=logger, master=master)
    # Distributed mode keeps 2 copies of each MQ message; standalone keeps 1.
    loader.init_mq(nodes, local_node, mq_holder,
                   verify_exists_hook=bloom_filter_hook,
                   copies=2 if master else 1)

    if master is None:
        try:
            loader.mq.put(job.starts)
            loader.run()
        finally:
            rpc_server.shutdown()
    else:
        try:
            client_call(master, 'ready', local_node)

            def _start():
                # NOTE(review): this waits until the loader is stopped and
                # only then calls run() -- presumably the master kicks off
                # the actual work via RPC first; confirm before changing.
                while not loader.stopped:
                    time.sleep(TIME_SLEEP)
                loader.run()

            thread = threading.Thread(target=_start)
            thread.start()
            thread.join()
        finally:
            rpc_server.shutdown()
def start_master(ip=None, data_path=None, force=False): path = os.path.join(root_dir(), 'cola', 'master', 'watcher.py') print 'Start master at %s:%s' % (ip, main_conf.master.port) print 'Master will run in background. Please do not shut down the terminal.' cmds = ['python', path] if ip is not None: cmds.extend(['-i', ip]) if data_path is not None: cmds.extend(['-d', data_path]) if force is True: cmds.append('-f') subprocess.Popen(cmds)
def load_job(path, master=None):
    """Load a worker job and run it, standalone or under a master.

    :param path: path to the job definition
    :param master: ``ip:port`` of the master watcher, or None to run
        the job locally end-to-end
    :raises ValueError: if *path* does not exist
    """
    if not os.path.exists(path):
        raise ValueError('Job definition does not exist.')

    job = import_job(path)

    holder = os.path.join(
        root_dir(), 'data', 'worker', 'jobs', job.real_name)
    mq_holder = os.path.join(holder, 'mq')
    if not os.path.exists(mq_holder):
        os.makedirs(mq_holder)

    # Logger
    logger = get_logger(os.path.join(holder, 'job.log'))

    local_node = '%s:%s' % (get_ip(), job.context.job.port)
    nodes = [local_node]
    if master is not None:
        # In distributed mode the master supplies the full node list.
        nodes = client_call(master, 'get_nodes')

    # Bloom filter hook (dedup filter backed by a file in the job dir).
    bloom_filter_file = os.path.join(holder, 'bloomfilter')
    bloom_filter_hook = create_bloom_filter_hook(bloom_filter_file, job)

    rpc_server = create_rpc_server(job)
    loader = JobLoader(job, rpc_server, logger=logger, master=master)
    loader.init_mq(nodes, local_node, mq_holder,
                   verify_exists_hook=bloom_filter_hook,
                   copies=2 if master else 1)

    if master is None:
        # Standalone: seed the start URLs and run to completion.
        try:
            loader.mq.put(job.starts)
            loader.run()
        finally:
            rpc_server.shutdown()
    else:
        try:
            client_call(master, 'ready', local_node)

            def _start():
                # NOTE(review): run() fires only after loader.stopped
                # becomes true -- looks inverted but matches the other
                # copy of this function; verify master-side protocol.
                while not loader.stopped:
                    time.sleep(TIME_SLEEP)
                loader.run()

            thread = threading.Thread(target=_start)
            thread.start()
            thread.join()
        finally:
            rpc_server.shutdown()
def start_master(ip=None, data_path=None, force=False): path = os.path.join(root_dir(), 'cola', 'master', 'watcher.py') ip_str = ip if ip is not None else get_ip() print 'Start master at %s:%s' % (ip_str, main_conf.master.port) print 'Master will run in background. Please do not shut down the terminal.' cmds = ['python', path] if ip is not None: cmds.extend(['-i', ip]) if data_path is not None: cmds.extend(['-d', data_path]) if force is True: cmds.append('-f') subprocess.Popen(cmds)
def setUp(self):
    """Create scratch watch/zip/job dirs, zip the wiki job, build a watcher."""
    self.dir = tempfile.mkdtemp()

    def _ensure(name):
        # Create (if needed) and return a subdirectory of the scratch root.
        path = os.path.join(self.dir, name)
        if not os.path.exists(path):
            os.mkdir(path)
        return path

    self.root = _ensure("watch")
    self.zip_dir = _ensure("zip")
    self.job_dir = _ensure("job")

    zip_file = os.path.join(self.zip_dir, "wiki.zip")
    src_dir = os.path.join(root_dir(), "contrib", "wiki")
    self.zip_file = ZipHandler.compress(zip_file, src_dir, type_filters=("pyc",))

    self.master_watcher = MasterWatcher(self.root, self.zip_dir, self.job_dir)
def setUp(self):
    """Prepare zip/job dirs, compress the wiki job, and start an RPC server."""
    self.dir = tempfile.mkdtemp()

    self.zip_dir = os.path.join(self.dir, "zip")
    self.job_dir = os.path.join(self.dir, "job")
    for d in (self.zip_dir, self.job_dir):
        if not os.path.exists(d):
            os.mkdir(d)

    zip_file = os.path.join(self.zip_dir, "wiki.zip")
    src_dir = os.path.join(root_dir(), "contrib", "wiki")
    self.zip_file = ZipHandler.compress(zip_file, src_dir, type_filters=("pyc",))

    self.rpc_server = ColaRPCServer(("localhost", main_conf.master.port))
    self.master_watcher = MasterWatcher(self.rpc_server, self.zip_dir, self.job_dir)

    # Serve RPC requests on a daemon thread so teardown can exit freely.
    thd = threading.Thread(target=self.rpc_server.serve_forever)
    thd.setDaemon(True)
    thd.start()
def setUp(self):
    """Build temp watch/zip/job directories and a MasterWatcher over them."""
    self.dir = tempfile.mkdtemp()

    self.root = os.path.join(self.dir, 'watch')
    self.zip_dir = os.path.join(self.dir, 'zip')
    self.job_dir = os.path.join(self.dir, 'job')
    for path in (self.root, self.zip_dir, self.job_dir):
        if not os.path.exists(path):
            os.mkdir(path)

    zip_file = os.path.join(self.zip_dir, 'wiki.zip')
    src_dir = os.path.join(root_dir(), 'contrib', 'wiki')
    self.zip_file = ZipHandler.compress(zip_file, src_dir,
                                        type_filters=('pyc', ))

    self.master_watcher = MasterWatcher(self.root, self.zip_dir, self.job_dir)
def setUp(self):
    """Create zip/job dirs, zip the wiki contrib job, and spin up RPC."""
    self.dir = tempfile.mkdtemp()

    def _subdir(name):
        # Make (if missing) and return <scratch>/<name>.
        path = os.path.join(self.dir, name)
        if not os.path.exists(path):
            os.mkdir(path)
        return path

    self.zip_dir = _subdir('zip')
    self.job_dir = _subdir('job')

    src_dir = os.path.join(root_dir(), 'contrib', 'wiki')
    self.zip_file = ZipHandler.compress(os.path.join(self.zip_dir, 'wiki.zip'),
                                        src_dir, type_filters=('pyc', ))

    self.rpc_server = ColaRPCServer(('localhost', main_conf.master.port))
    self.master_watcher = MasterWatcher(self.rpc_server, self.zip_dir,
                                        self.job_dir)

    thd = threading.Thread(target=self.rpc_server.serve_forever)
    thd.setDaemon(True)  # daemon so it dies with the test process
    thd.start()
def load_job(job_path, data_path=None, master=None, force=False):
    """Run a job on this worker, standalone or attached to *master*."""
    if not os.path.exists(job_path):
        raise ValueError("Job definition does not exist.")

    job = import_job(job_path)

    base = data_path if data_path is not None \
        else os.path.join(root_dir(), "data")
    root = os.path.join(base, "worker", "jobs", job.real_name)
    if not os.path.exists(root):
        os.makedirs(root)

    if master is None:
        with StandaloneWorkerJobLoader(job, root, force=force) as job_loader:
            job_loader.run()
        return

    nodes = client_call(master, "get_nodes")
    local = "%s:%s" % (get_ip(), job.context.job.port)
    # NOTE(review): 'ready' is announced twice (here and below) -- looks
    # redundant, but preserved as-is; confirm master-side semantics first.
    client_call(master, "ready", local)
    with WorkerJobLoader(job, root, master, local=local,
                         nodes=nodes, force=force) as job_loader:
        client_call(master, "ready", local)
        job_loader.ready_for_run()
def load_job(job_path, data_path=None, master=None, force=False):
    """Load a job for this worker and run it.

    Standalone mode (master is None) runs the job to completion locally;
    otherwise the worker registers with *master* and waits to be driven.

    :param job_path: path to the job definition
    :param data_path: data root; defaults to ``<root_dir>/data``
    :param master: ``ip:port`` of the master watcher, or None
    :param force: passed through to the loader to force-start
    :raises ValueError: if *job_path* does not exist
    """
    if not os.path.exists(job_path):
        raise ValueError('Job definition does not exist.')

    job = import_job(job_path)

    if data_path is None:
        data_path = os.path.join(root_dir(), 'data')
    root = os.path.join(data_path, 'worker', 'jobs', job.real_name)
    if not os.path.exists(root):
        os.makedirs(root)

    if master is None:
        with StandaloneWorkerJobLoader(job, root, force=force) as job_loader:
            job_loader.run()
    else:
        nodes = client_call(master, 'get_nodes')
        local = '%s:%s' % (get_ip(), job.context.job.port)
        # NOTE(review): 'ready' is called both before and inside the
        # context manager -- possibly redundant; confirm before removing.
        client_call(master, 'ready', local)
        with WorkerJobLoader(job, root, master, local=local,
                             nodes=nodes, force=force) \
                as job_loader:
            client_call(master, 'ready', local)
            job_loader.ready_for_run()
def load_job(job_path, data_path=None, master=None, force=False):
    """Run a worker job, either standalone or under a master's control."""
    if not os.path.exists(job_path):
        raise ValueError('Job definition does not exist.')

    job = import_job(job_path)

    root_base = os.path.join(root_dir(), 'data') if data_path is None \
        else data_path
    root = os.path.join(root_base, 'worker', 'jobs', job.real_name)
    if not os.path.exists(root):
        os.makedirs(root)

    if master is not None:
        nodes = client_call(master, 'get_nodes')
        local = '%s:%s' % (get_ip(), job.context.job.port)
        # NOTE(review): 'ready' is announced twice by the original code;
        # kept unchanged pending confirmation of master-side behavior.
        client_call(master, 'ready', local)
        with WorkerJobLoader(job, root, master, local=local, nodes=nodes,
                             force=force) as job_loader:
            client_call(master, 'ready', local)
            job_loader.ready_for_run()
    else:
        with StandaloneWorkerJobLoader(job, root, force=force) as job_loader:
            job_loader.run()
help="master connected to(in the former of `ip:port` or `ip`)", ) parser.add_argument( "-d", "--data", metavar="data root directory", nargs="?", default=None, const=None, help="root directory to put data", ) parser.add_argument("-f", "--force", metavar="force start", nargs="?", default=False, const=True, type=bool) args = parser.parse_args() data_path = args.data if data_path is None: data_path = os.path.join(root_dir(), "data") force = args.force master = args.master if ":" not in master: master = "%s:%s" % (master, main_conf.master.port) root = os.path.join(data_path, "worker", "watcher") zip_dir = os.path.join(data_path, "zip") job_dir = os.path.join(data_path, "jobs") for dir_ in (root, zip_dir, job_dir): makedirs(dir_) with WorkerWatcher(master, root, zip_dir, job_dir, data_path=data_path, force=force) as master_watcher: master_watcher.run()
thread = self.start_check_worker() thread.join() def makedirs(path): if not os.path.exists(master_watcher_dir): os.makedirs(master_watcher_dir) def create_rpc_server(): rpc_server = ColaRPCServer((get_ip(), main_conf.master.port)) thd = threading.Thread(target=rpc_server.serve_forever) thd.setDaemon(True) thd.start() return rpc_server if __name__ == "__main__": root = root_dir() master_watcher_dir = os.path.join(root, 'data', 'master', 'watcher') makedirs(master_watcher_dir) zip_dir = os.path.join(root, 'data', 'zip') makedirs(zip_dir) job_dir = os.path.join(root, 'data', 'jobs') makedirs(job_dir) lock_f = os.path.join(master_watcher_dir, 'lock') if os.path.exists(lock_f): raise MasterWatcherRunning('There has been a running master watcher.') rpc_server = create_rpc_server() try: open(lock_f, 'w').close()
'--ip', metavar='IP address', nargs='?', default=None, const=None, help='IP Address to start') parser.add_argument('-f', '--force', metavar='force start', nargs='?', default=False, const=True, type=bool) args = parser.parse_args() data_path = args.data if data_path is None: data_path = os.path.join(root_dir(), 'data') ip = args.ip force = args.force root = os.path.join(data_path, 'master', 'watcher') zip_dir = os.path.join(data_path, 'zip') job_dir = os.path.join(data_path, 'jobs') for dir_ in (root, zip_dir, job_dir): makedirs(dir_) with MasterWatcher(root, zip_dir, job_dir, ip_address=ip, data_path=data_path, force=force) \ as master_watcher: master_watcher.run()
if __name__ == "__main__":
    import argparse

    # Command line: -m/--master is required; -d/--data and -f/--force
    # mirror the flags accepted by the master watcher.
    parser = argparse.ArgumentParser('Cola worker watcher')
    parser.add_argument('-m', '--master', metavar='master watcher',
                        required=True,
                        help='master connected to(in the former of `ip:port` or `ip`)')
    parser.add_argument('-d', '--data', metavar='data root directory',
                        nargs='?', default=None, const=None,
                        help='root directory to put data')
    parser.add_argument('-f', '--force', metavar='force start',
                        nargs='?', default=False, const=True, type=bool)
    args = parser.parse_args()

    data_path = args.data if args.data is not None \
        else os.path.join(root_dir(), 'data')
    force = args.force

    # Accept a bare ip and append the configured master port.
    master = args.master
    if ':' not in master:
        master = '%s:%s' % (master, main_conf.master.port)

    root = os.path.join(data_path, 'worker', 'watcher')
    zip_dir = os.path.join(data_path, 'zip')
    job_dir = os.path.join(data_path, 'jobs')
    for dir_ in (root, zip_dir, job_dir):
        makedirs(dir_)

    with WorkerWatcher(master, root, zip_dir, job_dir,
                       data_path=data_path, force=force) as master_watcher:
        master_watcher.run()