def _unzip(self, job_name):
    """Extract the uploaded zip for `job_name` into the job directory.

    Any previously extracted copy of the job is removed first so the
    fresh archive contents fully replace it.
    """
    archive = os.path.join(self.zip_dir, job_name + '.zip')
    extracted = os.path.join(self.job_dir, job_name)
    # Drop a stale extraction, if any, before unpacking again.
    if os.path.exists(extracted):
        shutil.rmtree(extracted)
    if os.path.exists(archive):
        ZipHandler.uncompress(archive, self.job_dir)
def runLocalJob(master, job_path, client=None):
    ''' push local job to cola cluster and run

    :param master: address of the cola master ("host:port")
    :param job_path: local directory containing the job definition
    :param client: optional client address forwarded to the master's
        `start_job` call. BUG FIX: the original passed an undefined
        global name `client`, which raised NameError at the final call;
        it is now an explicit keyword parameter (default None keeps the
        signature backward-compatible).
    '''
    if not os.path.exists(job_path):
        logger.error('Job path not exists!')
        return
    try:
        # Import only to validate that the path holds a loadable job.
        import_job(job_path)
    except (ImportError, AttributeError):
        logger.error('Job path is illegal!')
        return

    start_log_server()
    thread = start_rpc_server()

    logger.info('Pushing job to cola cluster...')
    dir_ = tempfile.mkdtemp()
    try:
        zip_filename = os.path.split(job_path)[1].replace(' ', '_') + '.zip'
        zip_file = os.path.join(dir_, zip_filename)
        ZipHandler.compress(zip_file, job_path, type_filters=("pyc", ))
        FileTransportClient(master, zip_file).send_file()
        logger.info('Push finished.')
    finally:
        # Always remove the temporary staging directory.
        shutil.rmtree(dir_)

    logger.info('Start to run job.')
    _client_call(master, 'start_job', zip_filename, True, client)
    thread.join()
def _unzip(self, job_name):
    """Unpack <job_name>.zip from the zip directory into the job
    directory, replacing any existing extraction of the same job."""
    target = os.path.join(self.job_dir, job_name)
    if os.path.exists(target):
        shutil.rmtree(target)
    source = os.path.join(self.zip_dir, job_name + '.zip')
    if os.path.exists(source):
        ZipHandler.uncompress(source, self.job_dir)
def create_zip(working_dir):
    # Zip a job directory into <working_dir>/zip/<job_name>.zip and
    # return the job name.
    #
    # NOTE(review): this function references `self`, `job_name` and
    # `job_path`, none of which are parameters -- it only works as a
    # closure nested inside a method where those names are in scope.
    # TODO: confirm against the enclosing code. The `working_dir`
    # parameter itself is unused; `self.working_dir` is read instead.
    zip_dir = os.path.join(self.working_dir, 'zip')
    filename = job_name + '.zip'
    zip_file = os.path.join(zip_dir, filename)
    # .pyc files are excluded from the archive.
    ZipHandler.compress(zip_file, job_path, type_filters=('pyc', ))
    return job_name
def action(self, name):
    # Dispatch one console command (`name` is the raw string the user
    # typed) against the cola master via RPC.
    if name == 'stop all':
        # Ask the master to shut down (it stops its workers in turn).
        print 'Trying to stop master and all workers.'
        try:
            client_call(self.master, 'stop')
        except socket.error:
            print 'Cannot connect to cola master.'
        else:
            print 'Cola cluster has been shutdown.'
    elif name == 'list jobs':
        print 'Running jobs: '
        for job in client_call(self.master, 'list_jobs'):
            print job
    elif name == 'list workers':
        print 'Cola workers: '
        for worker in client_call(self.master, 'list_workers'):
            print worker
    elif name == 'list job dirs':
        print 'Runnable job dirs: '
        for dir_ in client_call(self.master, 'list_job_dirs'):
            print dir_
    elif name.startswith('run remote job '):
        # Start a job that already exists in the master's job directory.
        print 'Remote job will run in background.'
        job_dir = name[len('run remote job '):]
        if job_dir not in client_call(self.master, 'list_job_dirs'):
            print 'Remote job dir not exists!'
        else:
            # uncompress=False: the remote job dir is already unpacked.
            client_call(self.master, 'start_job', job_dir, False)
    elif name.startswith('run local job '):
        # Zip a local job directory, push it to the master, then run it.
        print 'Job has been committed and will run in background.'
        start = len('run local job ')
        # Tolerate surrounding quotes around the supplied path.
        path = name[start:].strip().strip('"').strip("'")
        if not os.path.exists(path):
            print 'Job path not exists!'
        else:
            try:
                # Import only to validate the job definition.
                job = import_job(path)
            except (ImportError, AttributeError):
                print 'Job path is illegal!'
                return
            dir_ = tempfile.mkdtemp()
            try:
                zip_filename = os.path.split(path)[1].replace(' ', '_') + '.zip'
                zip_file = os.path.join(dir_, zip_filename)
                ZipHandler.compress(zip_file, path, type_filters=("pyc", ))
                FileTransportClient(self.master, zip_file).send_file()
                client_call(self.master, 'start_job', zip_filename)
            finally:
                # Remove the temporary staging directory in all cases.
                shutil.rmtree(dir_)
def pack_job_error(self, job_name):
    """Collect this worker's error records for `job_name`, zip them,
    and ship the archive to the master."""
    job_working_dir = os.path.join(self.working_dir, job_name)
    errors_dir = pack_local_job_error(job_name, working_dir=job_working_dir,
                                      logger=self.logger)
    # The archive name embeds this node's IP so the master can tell
    # the workers' uploads apart.
    archive = os.path.join(
        self.zip_dir,
        '%s_%s_errors.zip' % (self.ctx.ip.replace('.', '_'), job_name))
    if os.path.exists(archive):
        os.remove(archive)
    ZipHandler.compress(archive, errors_dir)
    FileTransportClient(self.master, archive).send_file()
def run(self, args):
    # Client command entry point: list jobs, kill a job, or upload
    # (and optionally run) a job on the master given by ``args.master``.
    master_addr = args.master
    ctx = Context(is_client=True, master_addr=master_addr)
    if args.list is True:
        jobs = ctx.list_jobs()
        self.logger.info('list jobs at master: %s' % ctx.master_addr)
        for job_id, info in jobs.iteritems():
            self.logger.info(
                '====> job id: %s, job description: %s, status: %s' % \
                (job_id, info['name'], info['status']))
        if len(jobs) == 0:
            self.logger.info('no jobs exist')
    elif args.kill is not None:
        # Resolve a possibly-partial job name before killing.
        job_id = self._get_matched_job_name(ctx, args.kill)
        if job_id is not None:
            ctx.kill_job(job_id)
            self.logger.info('killed job: %s' % job_id)
    elif args.upload is not None:
        path = os.path.abspath(args.upload)
        if not os.path.exists(path):
            self.logger.error('upload path does not exist')
            return
        job_id = None
        try:
            # The job's unique name doubles as its id on the cluster.
            job_id = import_job_desc(path).uniq_name
        except Exception, e:
            self.logger.exception(e)
            self.logger.error('uploading job description failed')
            return
        # Stage a clean copy under a temp dir named after the job id.
        new_upload_dir = os.path.join(tempfile.gettempdir(), job_id)
        if os.path.exists(new_upload_dir):
            shutil.rmtree(new_upload_dir)
        shutil.copytree(path, new_upload_dir)
        temp_filename = os.path.join(tempfile.gettempdir(), job_id + '.zip')
        ZipHandler.compress(temp_filename, new_upload_dir,
                            type_filters=('pyc', ))
        try:
            FileTransportClient(ctx.master_addr, temp_filename).send_file()
        finally:
            # Clean up both the zip and the staged copy whatever happens.
            os.remove(temp_filename)
            shutil.rmtree(new_upload_dir)
        self.logger.info('upload job <id: %s> finished' % job_id)
        if args.run == 'U':
            # 'U' appears to mean "run right after upload".
            # NOTE(review): presumably set by the arg parser -- confirm.
            client_call(ctx.master_addr, 'run_job', job_id, True)
            self.logger.info('submit job <id: %s> to the cluster' % job_id)
def run(self, args):
    # Handle the client sub-commands against ``args.master``:
    #   --list   show jobs known to the master
    #   --kill   terminate a job by (possibly partial) name
    #   --upload push a local job dir; with run == 'U', start it too
    master_addr = args.master
    ctx = Context(is_client=True, master_addr=master_addr)
    if args.list is True:
        jobs = ctx.list_jobs()
        self.logger.info('list jobs at master: %s' % ctx.master_addr)
        for job_id, info in jobs.iteritems():
            self.logger.info(
                '====> job id: %s, job description: %s, status: %s' % \
                (job_id, info['name'], info['status']))
        if len(jobs) == 0:
            self.logger.info('no jobs exist')
    elif args.kill is not None:
        job_id = self._get_matched_job_name(ctx, args.kill)
        if job_id is not None:
            ctx.kill_job(job_id)
            self.logger.info('killed job: %s' % job_id)
    elif args.upload is not None:
        path = os.path.abspath(args.upload)
        if not os.path.exists(path):
            self.logger.error('upload path does not exist')
            return
        job_id = None
        try:
            # The unique name from the job description is the job id.
            job_id = import_job_desc(path).uniq_name
        except Exception, e:
            self.logger.exception(e)
            self.logger.error('uploading job description failed')
            return
        # Copy the job into a temp dir named after its id, then zip it.
        new_upload_dir = os.path.join(tempfile.gettempdir(), job_id)
        if os.path.exists(new_upload_dir):
            shutil.rmtree(new_upload_dir)
        shutil.copytree(path, new_upload_dir)
        temp_filename = os.path.join(tempfile.gettempdir(), job_id+'.zip')
        ZipHandler.compress(temp_filename, new_upload_dir,
                            type_filters=('pyc', ))
        try:
            FileTransportClient(ctx.master_addr, temp_filename).send_file()
        finally:
            # Remove both staging artifacts even if the transfer fails.
            os.remove(temp_filename)
            shutil.rmtree(new_upload_dir)
        self.logger.info('upload job <id: %s> finished' % job_id)
        if args.run == 'U':
            client_call(ctx.master_addr, 'run_job', job_id, True)
            self.logger.info('submit job <id: %s> to the cluster' % job_id)
def testZip(self):
    """Round-trip a directory through ZipHandler and verify the tree
    layout and file contents survive compress + uncompress."""
    archive = os.path.join(self.f, 'test.zip')
    ZipHandler.compress(archive, self.src_dir)
    ZipHandler.uncompress(archive, self.dest_dir)

    unpacked = os.path.join(self.dest_dir, 'compress')
    self.assertTrue(os.path.exists(unpacked))
    with open(os.path.join(unpacked, '1.txt')) as stream:
        self.assertEqual(stream.read(), self.content)

    nested = os.path.join(unpacked, 'dir1')
    self.assertTrue(os.path.exists(nested))
    with open(os.path.join(nested, '2.txt')) as stream:
        self.assertEqual(stream.read(), self.content)
def pack_job_error(self, job_name):
    """Ask every worker of `job_name` to upload its error archive, then
    bundle all received archives into one zip and return its path."""
    job_master = self.job_tracker.get_job_master(job_name)
    # Blocking barrier: each worker runs its own pack_job_error first.
    Stage(job_master.workers, 'pack_job_error').barrier(True, job_name)

    error_dir = os.path.join(self.working_dir, 'errors')
    if not os.path.exists(error_dir):
        os.makedirs(error_dir)
    error_filename = os.path.join(error_dir, '%s_errors.zip' % job_name)

    # Per-worker uploads all end with "<job>_errors.zip"; gather them
    # into a scratch dir so they can be zipped together.
    suffix = '%s_errors.zip' % job_name
    staging = tempfile.mkdtemp()
    try:
        matches = [n for n in os.listdir(self.zip_dir) if n.endswith(suffix)]
        for n in matches:
            shutil.move(os.path.join(self.zip_dir, n), staging)
        ZipHandler.compress(error_filename, staging)
    finally:
        shutil.rmtree(staging)
    return error_filename
def pack_job_error(self, job_name):
    """Gather worker error archives for `job_name` into a single zip.

    Returns the combined archive's path,
    <working_dir>/errors/<job_name>_errors.zip.
    """
    master_info = self.job_tracker.get_job_master(job_name)
    barrier_stage = Stage(master_info.workers, 'pack_job_error')
    # Wait until every worker has shipped its own error zip.
    barrier_stage.barrier(True, job_name)

    out_dir = os.path.join(self.working_dir, 'errors')
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    out_zip = os.path.join(out_dir, '%s_errors.zip' % job_name)

    wanted = '%s_errors.zip' % job_name
    scratch = tempfile.mkdtemp()
    try:
        for entry in os.listdir(self.zip_dir):
            if not entry.endswith(wanted):
                continue
            shutil.move(os.path.join(self.zip_dir, entry), scratch)
        ZipHandler.compress(out_zip, scratch)
    finally:
        shutil.rmtree(scratch)
    return out_zip
def start_job(self, zip_filename, uncompress=True):
    """Launch a job on this worker node.

    :param zip_filename: name of the job zip inside `self.zip_dir`
    :param uncompress: when True, extract the zip first; when False the
        job directory (zip name minus extension) is assumed to exist
    """
    if uncompress:
        zip_file = os.path.join(self.zip_dir, zip_filename)
        job_dir = ZipHandler.uncompress(zip_file, self.job_dir)
    else:
        job_dir = os.path.join(self.job_dir, zip_filename.rsplit('.', 1)[0])
    job = import_job(job_dir)

    # The job config decides which port the master listens on.
    master_port = job.context.job.master_port
    master = '%s:%s' % (self.master.split(':')[0], master_port)

    dirname = os.path.dirname(os.path.abspath(__file__))
    f = os.path.join(dirname, 'loader.py')
    # BUG FIX: Popen was handed a single shell-formatted string without
    # shell=True; on POSIX that tries to exec a program literally named
    # 'python "..." "..." ...' and fails. Pass an argv list instead --
    # no shell involved, and paths with spaces are handled correctly.
    subprocess.Popen(['python', f, job_dir, master])
def start_job(self, zip_filename, uncompress=True, client=None):
    """Start a job across the cluster from the master watcher.

    :param zip_filename: job archive name under `self.zip_dir`
    :param uncompress: extract (and broadcast) the archive when True
    :param client: optional client address passed to loader.py via -c
    """
    if uncompress:
        zip_file = os.path.join(self.zip_dir, zip_filename)
        # transfer zip file to workers (skip this node itself)
        for watcher in self.nodes_watchers:
            if watcher.split(':')[0] == self.ip_address:
                continue
            file_trans_client = FileTransportClient(watcher, zip_file)
            file_trans_client.send_file()
        job_dir = ZipHandler.uncompress(zip_file, self.job_dir)
    else:
        job_dir = os.path.join(self.job_dir, zip_filename.rsplit('.', 1)[0])
    job = import_job(job_dir)

    worker_port = job.context.job.port
    port = job.context.job.master_port
    nodes = [watcher.split(':')[0] for watcher in self.nodes_watchers]
    # BUG FIX: `info` was only assigned when nodes existed, yet
    # `info.popen = popen` below ran unconditionally -- NameError when
    # the cluster has no registered workers. Initialize and guard it.
    info = None
    if len(nodes) > 0:
        info = MasterJobInfo(port, nodes, worker_port)
        self.running_jobs[job.real_name] = info

    dirname = os.path.dirname(os.path.abspath(__file__))
    f = os.path.join(dirname, 'loader.py')
    workers = ['%s:%s' % (node, worker_port) for node in nodes]
    cmds = ['python', f, '-j', job_dir, '-i', self.ip_address,
            '-n', ' '.join(workers)]
    if self.data_path is not None:
        cmds.extend(['-d', self.data_path])
    if self.force:
        cmds.append('-f')
    if client is not None:
        cmds.extend(['-c', client])
    popen = subprocess.Popen(cmds)
    if info is not None:
        info.popen = popen

    # call workers to start job
    for worker_watcher in self.nodes_watchers:
        client_call(worker_watcher, 'start_job', zip_filename, uncompress,
                    ignore=True)
def setUp(self):
    """Create a scratch layout (watch/zip/job dirs), zip the bundled
    wiki job into it, and build a MasterWatcher over those dirs."""
    self.dir = tempfile.mkdtemp()

    self.root = os.path.join(self.dir, "watch")
    self.zip_dir = os.path.join(self.dir, "zip")
    self.job_dir = os.path.join(self.dir, "job")
    for d in (self.root, self.zip_dir, self.job_dir):
        if not os.path.exists(d):
            os.mkdir(d)

    zip_file = os.path.join(self.zip_dir, "wiki.zip")
    src_dir = os.path.join(root_dir(), "contrib", "wiki")
    # Compiled .pyc files are excluded from the job archive.
    self.zip_file = ZipHandler.compress(zip_file, src_dir,
                                        type_filters=("pyc",))
    self.master_watcher = MasterWatcher(self.root, self.zip_dir,
                                        self.job_dir)
def start_job(self, zip_filename, uncompress=True, client=None):
    """Broadcast the job archive, spawn the job master locally via
    loader.py, and instruct all workers to start the job.

    :param zip_filename: archive name under `self.zip_dir`
    :param uncompress: extract and distribute the archive when True
    :param client: optional client address forwarded as loader's -c flag
    """
    if uncompress:
        zip_file = os.path.join(self.zip_dir, zip_filename)
        # transfer zip file to workers; this node already holds it
        for watcher in self.nodes_watchers:
            if watcher.split(':')[0] == self.ip_address:
                continue
            file_trans_client = FileTransportClient(watcher, zip_file)
            file_trans_client.send_file()
        job_dir = ZipHandler.uncompress(zip_file, self.job_dir)
    else:
        job_dir = os.path.join(self.job_dir, zip_filename.rsplit('.', 1)[0])
    job = import_job(job_dir)

    worker_port = job.context.job.port
    port = job.context.job.master_port
    nodes = [watcher.split(':')[0] for watcher in self.nodes_watchers]
    # BUG FIX: `info` used to be bound only inside the `if` below, yet
    # `info.popen = popen` executed unconditionally -> NameError when no
    # workers are registered. Initialize it and guard the assignment.
    info = None
    if len(nodes) > 0:
        info = MasterJobInfo(port, nodes, worker_port)
        self.running_jobs[job.real_name] = info

    dirname = os.path.dirname(os.path.abspath(__file__))
    f = os.path.join(dirname, 'loader.py')
    workers = ['%s:%s' % (node, worker_port) for node in nodes]
    cmds = ['python', f, '-j', job_dir, '-i', self.ip_address,
            '-n', ' '.join(workers)]
    if self.data_path is not None:
        cmds.extend(['-d', self.data_path])
    if self.force:
        cmds.append('-f')
    if client is not None:
        cmds.extend(['-c', client])
    popen = subprocess.Popen(cmds)
    if info is not None:
        info.popen = popen

    # call workers to start job
    for worker_watcher in self.nodes_watchers:
        client_call(worker_watcher, 'start_job', zip_filename, uncompress,
                    ignore=True)
def setUp(self):
    """Prepare zip/job dirs holding a wiki job archive, then start a
    MasterWatcher backed by a daemonized RPC server thread."""
    self.dir = tempfile.mkdtemp()
    self.zip_dir = os.path.join(self.dir, "zip")
    self.job_dir = os.path.join(self.dir, "job")
    for d in (self.zip_dir, self.job_dir):
        if not os.path.exists(d):
            os.mkdir(d)

    zip_file = os.path.join(self.zip_dir, "wiki.zip")
    src_dir = os.path.join(root_dir(), "contrib", "wiki")
    self.zip_file = ZipHandler.compress(zip_file, src_dir,
                                        type_filters=("pyc",))

    self.rpc_server = ColaRPCServer(("localhost", main_conf.master.port))
    self.master_watcher = MasterWatcher(self.rpc_server, self.zip_dir,
                                        self.job_dir)
    thd = threading.Thread(target=self.rpc_server.serve_forever)
    thd.setDaemon(True)
    thd.start()
class Test(unittest.TestCase): def setUp(self): self.working_dir = tempfile.mkdtemp() self.job_dir = os.path.join(self.working_dir, 'master', 'jobs') self.zip_dir = os.path.join(self.working_dir, 'master', 'zip') if not os.path.exists(self.zip_dir): os.makedirs(self.zip_dir) wiki_path = os.path.join( os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'app', 'wiki') try: shutil.copytree(wiki_path, os.path.join(self.job_dir, 'wiki')) except OSError, e: if e.errno == errno.ENOTDIR: shutil.copy(wiki_path, os.path.join(self.job_dir, 'wiki')) else: raise self.job_name = import_job_desc(wiki_path).uniq_name old_wiki_path = os.path.join(self.job_dir, 'wiki') new_wiki_path = os.path.join(self.job_dir, self.job_name) os.rename(old_wiki_path, new_wiki_path) ZipHandler.compress(os.path.join(self.zip_dir, self.job_name+'.zip'), new_wiki_path) config_file = os.path.join(new_wiki_path, 'wiki.yaml') try: os.remove(os.path.join(new_wiki_path, 'test.yaml')) except: pass with open(config_file) as f: yaml_obj = yaml.load(f) yaml_obj['job']['size'] = 5 yaml_obj['job']['instances'] = 1 yaml_obj['job']['priorities'] = 1 with open(config_file, 'w') as f: yaml.dump(yaml_obj, f)
def start_job(self, zip_filename, uncompress=True, client=None):
    """Master-side job launch: ship the archive to workers, run the job
    master through loader.py, then trigger `start_job` on each worker.

    :param zip_filename: job archive name inside `self.zip_dir`
    :param uncompress: extract/distribute the archive when True
    :param client: optional client address appended as loader's -c flag
    """
    if uncompress:
        zip_file = os.path.join(self.zip_dir, zip_filename)
        # transfer zip file to workers; skip this node, it has the file
        for watcher in self.nodes_watchers:
            if watcher.split(":")[0] == self.ip_address:
                continue
            file_trans_client = FileTransportClient(watcher, zip_file)
            file_trans_client.send_file()
        job_dir = ZipHandler.uncompress(zip_file, self.job_dir)
    else:
        job_dir = os.path.join(self.job_dir, zip_filename.rsplit(".", 1)[0])
    job = import_job(job_dir)

    worker_port = job.context.job.port
    port = job.context.job.master_port
    nodes = [watcher.split(":")[0] for watcher in self.nodes_watchers]
    # BUG FIX: `info` was previously assigned only when workers exist,
    # but `info.popen = popen` executed regardless (NameError on an
    # empty cluster). Initialize it and guard the assignment.
    info = None
    if len(nodes) > 0:
        info = MasterJobInfo(port, nodes, worker_port)
        self.running_jobs[job.real_name] = info

    dirname = os.path.dirname(os.path.abspath(__file__))
    f = os.path.join(dirname, "loader.py")
    workers = ["%s:%s" % (node, worker_port) for node in nodes]
    cmds = ["python", f, "-j", job_dir, "-i", self.ip_address,
            "-n", " ".join(workers)]
    if self.data_path is not None:
        cmds.extend(["-d", self.data_path])
    if self.force:
        cmds.append("-f")
    if client is not None:
        cmds.extend(["-c", client])
    popen = subprocess.Popen(cmds)
    if info is not None:
        info.popen = popen

    # call workers to start job
    for worker_watcher in self.nodes_watchers:
        client_call(worker_watcher, "start_job", zip_filename, uncompress)
def setUp(self):
    """Lay out watch/zip/job scratch directories, archive the bundled
    wiki job, and construct the MasterWatcher under test."""
    self.dir = tempfile.mkdtemp()
    self.root = os.path.join(self.dir, 'watch')
    if not os.path.exists(self.root):
        os.mkdir(self.root)
    self.zip_dir = os.path.join(self.dir, 'zip')
    if not os.path.exists(self.zip_dir):
        os.mkdir(self.zip_dir)
    self.job_dir = os.path.join(self.dir, 'job')
    if not os.path.exists(self.job_dir):
        os.mkdir(self.job_dir)

    source = os.path.join(root_dir(), 'contrib', 'wiki')
    destination = os.path.join(self.zip_dir, 'wiki.zip')
    # Compiled .pyc files are filtered out of the archive.
    self.zip_file = ZipHandler.compress(destination, source,
                                        type_filters=('pyc', ))
    self.master_watcher = MasterWatcher(self.root, self.zip_dir,
                                        self.job_dir)
def start_job(self, zip_filename, uncompress=True):
    """Run a job on this worker: optionally unpack its archive, then
    spawn loader.py pointed at the job's master address."""
    if uncompress:
        archive = os.path.join(self.zip_dir, zip_filename)
        job_dir = ZipHandler.uncompress(archive, self.job_dir)
    else:
        job_dir = os.path.join(self.job_dir, zip_filename.rsplit('.', 1)[0])
    job = import_job(job_dir)

    # The job config decides which port the master listens on.
    master = '%s:%s' % (self.master.split(':')[0],
                        job.context.job.master_port)

    loader = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                          'loader.py')
    cmds = ['python', loader, '-j', job_dir, '-m', master]
    if self.data_path is not None:
        cmds.extend(['-d', self.data_path])
    if self.force:
        cmds.append('-f')
    popen = subprocess.Popen(cmds)
    self.running_jobs[job.real_name] = WorkerJobInfo(job.context.job.port,
                                                     popen)
def start_job(self, zip_filename, uncompress=True):
    """Start a job process on this worker node.

    When `uncompress` is true the job zip is extracted first; otherwise
    the already-extracted directory (zip name minus extension) is used.
    """
    if uncompress:
        src_zip = os.path.join(self.zip_dir, zip_filename)
        job_dir = ZipHandler.uncompress(src_zip, self.job_dir)
    else:
        base = zip_filename.rsplit(".", 1)[0]
        job_dir = os.path.join(self.job_dir, base)
    job = import_job(job_dir)

    master_port = job.context.job.master_port
    master_host = self.master.split(":")[0]
    master = "%s:%s" % (master_host, master_port)

    here = os.path.dirname(os.path.abspath(__file__))
    loader_script = os.path.join(here, "loader.py")
    argv = ["python", loader_script, "-j", job_dir, "-m", master]
    if self.data_path is not None:
        argv.extend(["-d", self.data_path])
    if self.force:
        argv.append("-f")
    proc = subprocess.Popen(argv)
    self.running_jobs[job.real_name] = WorkerJobInfo(job.context.job.port,
                                                     proc)
def setUp(self):
    """Build zip/job scratch dirs containing a wiki job archive and
    spin up a MasterWatcher on a background RPC server thread."""
    self.dir = tempfile.mkdtemp()

    self.zip_dir = os.path.join(self.dir, 'zip')
    if not os.path.exists(self.zip_dir):
        os.mkdir(self.zip_dir)
    self.job_dir = os.path.join(self.dir, 'job')
    if not os.path.exists(self.job_dir):
        os.mkdir(self.job_dir)

    wiki_src = os.path.join(root_dir(), 'contrib', 'wiki')
    self.zip_file = ZipHandler.compress(
        os.path.join(self.zip_dir, 'wiki.zip'), wiki_src,
        type_filters=('pyc', ))

    self.rpc_server = ColaRPCServer(('localhost', main_conf.master.port))
    self.master_watcher = MasterWatcher(self.rpc_server, self.zip_dir,
                                        self.job_dir)
    server_thread = threading.Thread(target=self.rpc_server.serve_forever)
    server_thread.setDaemon(True)
    server_thread.start()
def start_job(self, zip_filename, uncompress=True):
    """Unpack (if requested) and launch a job via loader.py, recording
    the spawned process in `self.running_jobs`."""
    if uncompress:
        job_dir = ZipHandler.uncompress(
            os.path.join(self.zip_dir, zip_filename), self.job_dir)
    else:
        job_dir = os.path.join(self.job_dir,
                               zip_filename.rsplit('.', 1)[0])
    job = import_job(job_dir)

    master_port = job.context.job.master_port
    master = '%s:%s' % (self.master.split(':')[0], master_port)

    script = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                          'loader.py')
    command = ['python', script, '-j', job_dir, '-m', master]
    if self.data_path is not None:
        command += ['-d', self.data_path]
    if self.force:
        command += ['-f']
    child = subprocess.Popen(command)
    self.running_jobs[job.real_name] = WorkerJobInfo(job.context.job.port,
                                                     child)
def start_job(self, zip_filename, uncompress=True):
    """Distribute a job archive to all workers, launch the job master
    process locally, then tell every worker to start the job."""
    if uncompress:
        zip_file = os.path.join(self.zip_dir, zip_filename)
        # transfer zip file to workers (this node already has it)
        for watcher in self.nodes_watchers:
            if watcher.split(':')[0] == self.ip_address:
                continue
            file_trans_client = FileTransportClient(watcher, zip_file)
            file_trans_client.send_file()
        job_dir = ZipHandler.uncompress(zip_file, self.job_dir)
    else:
        job_dir = os.path.join(self.job_dir, zip_filename.rsplit('.', 1)[0])
    job = import_job(job_dir)

    worker_port = job.context.job.port
    port = job.context.job.master_port
    nodes = [watcher.split(':')[0] for watcher in self.nodes_watchers]
    if len(nodes) > 0:
        info = MasterJobInfo(port, nodes, worker_port)
        self.running_jobs[job.real_name] = info

    dirname = os.path.dirname(os.path.abspath(__file__))
    f = os.path.join(dirname, 'loader.py')
    workers = ['%s:%s' % (node, worker_port) for node in nodes]
    # BUG FIX: Popen was given one shell-formatted string without
    # shell=True; on POSIX that tries to exec a program whose *name* is
    # the whole command line and fails. Use an argv list instead (no
    # shell, and paths containing spaces stay intact).
    subprocess.Popen(['python', f, job_dir] + workers)

    # call workers to start job
    for worker_watcher in self.nodes_watchers:
        client_call(worker_watcher, 'start_job', zip_filename, uncompress)
def _unzip(self, job_name):
    """Extract <job_name>.zip into the job directory if the archive
    exists; otherwise do nothing."""
    archive = os.path.join(self.zip_dir, job_name + '.zip')
    if not os.path.exists(archive):
        return
    ZipHandler.uncompress(archive, self.job_dir)
def _unzip(self, job_name):
    """Unpack the job's uploaded archive, doing nothing when absent."""
    path_to_zip = os.path.join(self.zip_dir, '%s.zip' % job_name)
    if os.path.exists(path_to_zip):
        ZipHandler.uncompress(path_to_zip, self.job_dir)