class DeploymentRPCInterface: def __init__(self, supervisord, **config): self.supervisord = supervisord self.global_config = config self.supervisor_rpcinterface = SupervisorNamespaceRPCInterface(supervisord) self.package_server = config.get('package_server') self.download_package_uri = config.get('download_package_uri') self.get_latest_package_info_uri = config.get('get_latest_package_info_uri') def get_run_dir(self, service, cluster, job, instance_id=-1): ''' Get the run directory of the specified job @param service the server name @param cluster the cluster name @param job the job name @param instance_id the instance id @return string the job's run root directory ''' app_root = self.global_config.get('app_root', DEFAULT_APP_ROOT) if instance_id == -1: return '%s/%s/%s/%s' % (app_root, service, cluster, job) else: return '%s/%s/%s/%s/%s' % (app_root, service, cluster, job, instance_id) def get_log_dir(self, service, cluster, job, instance_id=-1): ''' Get the log directory of the specified job @param service the server name @param cluster the cluster name @param job the job name @param instance_id the instance id @return string the job's log root directory ''' log_root = self.global_config.get('log_root', DEFAULT_LOG_ROOT) if instance_id == -1: return '%s/%s/%s/%s' % (log_root, service, cluster, job) else: return '%s/%s/%s/%s/%s' % (log_root, service, cluster, job, instance_id) def get_stdout_dir(self, service, cluster, job, instance_id=-1): ''' Get the stdout directory of the specified job @param service the server name @param cluster the cluster name @param job the job name @param instance_id the instance id @return string the job's log root directory ''' run_dir = self.get_run_dir(service, cluster, job, instance_id) return '%s/stdout' % run_dir def get_available_data_dirs(self, service, cluster, job, instance_id=-1): ''' Get all the available data directories that the specified job may use @param service the server name @param cluster the cluster name @param job the job name @param instance_id the instance id @return list all the available data root directories ''' data_dirs = self.global_config.get('data_dirs', DEFAULT_DATA_DIRS) if instance_id == -1: return ['%s/%s/%s/%s' % (data_dir, service, cluster, job) for data_dir in data_dirs.split(',') ] else: return ['%s/%s/%s/%s/%s' % (data_dir, service, cluster, job, instance_id) for data_dir in data_dirs.split(',') ] def get_data_dirs(self, service, cluster, job, instance_id=-1): ''' Get all the data directories of the specified job @param service the server name @param cluster the cluster name @param job the job name @param instance_id the instance id @return list the job's data root directories ''' file_name = '%s/%s' % (self.get_run_dir(service, cluster, job, instance_id), JOB_RUN_CONFIG) if not os.path.exists(file_name): return 'You should bootstrapped the job first' data_dirs = self.get_available_data_dirs(service, cluster, job, instance_id) run_config = ConfigParser.SafeConfigParser() run_config.read([file_name]) data_dir_indexes = run_config.get('run_info', 'data_dir_indexes') job_data_dirs = [] for i in data_dir_indexes.split(','): job_data_dirs.append(data_dirs[int(i)]) return job_data_dirs def get_package_dir(self, service, cluster, job, instance_id=-1): ''' Get the current package directory of the specified job @param service the server name @param cluster the cluster name @param job the job name @param instance_id the instance id @return string the job's package root directory(symbol link) ''' return '%s/package' % self.get_run_dir(service, cluster, job, instance_id) def get_real_package_dir(self, service, cluster, job, instance_id=-1): ''' Get the current package directory real path of the specified job @param service the server name @param cluster the cluster name @param job the job name @param instance_id the instance id @return string the job's package root directory(real path) ''' return os.readlink(self.get_package_dir(service, cluster, job, instance_id)) def get_current_package_dir(self, service, cluster): ''' Get the currently used package directory of the specified service @param service the service name @param cluster the cluster name @return string the currently used package directory ''' package_root = self.global_config.get('package_root') return '%s/%s/%s/current' % (package_root, service, cluster) def get_cleanup_token(self, service, cluster, job, instance_id=-1): ''' Get the token used to do cleanuping @param service the server name @param cluster the cluster name @param job the job name @param instance_id the instance id @return string the job's cleanup token ''' file_name = '%s/%s' % (self.get_run_dir(service, cluster, job, instance_id), JOB_RUN_CONFIG) if not os.path.exists(file_name): return 'You should bootstrap the job first' run_config = ConfigParser.SafeConfigParser() run_config.read([file_name]) return run_config.get('run_info', 'cleanup_token') def bootstrap(self, service, cluster, job, config_dict, instance_id=-1): ''' Bootstrap the specified job @param service the server name @param cluster the cluster name @param job the job name @param instance_id the instance id @param config_dict the config information dictionary @return string 'OK' on success, otherwise, the error message Note: config_dict must contain the following item: 1. artifact 2. bootstrap.sh 3. if any config files are needed, just put it in 'config_files' item config_dict can also contain the following optional items: 1. cleanup_token: if this token is specified, user should supply the token to do cleanup 2. package_name: package_name, revision, timestamp should be specified simultaneously, otherwise will be ignored 3. revision 4. timestamp 5. data_dir_indexes: if this is not specified, the first data_dir is used by default 6. force_update This is an example: config_dict = { 'artifact': 'hadoop', 'bootstrap.sh': $bootstrap_file_content, 'config_files': { 'core-site.xml': $core_site_xml_content, ... }, } ''' return self._do_bootstrap(service, cluster, job, instance_id, **config_dict) def start(self, service, cluster, job, config_dict, instance_id=-1): ''' Start the specified job @param service the server name @param cluster the cluster name @param job the job name @param instance_id the instance id @param config_dict the config information dictionary @return string 'OK' on success, otherwise, the error message Note: config_dict must contain the following item: 1. start.sh 2. artifact 3. if any config files are needed, just put it in 'config_files' item config_dict can also contain the following optional items: 1. http_url: the server's http service url 2. package_name: package_name, revision, timestamp should be specified simultaneously, otherwise will be ignored 3. revision 4. timestamp 5. force_update This is an example: config_dict = { 'start.sh': $start_file_content, 'artifact': hadoop, 'config_files': { 'core-site.xml': $core_site_xml_content, ... }, 'http_url': 'http://10.235.3.67:11201', } ''' return self._do_start(service, cluster, job, instance_id, **config_dict) def stop(self, service, cluster, job, config_dict, instance_id=-1): ''' Stop the specified job @param service the server name @param cluster the cluster name @param job the job name @param instance_id the instance id @param config_dict the config information dictionary @return string 'OK' on success, otherwise, the error message Note: config_dict is not used currently, reserved for extendibility ''' return self._do_stop(service, cluster, job, instance_id, **config_dict) def cleanup(self, service, cluster, job, config_dict, instance_id=-1): ''' Cleanup the specified job's data/log directories @param service the server name @param cluster the cluster name @param job the job name @param instance_id the instance id @param config_dict the config information dictionary @return string 'OK' on success, otherwise, the error message Note: config_dict may contain the following item: 1. cleanup_token: [optional] token used to do verification 2. cleanup.sh: [optional] script used to do cleanuping This is an example: config_dict = { 'cleanup_token': '550e8400-e29b-41d4-a716-446655440000', 'cleanup.sh': $cleanup_script, } ''' return self._do_cleanup(service, cluster, job, instance_id, **config_dict) def show(self, service, cluster, job, config_dict, instance_id=-1): ''' Get the specified job's current status @param service the server name @param cluster the cluster name @param job the job name @param instance_id the instance id @param config_dict the config information dictionary @return string the process status Possible values of process status: RUNNING STARTING BACKOFF STOPPING EXITED FATAL UNKNOWN Note: config_dict is not used currently, reserved for extendibility ''' return self._do_show(service, cluster, job, instance_id, **config_dict) def read_file(self, file_path): ''' Read the file with the given file path on server @param file_path the name of file to read ''' with open(file_path) as fi: return fi.read() def write_text_to_file(self, file_path, content): ''' Write content to the file with the given file path on server @param file_path the name of file to write @param content the content to write ''' with open(file_path, 'w') as fi: fi.write(content) return 'OK' def append_text_to_file(self, file_path, content): ''' Append content to the file with the given file path on server @param file_path the name of file to append @param content the content to append ''' with open(file_path, 'a') as fi: fi.write(content) return 'OK' def _get_package_uri(self, artifact, revision, timestamp, package_name): return '%s/%s/%s/%s-%s/%s' % (self.package_server, self.download_package_uri, artifact, revision, timestamp, package_name) def _get_query_latest_package_info_uri(self, artifact): return '%s/%s/?artifact=%s' % (self.package_server, self.get_latest_package_info_uri, artifact) def _downlowd_package(self, uri, dest_file): data_file = urllib2.urlopen(uri, None, 30) if not os.path.exists(os.path.dirname(dest_file)): os.makedirs(os.path.dirname(dest_file)) fp = open(dest_file, 'wb') fp.write(data_file.read()) fp.close() data_file.close() def _write_file(self, file_path, file_content): fp = open(file_path, 'wb') fp.write(file_content) fp.close() def _write_config_files(self, run_dir, **config_dict): for file_name, content in config_dict.iteritems(): file_path = '%s/%s' % (run_dir, file_name) if os.path.exists(file_path): os.remove(file_path) self._write_file(file_path, content) def _get_process_name(self, service, cluster, job, instance_id): if instance_id == -1: return '%s--%s--%s' % (service, cluster, job) else: return '%s--%s--%s%d' % (service, cluster, job, instance_id) def _cleanup_dir(self, path): cmd = 'rm -rf %s/*' % path subprocess.check_call(cmd, shell=True) def _check_dir_empty(self, path): if not os.path.exists(path): return True lists = os.listdir(path) return len(lists) == 0 def _check_bootstrapped(self, service, cluster, job, instance_id): run_dir = self.get_run_dir(service, cluster, job, instance_id) return os.path.exists('%s/%s' % (run_dir, JOB_RUN_CONFIG)) def _get_latest_package_info(self, artifact): uri = self._get_query_latest_package_info_uri(artifact) info_fp = urllib2.urlopen(uri, None, 30) info = info_fp.read() if info and info.startswith('{'): info_dict = eval(info) info_fp.close() return info_dict else: info_fp.close() return None def _make_package_dir(self, artifact, service, cluster, job, instance_id, revision, timestamp, package_name): # Check if the tarball is already downloaded, if not, download it package_path = '%s/%s/%s/%s-%s/%s' % (self.global_config.get('package_root'), service, cluster, revision, timestamp, package_name) if not os.path.exists(package_path): self._downlowd_package( self._get_package_uri(artifact, revision, timestamp, package_name), package_path) # Unpack the tarball package_dir = package_path[0: len(package_path) - len('.tar.gz')] if os.path.exists(package_dir): cmd = ['rm', '-rf', package_dir] subprocess.check_call(cmd) cmd = ['tar', '-zxf', package_path, '-C', os.path.dirname(package_dir)] subprocess.check_call(cmd) # Link the package dir to the 'current' current_dir = self.get_current_package_dir(service, cluster) if os.path.lexists(current_dir): os.unlink(current_dir) os.symlink(package_dir, current_dir) # Link the package dir to the run dir symbol_package_dir = self.get_package_dir(service, cluster, job, instance_id) if os.path.lexists(symbol_package_dir): os.unlink(symbol_package_dir) os.symlink(package_dir, symbol_package_dir) return package_dir def _update_run_cfg(self, file_path, section, key, value): run_config = ConfigParser.SafeConfigParser() run_config.read([file_path]) run_config.set(section, key, value) fp = open(file_path, 'w') run_config.write(fp) fp.close() def _prepare_run_env(self, service, cluster, job, instance_id, **config_dict): artifact = config_dict.get('artifact') if not artifact: return 'Invalid config_dict: can\'t find artifact' # Create run dirs run_dir = self.get_run_dir(service, cluster, job, instance_id) if not os.path.exists(run_dir): os.makedirs(run_dir) # Create stdout dir stdout_dir = self.get_stdout_dir(service, cluster, job, instance_id) if not os.path.exists(stdout_dir): os.makedirs(stdout_dir) # Create and link log dir to the run dir log_dir = self.get_log_dir(service, cluster, job, instance_id) if os.path.exists(log_dir): if not self._check_dir_empty(log_dir): return 'The log dir %s is not empty, please do cleanup first' % log_dir else: os.makedirs(log_dir) symbol_log_dir = '%s/log' % run_dir if not os.path.exists(symbol_log_dir): os.symlink(log_dir, symbol_log_dir) # Create and link data dirs to the run dir data_dirs = self.global_config.get('data_dirs', DEFAULT_DATA_DIRS).split(',') data_dir_indexes = (config_dict.get('data_dir_indexes') or '0') for i in data_dir_indexes.split(','): if instance_id == -1: data_dir = '%s/%s/%s/%s' % (data_dirs[int(i)], service, cluster, job) else: data_dir = '%s/%s/%s/%s/%s' % (data_dirs[int(i)], service, cluster, job, instance_id) if os.path.exists(data_dir): if not self._check_dir_empty(data_dir): return 'The data dir %s is not empty, please do cleanup first' % data_dir else: os.makedirs(data_dir) symbol_data_dir = '%s/%s' % (run_dir, os.path.basename(data_dirs[int(i)])) if not os.path.exists(symbol_data_dir): os.symlink(data_dir, symbol_data_dir) # Check the package information force_update = config_dict.get('force_update', False) if force_update: package_info = self._get_latest_package_info(artifact) if package_info: package_name = package_info.get('package_name') revision = package_info.get('revision') timestamp = package_info.get('timestamp') else: package_name = config_dict.get('package_name') revision = config_dict.get('revision') timestamp = config_dict.get('timestamp') if not (package_name and revision and timestamp): package_info = self._get_latest_package_info(artifact) if package_info: package_name = package_info.get('package_name') revision = package_info.get('revision') timestamp = package_info.get('timestamp') if not (package_name and revision and timestamp): return 'No package found on package server of %s' % artifact # Write the job's run.cfg package_dir = self._make_package_dir(artifact, service, cluster, job, instance_id, revision, timestamp, package_name) cleanup_token = config_dict.get('cleanup_token', str()) run_config = ConfigParser.SafeConfigParser() run_config.add_section('run_info') run_config.set('run_info', 'cleanup_token', cleanup_token) run_config.set('run_info', 'data_dir_indexes', data_dir_indexes) run_config.set('run_info', 'run_dir', run_dir) run_config.set('run_info', 'log_dir', log_dir) run_config.set('run_info', 'package_dir', package_dir) fp = open('%s/%s' % (run_dir, JOB_RUN_CONFIG), 'w') run_config.write(fp) fp.close() return SUCCESS_STATUS def _do_bootstrap(self, service, cluster, job, instance_id, **config_dict): # prepare run dir message = self._prepare_run_env(service, cluster, job, instance_id, **config_dict) if message != SUCCESS_STATUS: return message # Write other config files to local disk config_files = config_dict.get('config_files') service_root = self.get_run_dir(service, cluster, job, instance_id) if config_files: self._write_config_files(service_root, **config_files) # Do bootstraping bootstrap_sh = config_dict.get('bootstrap.sh') if bootstrap_sh: self._write_file('%s/bootstrap.sh' % service_root, bootstrap_sh) cmd = ['/bin/bash', '%s/bootstrap.sh' % service_root] subprocess.call(cmd) return SUCCESS_STATUS def _do_start(self, service, cluster, job, instance_id, **config_dict): artifact = config_dict.get('artifact') if not artifact: return 'Inval config_dict: can\'t find artifact' if not self._check_bootstrapped(service, cluster, job, instance_id): return "You should bootstrap the job first" # Check if need update the package force_update = config_dict.get('force_update', False) if force_update: package_info = self._get_latest_package_info(artifact) if package_info: package_name = package_info.get('package_name') revision = package_info.get('revision') timestamp = package_info.get('timestamp') else: package_name = config_dict.get('package_name') revision = config_dict.get('revision') timestamp = config_dict.get('timestamp') if (package_name and revision and timestamp): package_path = '%s/%s/%s-%s/%s' % ( self.global_config.get('package_root'), artifact, revision, timestamp, package_name) if not os.path.exists(package_path): self._downlowd_package( self._get_package_uri(artifact, revision, timestamp, package_name), package_path) package_dir = self._make_package_dir(artifact, service, cluster, job, instance_id, revision, timestamp, package_name) run_cfg = '%s/%s' % (self.get_run_dir(service, cluster, job, instance_id), JOB_RUN_CONFIG) self._update_run_cfg(run_cfg, 'run_info', 'package_dir', package_dir) # Write the start script to local disk start_sh = config_dict.get('start.sh') service_root = self.get_run_dir(service, cluster, job, instance_id) if not start_sh and not os.path.exists('%s/start.sh' % service_root): return 'No start script found' elif start_sh: self._write_file('%s/start.sh' % service_root, start_sh) # Write other config files to local disk config_files = config_dict.get('config_files') if config_files: self._write_config_files(service_root, **config_files) # Write supervisor config http_url = config_dict.get('http_url', '') process_name = self._get_process_name(service, cluster, job, instance_id) job_config = ConfigParser.SafeConfigParser() section = 'program:%s' % process_name job_config.add_section(section) job_config.set(section, 'command', '/bin/bash %s/start.sh' % service_root) job_config.set(section, 'process_name', process_name) job_config.set(section, 'directory', service_root) job_config.set(section, 'http_url', http_url) # Process will be unconditionally restarted when it exits, without regard # to its exit code job_config.set(section, 'autorestart', 'true') job_config.set(section, 'exitcodes', str(DEFAULT_EXPECTED_EXIT_CODE)) # Process will NOT be automatically started when supervisor restart. job_config.set(section, 'autostart', 'false') fp = open('%s/%s/%s.cfg' % (os.getcwd(), CONFIG_PATH, process_name), 'wb') job_config.write(fp) fp.close() # Start the job self.supervisor_rpcinterface.reloadConfig() try: self.supervisor_rpcinterface.addProcessGroup(process_name) except RPCError, e: if e.code != Faults.ALREADY_ADDED: raise e self.supervisor_rpcinterface.startProcess(process_name)() return SUCCESS_STATUS
class DeploymentRPCInterface: def __init__(self, supervisord, **config): self.supervisord = supervisord self.global_config = config self.supervisor_rpcinterface = SupervisorNamespaceRPCInterface( supervisord) self.package_server = config.get('package_server') self.download_package_uri = config.get('download_package_uri') self.get_latest_package_info_uri = config.get( 'get_latest_package_info_uri') def get_run_dir(self, service, cluster, job): ''' Get the run directory of the specified job @param service the server name @param cluster the cluster name @param job the job name @return string the job's run root directory ''' app_root = self.global_config.get('app_root', DEFAULT_APP_ROOT) return '%s/%s/%s/%s' % (app_root, service, cluster, job) def get_log_dir(self, service, cluster, job): ''' Get the log directory of the specified job @param service the server name @param cluster the cluster name @param job the job name @return string the job's log root directory ''' log_root = self.global_config.get('log_root', DEFAULT_LOG_ROOT) return '%s/%s/%s/%s' % (log_root, service, cluster, job) def get_stdout_dir(self, service, cluster, job): ''' Get the stdout directory of the specified job @param service the server name @param cluster the cluster name @param job the job name @return string the job's log root directory ''' run_dir = self.get_run_dir(service, cluster, job) return '%s/stdout' % run_dir def get_available_data_dirs(self, service, cluster, job): ''' Get all the available data directories that the specified job may use @param service the server name @param cluster the cluster name @param job the job name @return list all the available data root directories ''' data_dirs = self.global_config.get('data_dirs', DEFAULT_DATA_DIRS) return [ '%s/%s/%s/%s' % (data_dir, service, cluster, job) for data_dir in data_dirs.split(',') ] def get_data_dirs(self, service, cluster, job): ''' Get all the data directories of the specified job @param service the server name @param cluster the cluster name @param job the job name @return list the job's data root directories ''' file_name = '%s/%s' % (self.get_run_dir(service, cluster, job), JOB_RUN_CONFIG) if not os.path.exists(file_name): return 'You should bootstrapped the job first' data_dirs = self.get_available_data_dirs(service, cluster, job) run_config = ConfigParser.SafeConfigParser() run_config.read([file_name]) data_dir_indexes = run_config.get('run_info', 'data_dir_indexes') job_data_dirs = [] for i in data_dir_indexes.split(','): job_data_dirs.append(data_dirs[int(i)]) return job_data_dirs def get_package_dir(self, service, cluster, job): ''' Get the current package directory of the specified job @param service the server name @param cluster the cluster name @param job the job name @return string the job's package root directory(symbol link) ''' return '%s/package' % self.get_run_dir(service, cluster, job) def get_real_package_dir(self, service, cluster, job): ''' Get the current package directory real path of the specified job @param service the server name @param cluster the cluster name @param job the job name @return string the job's package root directory(real path) ''' return os.readlink(self.get_package_dir(service, cluster, job)) def get_current_package_dir(self, service, cluster): ''' Get the currently used package directory of the specified service @param service the service name @param cluster the cluster name @return string the currently used package directory ''' package_root = self.global_config.get('package_root') return '%s/%s/%s/current' % (package_root, service, cluster) def get_cleanup_token(self, service, cluster, job): ''' Get the token used to do cleanuping @param service the server name @param cluster the cluster name @param job the job name @return string the job's cleanup token ''' file_name = '%s/%s' % (self.get_run_dir(service, cluster, job), JOB_RUN_CONFIG) if not os.path.exists(file_name): return 'You should bootstrap the job first' run_config = ConfigParser.SafeConfigParser() run_config.read([file_name]) return run_config.get('run_info', 'cleanup_token') def bootstrap(self, service, cluster, job, config_dict): ''' Bootstrap the specified job @param service the server name @param cluster the cluster name @param job the job name @param config_dict the config information dictionary @return string 'OK' on success, otherwise, the error message Note: config_dict must contain the following item: 1. artifact 2. bootstrap.sh 3. if any config files are needed, just put it in 'config_files' item config_dict can also contain the following optional items: 1. cleanup_token: if this token is specified, user should supply the token to do cleanup 2. package_name: package_name, revision, timestamp should be specified simultaneously, otherwise will be ignored 3. revision 4. timestamp 5. data_dir_indexes: if this is not specified, the first data_dir is used by default 6. force_update This is an example: config_dict = { 'artifact': 'hadoop', 'bootstrap.sh': $bootstrap_file_content, 'config_files': { 'core-site.xml': $core_site_xml_content, ... }, } ''' return self._do_bootstrap(service, cluster, job, **config_dict) def start(self, service, cluster, job, config_dict): ''' Start the specified job @param service the server name @param cluster the cluster name @param job the job name @param config_dict the config information dictionary @return string 'OK' on success, otherwise, the error message Note: config_dict must contain the following item: 1. start.sh 2. artifact 3. if any config files are needed, just put it in 'config_files' item config_dict can also contain the following optional items: 1. http_url: the server's http service url 2. package_name: package_name, revision, timestamp should be specified simultaneously, otherwise will be ignored 3. revision 4. timestamp 5. force_update This is an example: config_dict = { 'start.sh': $start_file_content, 'artifact': hadoop, 'config_files': { 'core-site.xml': $core_site_xml_content, ... }, 'http_url': 'http://10.235.3.67:11201', } ''' return self._do_start(service, cluster, job, **config_dict) def stop(self, service, cluster, job, config_dict): ''' Stop the specified job @param service the server name @param cluster the cluster name @param job the job name @param config_dict the config information dictionary @return string 'OK' on success, otherwise, the error message Note: config_dict is not used currently, reserved for extendibility ''' return self._do_stop(service, cluster, job, **config_dict) def cleanup(self, service, cluster, job, config_dict): ''' Cleanup the specified job's data/log directories @param service the server name @param cluster the cluster name @param job the job name @param config_dict the config information dictionary @return string 'OK' on success, otherwise, the error message Note: config_dict may contain the following item: 1. cleanup_token: [optional] token used to do verification 2. cleanup.sh: [optional] script used to do cleanuping This is an example: config_dict = { 'cleanup_token': '550e8400-e29b-41d4-a716-446655440000', 'cleanup.sh': $cleanup_script, } ''' return self._do_cleanup(service, cluster, job, **config_dict) def show(self, service, cluster, job, config_dict): ''' Get the specified job's current status @param service the server name @param cluster the cluster name @param job the job name @param config_dict the config information dictionary @return string the process status Possible values of process status: RUNNING STARTING BACKOFF STOPPING EXITED FATAL UNKNOWN Note: config_dict is not used currently, reserved for extendibility ''' return self._do_show(service, cluster, job, **config_dict) def _get_package_uri(self, artifact, revision, timestamp, package_name): return '%s/%s/%s/%s-%s/%s' % (self.package_server, self.download_package_uri, artifact, revision, timestamp, package_name) def _get_query_latest_package_info_uri(self, artifact): return '%s/%s/?artifact=%s' % ( self.package_server, self.get_latest_package_info_uri, artifact) def _downlowd_package(self, uri, dest_file): data_file = urllib2.urlopen(uri, None, 30) if not os.path.exists(os.path.dirname(dest_file)): os.makedirs(os.path.dirname(dest_file)) fp = open(dest_file, 'wb') fp.write(data_file.read()) fp.close() data_file.close() def _write_file(self, file_path, file_content): fp = open(file_path, 'wb') fp.write(file_content) fp.close() def _write_config_files(self, run_dir, **config_dict): for file_name, content in config_dict.iteritems(): file_path = '%s/%s' % (run_dir, file_name) if os.path.exists(file_path): os.remove(file_path) self._write_file(file_path, content) def _get_process_name(self, service, cluster, job): return '%s--%s--%s' % (service, cluster, job) def _cleanup_dir(self, path): cmd = 'rm -rf %s/*' % path subprocess.check_call(cmd, shell=True) def _check_dir_empty(self, path): if not os.path.exists(path): return True lists = os.listdir(path) return len(lists) == 0 def _check_bootstrapped(self, service, cluster, job): run_dir = self.get_run_dir(service, cluster, job) return os.path.exists('%s/%s' % (run_dir, JOB_RUN_CONFIG)) def _get_latest_package_info(self, artifact): uri = self._get_query_latest_package_info_uri(artifact) info_fp = urllib2.urlopen(uri, None, 30) info = info_fp.read() if info and info.startswith('{'): info_dict = eval(info) info_fp.close() return info_dict else: info_fp.close() return None def _make_package_dir(self, artifact, service, cluster, job, revision, timestamp, package_name): # Check if the tarball is already downloaded, if not, download it package_path = '%s/%s/%s/%s-%s/%s' % ( self.global_config.get('package_root'), service, cluster, revision, timestamp, package_name) if not os.path.exists(package_path): self._downlowd_package( self._get_package_uri(artifact, revision, timestamp, package_name), package_path) # Unpack the tarball package_dir = package_path[0:len(package_path) - len('.tar.gz')] if os.path.exists(package_dir): cmd = ['rm', '-rf', package_dir] subprocess.check_call(cmd) cmd = ['tar', '-zxf', package_path, '-C', os.path.dirname(package_dir)] subprocess.check_call(cmd) # Link the package dir to the 'current' current_dir = self.get_current_package_dir(service, cluster) if os.path.lexists(current_dir): os.unlink(current_dir) os.symlink(package_dir, current_dir) # Link the package dir to the run dir symbol_package_dir = self.get_package_dir(service, cluster, job) if os.path.lexists(symbol_package_dir): os.unlink(symbol_package_dir) os.symlink(package_dir, symbol_package_dir) return package_dir def _update_run_cfg(self, file_path, section, key, value): run_config = ConfigParser.SafeConfigParser() run_config.read([file_path]) run_config.set(section, key, value) fp = open(file_path, 'w') run_config.write(fp) fp.close() def _prepare_run_env(self, service, cluster, job, **config_dict): artifact = config_dict.get('artifact') if not artifact: return 'Invalid config_dict: can\'t find artifact' # Create run dirs run_dir = self.get_run_dir(service, cluster, job) if not os.path.exists(run_dir): os.makedirs(run_dir) # Create stdout dir stdout_dir = self.get_stdout_dir(service, cluster, job) if not os.path.exists(stdout_dir): os.makedirs(stdout_dir) # Create and link log dir to the run dir log_dir = self.get_log_dir(service, cluster, job) if os.path.exists(log_dir): if not self._check_dir_empty(log_dir): return 'The log dir %s is not empty, please do cleanup first' % log_dir else: os.makedirs(log_dir) symbol_log_dir = '%s/log' % run_dir if not os.path.exists(symbol_log_dir): os.symlink(log_dir, symbol_log_dir) # Create and link data dirs to the run dir data_dirs = self.global_config.get('data_dirs', DEFAULT_DATA_DIRS).split(',') data_dir_indexes = (config_dict.get('data_dir_indexes') or '0') for i in data_dir_indexes.split(','): data_dir = '%s/%s/%s/%s' % (data_dirs[int(i)], service, cluster, job) if os.path.exists(data_dir): if not self._check_dir_empty(data_dir): return 'The data dir %s is not empty, please do cleanup first' % data_dir else: os.makedirs(data_dir) symbol_data_dir = '%s/%s' % (run_dir, os.path.basename(data_dirs[int(i)])) if not os.path.exists(symbol_data_dir): os.symlink(data_dir, symbol_data_dir) # Check the package information force_update = config_dict.get('force_update', False) if force_update: package_info = self._get_latest_package_info(artifact) if package_info: package_name = package_info.get('package_name') revision = package_info.get('revision') timestamp = package_info.get('timestamp') else: package_name = config_dict.get('package_name') revision = config_dict.get('revision') timestamp = config_dict.get('timestamp') if not (package_name and revision and timestamp): package_info = self._get_latest_package_info(artifact) if package_info: package_name = package_info.get('package_name') revision = package_info.get('revision') timestamp = package_info.get('timestamp') if not (package_name and revision and timestamp): return 'No package found on package server of %s' % artifact # Write the job's run.cfg package_dir = self._make_package_dir(artifact, service, cluster, job, revision, timestamp, package_name) cleanup_token = config_dict.get('cleanup_token', str()) run_config = ConfigParser.SafeConfigParser() run_config.add_section('run_info') run_config.set('run_info', 'cleanup_token', cleanup_token) run_config.set('run_info', 'data_dir_indexes', data_dir_indexes) run_config.set('run_info', 'run_dir', run_dir) run_config.set('run_info', 'log_dir', log_dir) run_config.set('run_info', 'package_dir', package_dir) fp = open('%s/%s' % (run_dir, JOB_RUN_CONFIG), 'w') run_config.write(fp) fp.close() return SUCCESS_STATUS def _do_bootstrap(self, service, cluster, job, **config_dict): # prepare run dir message = self._prepare_run_env(service, cluster, job, **config_dict) if message != SUCCESS_STATUS: return message # Write other config files to local disk config_files = config_dict.get('config_files') service_root = self.get_run_dir(service, cluster, job) if config_files: self._write_config_files(service_root, **config_files) # Do bootstraping bootstrap_sh = config_dict.get('bootstrap.sh') if bootstrap_sh: self._write_file('%s/bootstrap.sh' % service_root, bootstrap_sh) cmd = ['/bin/bash', '%s/bootstrap.sh' % service_root] subprocess.call(cmd) return SUCCESS_STATUS def _do_start(self, service, cluster, job, **config_dict): artifact = config_dict.get('artifact') if not artifact: return 'Inval config_dict: can\'t find artifact' if not self._check_bootstrapped(service, cluster, job): return "You should bootstrap the job first" # Check if need update the package force_update = config_dict.get('force_update', False) if force_update: package_info = self._get_latest_package_info(artifact) if package_info: package_name = package_info.get('package_name') revision = package_info.get('revision') timestamp = package_info.get('timestamp') else: package_name = config_dict.get('package_name') revision = config_dict.get('revision') timestamp = config_dict.get('timestamp') if (package_name and revision and timestamp): package_path = '%s/%s/%s-%s/%s' % (self.global_config.get( 'package_root'), artifact, revision, timestamp, package_name) if not os.path.exists(package_path): self._downlowd_package( self._get_package_uri(artifact, revision, timestamp, package_name), package_path) package_dir = self._make_package_dir(artifact, service, cluster, job, revision, timestamp, package_name) run_cfg = '%s/%s' % (self.get_run_dir(service, cluster, job), JOB_RUN_CONFIG) self._update_run_cfg(run_cfg, 'run_info', 'package_dir', package_dir) # Write the start script to local disk start_sh = config_dict.get('start.sh') service_root = self.get_run_dir(service, cluster, job) if not start_sh and not os.path.exists('%s/start.sh' % service_root): return 'No start script found' elif start_sh: self._write_file('%s/start.sh' % service_root, start_sh) # Write other config files to local disk config_files = config_dict.get('config_files') if config_files: self._write_config_files(service_root, **config_files) # Write supervisor config http_url = config_dict.get('http_url', '') process_name = self._get_process_name(service, cluster, job) job_config = ConfigParser.SafeConfigParser() section = 'program:%s' % process_name job_config.add_section(section) job_config.set(section, 'command', '/bin/bash %s/start.sh' % service_root) job_config.set(section, 'process_name', process_name) job_config.set(section, 'directory', service_root) job_config.set(section, 'http_url', http_url) # Process will be unconditionally restarted when it exits, without regard # to its exit code job_config.set(section, 'autorestart', 'true') job_config.set(section, 'exitcodes', str(DEFAULT_EXPECTED_EXIT_CODE)) # Process will NOT be automatically started when supervisor restart. job_config.set(section, 'autostart', 'false') fp = open('%s/%s/%s.cfg' % (os.getcwd(), CONFIG_PATH, process_name), 'wb') job_config.write(fp) fp.close() # Start the job self.supervisor_rpcinterface.reloadConfig() try: self.supervisor_rpcinterface.addProcessGroup(process_name) except RPCError, e: if e.code != Faults.ALREADY_ADDED: raise e self.supervisor_rpcinterface.startProcess(process_name)() return SUCCESS_STATUS