def __init__(self, params, registration_id, timestamp, username, is_admin, token, db,
             temp_dir, docker_base_url, docker_registry_host, docker_push_allow_insecure,
             nms_url, nms_admin_token, module_details, ref_data_base, kbase_endpoint,
             prev_dev_version):
    self.db = db
    self.params = params
    # at this point, we assume git_url has been checked
    self.git_url = params['git_url']
    self.registration_id = registration_id
    self.timestamp = timestamp
    self.username = username
    self.is_admin = is_admin
    self.token = token
    self.temp_dir = temp_dir
    self.docker_base_url = docker_base_url
    self.docker_registry_host = docker_registry_host
    self.docker_push_allow_insecure = docker_push_allow_insecure
    self.nms_url = nms_url
    self.nms = NarrativeMethodStore(self.nms_url, token=nms_admin_token)
    self.local_function_reader = LocalFunctionReader()
    # (most of) the mongo document for this module snapshot before this registration
    self.module_details = module_details
    self.log_buffer = []
    self.last_log_time = time.time()  # in seconds
    self.log_interval = 1.0  # save log to mongo every second
    self.ref_data_base = ref_data_base
    self.kbase_endpoint = kbase_endpoint
    self.prev_dev_version = prev_dev_version
def create_app_dictionary():
    # Create App Dictionary: main function
    requests.packages.urllib3.disable_warnings()
    catalog = Catalog(url=os.environ['CATALOG_URL'])
    nms = NarrativeMethodStore(url=os.environ['NARRATIVE_METHOD_STORE'])
    apps = nms.list_methods({"tag": "release"})
    apps_datastruc = pd.DataFrame.from_dict(apps)
    ModDfApps = data_configure(apps_datastruc)
    ModDfApps.drop([
        'app_type', 'authors', 'git_commit_hash', 'icon', 'input_types',
        'module_name', 'name', 'namespace', 'output_types', 'subtitle',
        'tooltip', 'ver'
    ], axis=1, inplace=True)
    keys = list(
        set([item for sublist in list(ModDfApps.categories) for item in sublist]))
    app_dict = {k: [] for k in keys}
    for i in ModDfApps.index.values:
        app_category_lst = ModDfApps["categories"][i]
        for category in app_category_lst:
            if category in app_dict.keys():
                app_dict[category].append(ModDfApps["id"][i])
                app_dict[category] = list(set(app_dict[category]))
            else:
                raise KeyError("{} not a KBase app category".format(category))
    return app_dict
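# Usage sketch (hedged): create_app_dictionary() reads the CATALOG_URL and
# NARRATIVE_METHOD_STORE environment variables and calls live KBase services,
# so this only runs with network access and the imports from the module-level
# variant below in scope. The fallback URLs are the ones that variant hardcodes.
import os

os.environ.setdefault('CATALOG_URL', 'https://kbase.us/services/catalog')
os.environ.setdefault('NARRATIVE_METHOD_STORE',
                      'https://kbase.us/services/narrative_method_store/rpc')

app_dict = create_app_dictionary()  # maps each release category to its app ids
for category, app_ids in sorted(app_dict.items()):
    print(category, len(app_ids))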
def __init_client(client_name):
    if client_name == 'workspace':
        c = Workspace(URLS.workspace)
    elif client_name == 'job_service':
        c = NarrativeJobService(URLS.job_service)
    elif client_name == 'narrative_method_store':
        c = NarrativeMethodStore(URLS.narrative_method_store)
    elif client_name == 'user_and_job_state':
        c = UserAndJobState(URLS.user_and_job_state)
    elif client_name == 'catalog':
        c = Catalog(URLS.catalog)
    else:
        raise ValueError('Unknown client name "%s"' % client_name)
    __clients[client_name] = c
    return c
def setUpClass(cls):
    print('++++++++++++ RUNNING core_registration_test.py +++++++++++')
    # hack for testing!! remove when docker and NMS components can be tested
    from biokbase.catalog.registrar import Registrar
    Registrar._TEST_WITHOUT_DOCKER = True

    cls.cUtil = CatalogTestUtil('.')  # TODO: pass in test directory from outside
    cls.cUtil.setUp()
    cls.catalog = Catalog(cls.cUtil.getCatalogConfig())
    # approve developers we will use
    cls.catalog.approve_developer(cls.cUtil.admin_ctx(),
                                  cls.cUtil.admin_ctx()['user_id'])
    cls.catalog.approve_developer(cls.cUtil.admin_ctx(),
                                  cls.cUtil.user_ctx()['user_id'])
    cls.nms = NarrativeMethodStore(cls.cUtil.getCatalogConfig()['nms-url'])
def __init_client(client_name, token=None):
    if client_name == 'workspace':
        c = Workspace(URLS.workspace, token=token)
    elif client_name == 'narrative_method_store':
        c = NarrativeMethodStore(URLS.narrative_method_store, token=token)
    elif client_name == 'user_and_job_state':
        c = UserAndJobState(URLS.user_and_job_state, token=token)
    elif client_name == 'catalog':
        c = Catalog(URLS.catalog, token=token)
    elif client_name in ('service', 'service_wizard'):
        c = ServiceClient(URLS.service_wizard, use_url_lookup=True, token=token)
    elif client_name in ('execution_engine2', 'execution_engine', 'job_service'):
        c = execution_engine2(URLS.execution_engine2, token=token)
    elif client_name == 'job_service_mock':
        c = JobServiceMock()
    else:
        raise ValueError('Unknown client name "%s"' % client_name)
    return c
def __init_client(client_name, token=None): if client_name == "workspace": c = Workspace(URLS.workspace, token=token) elif client_name == "execution_engine2": c = execution_engine2(URLS.execution_engine2, token=token) elif client_name == "narrative_method_store": c = NarrativeMethodStore(URLS.narrative_method_store, token=token) elif client_name == "service": c = ServiceClient(URLS.service_wizard, use_url_lookup=True, token=token) elif client_name == "catalog": c = Catalog(URLS.catalog, token=token) else: raise ValueError('Unknown client name "%s"\n' % client_name + "The following client names are recognised:\n" + 'Catalog: "catalog"\n' + 'Execution Engine 2: "execution_engine2"\n' + 'NMS: "narrative_method_store"\n' + 'Service Wizard: "service"\n' + 'Workspace: "workspace"') return c
def __init__(self, params, registration_id, timestamp, username, token, db, temp_dir,
             docker_base_url, docker_registry_host, nms_url, nms_admin_user,
             nms_admin_psswd, module_details):
    self.db = db
    self.params = params
    # at this point, we assume git_url has been checked
    self.git_url = params['git_url']
    self.registration_id = registration_id
    self.timestamp = timestamp
    self.username = username
    self.token = token
    self.temp_dir = temp_dir
    self.docker_base_url = docker_base_url
    self.docker_registry_host = docker_registry_host
    self.nms_url = nms_url
    self.nms_admin_user = nms_admin_user
    self.nms_admin_psswd = nms_admin_psswd
    self.nms = NarrativeMethodStore(self.nms_url, user_id=self.nms_admin_user,
                                    password=self.nms_admin_psswd)
    # (most of) the mongo document for this module snapshot before this registration
    self.module_details = module_details
# Create App Dictionary: main function
import requests

import pandas as pd

from biokbase.catalog.Client import Catalog
from biokbase.narrative_method_store.client import NarrativeMethodStore
from data_configure import data_configure

requests.packages.urllib3.disable_warnings()

catalog = Catalog(url="https://kbase.us/services/catalog")
nms = NarrativeMethodStore(
    url="https://kbase.us/services/narrative_method_store/rpc")


def create_app_dictionary():
    apps = nms.list_methods({"tag": "release"})
    apps_datastruc = pd.DataFrame.from_dict(apps)
    ModDfApps = data_configure(apps_datastruc)
    ModDfApps.drop([
        'app_type', 'authors', 'git_commit_hash', 'icon', 'input_types',
        'module_name', 'name', 'namespace', 'output_types', 'subtitle',
        'tooltip', 'ver'
    ], axis=1, inplace=True)
    keys = list(
        set([item for sublist in list(ModDfApps.categories) for item in sublist]))
    app_dict = {k: [] for k in keys}
    for i in ModDfApps.index.values:
        app_category_lst = ModDfApps["categories"][i]
        for category in app_category_lst:
            if category in app_dict.keys():
                app_dict[category].append(ModDfApps["id"][i])
                app_dict[category] = list(set(app_dict[category]))
            else:
                raise KeyError("{} not a KBase app category".format(category))
    return app_dict
class Registrar:
    # params is passed in from the controller, should be the same as passed into the spec
    # db is a reference to the Catalog DB interface (usually a MongoCatalogDBI instance)
    def __init__(self, params, registration_id, timestamp, username, is_admin, token,
                 db, temp_dir, docker_base_url, docker_registry_host,
                 docker_push_allow_insecure, nms_url, nms_admin_token, module_details,
                 ref_data_base, kbase_endpoint, prev_dev_version):
        self.db = db
        self.params = params
        # at this point, we assume git_url has been checked
        self.git_url = params['git_url']
        self.registration_id = registration_id
        self.timestamp = timestamp
        self.username = username
        self.is_admin = is_admin
        self.token = token
        self.temp_dir = temp_dir
        self.docker_base_url = docker_base_url
        self.docker_registry_host = docker_registry_host
        self.docker_push_allow_insecure = docker_push_allow_insecure
        self.nms_url = nms_url
        self.nms = NarrativeMethodStore(self.nms_url, token=nms_admin_token)
        self.local_function_reader = LocalFunctionReader()
        # (most of) the mongo document for this module snapshot before this registration
        self.module_details = module_details
        self.log_buffer = []
        self.last_log_time = time.time()  # in seconds
        self.log_interval = 1.0  # save log to mongo every second
        self.ref_data_base = ref_data_base
        self.kbase_endpoint = kbase_endpoint
        self.prev_dev_version = prev_dev_version

    def start_registration(self):
        try:
            self.logfile = codecs.open(
                self.temp_dir + '/registration.log.' + self.registration_id,
                'w', 'utf-8')
            self.log('Registration started on ' + str(datetime.datetime.now()) +
                     ' by ' + self.username)
            self.log('Registration ID: ' + str(self.registration_id))
            self.log('Registration Parameters: ' + str(self.params))

            ##############################
            # 1 - clone the repo into the temp directory that should already be reserved for us
            self.set_build_step('cloning git repo')
            if not os.path.isdir(os.path.join(self.temp_dir, self.registration_id)):
                raise ValueError(
                    'Directory for the git clone was not allocated! This is an '
                    'internal catalog server error, please report this problem.')
            basedir = os.path.join(self.temp_dir, self.registration_id, 'module_repo')
            parsed_url = urlparse(self.git_url)

            self.log('Attempting to clone into: ' + basedir)
            self.log('git clone ' + self.git_url)
            subprocess.check_call(['git', 'clone', self.git_url, basedir])
            # try to get hash from repo
            git_commit_hash = str(
                subprocess.check_output(['git', 'log', '--pretty=%H', '-n', '1'],
                                        cwd=basedir)).rstrip()
            self.log('current commit hash at HEAD: ' + git_commit_hash)
            if 'git_commit_hash' in self.params:
                if self.params['git_commit_hash']:
                    self.log('git checkout ' + self.params['git_commit_hash'].strip())
                    subprocess.check_call(
                        ['git', 'checkout', '--quiet',
                         self.params['git_commit_hash']],
                        cwd=basedir)
                    git_commit_hash = self.params['git_commit_hash'].strip()

            # check if this was a git_commit_hash that was already released- if so, we
            # abort for now (we could just update the dev tag in the future)
            for r in self.module_details['release_version_list']:
                if r['git_commit_hash'] == git_commit_hash:
                    raise ValueError(
                        'The specified commit is already released. You cannot '
                        'reregister that commit version or image.')
            # do the same for beta versions for now
            if 'beta' in self.module_details['current_versions'] and \
                    self.module_details['current_versions']['beta'] is not None:
                if self.module_details['current_versions']['beta']['git_commit_hash'] \
                        == git_commit_hash:
                    raise ValueError(
                        'The specified commit is already registered and in beta. You '
                        'cannot reregister that commit version or image.')

            ##############################
            # 2 - sanity check (things parse, files exist, module_name matches, etc)
            self.set_build_step('reading files and performing basic checks')
            self.sanity_checks_and_parse(basedir, git_commit_hash)

            ##############################
            # 2.5 - dealing with git releases .git/config.lock, if it still exists after 5s then kill it
            ###### should no longer need this after switching to subprocess
            # git_config_lock_file = os.path.join(basedir, ".git", "config.lock")
            # if os.path.exists(git_config_lock_file):
            #     self.log('.git/config.lock exists, waiting 5s for it to release')
            #     time.sleep(5)
            #     if os.path.exists(git_config_lock_file):
            #         self.log('.git/config.lock file still there, we are just going to delete it....')
            #         os.remove(git_config_lock_file)

            ##############################
            # 3 - docker build - in progress
            # perhaps make this a self attr?
            module_name_lc = self.get_required_field_as_string(
                self.kb_yaml, 'module-name').strip().lower()
            self.image_name = self.docker_registry_host + '/kbase:' + \
                module_name_lc + '.' + str(git_commit_hash)
            ref_data_folder = None
            ref_data_ver = None
            compilation_report = None
            if not Registrar._TEST_WITHOUT_DOCKER:
                # timeout set to 24 hours because we often get timeouts if multiple
                # people try to push at the same time
                dockerclient = None
                docker_timeout = 86400
                if len(str(self.docker_base_url)) > 0:
                    dockerclient = DockerClient(base_url=str(self.docker_base_url),
                                                timeout=docker_timeout)
                else:
                    # docker base URL is not set in config, let's use Docker-related
                    # env-vars in this case
                    docker_host = os.environ['DOCKER_HOST']
                    if docker_host is None or len(docker_host) == 0:
                        raise ValueError(
                            'Docker host should be defined either in configuration '
                            '(docker-base-url property) or in DOCKER_HOST '
                            'environment variable')
                    docker_tls_verify = os.environ['DOCKER_TLS_VERIFY']
                    if docker_host.startswith('tcp://'):
                        docker_protocol = "http"
                        if (docker_tls_verify is not None) and \
                                docker_tls_verify == '1':
                            docker_protocol = "https"
                        docker_host = docker_host.replace('tcp://',
                                                          docker_protocol + '://')
                    docker_cert_path = os.environ['DOCKER_CERT_PATH']
                    docker_tls = False
                    if (docker_cert_path is not None) and len(docker_cert_path) > 0:
                        docker_tls = DockerTLSConfig(
                            verify=False,
                            client_cert=(docker_cert_path + '/cert.pem',
                                         docker_cert_path + '/key.pem'))
                    self.log("Docker settings from environment variables are used: "
                             "docker-host = " + docker_host + ", docker_cert_path = " +
                             str(docker_cert_path))
                    dockerclient = DockerClient(base_url=docker_host,
                                                timeout=docker_timeout,
                                                version='auto', tls=docker_tls)
                # look for docker image
                # this tosses cookies if image doesn't exist, so wrap in try, and
                # build if try reports "not found"
                # self.log(str(dockerclient.inspect_image(repo_name)))

                # if image does not exist, build and set state
                self.set_build_step('building the docker image')
                # imageId is not yet populated properly
                imageId = self.build_docker_image(dockerclient, self.image_name,
                                                  basedir)

                # check if reference data version is defined in kbase.yml
                if 'data-version' in self.kb_yaml:
                    ref_data_ver = str(self.kb_yaml['data-version']).strip()
                    if ref_data_ver:
                        ref_data_folder = module_name_lc
                        target_ref_data_dir = os.path.join(
                            self.ref_data_base, ref_data_folder, ref_data_ver)
                        if os.path.exists(target_ref_data_dir):
                            self.log("Reference data for " + ref_data_folder + "/" +
                                     ref_data_ver + " was already prepared, "
                                     "initialization step is skipped")
                        else:
                            self.set_build_step(
                                'preparing reference data (running init entry-point), '
                                'ref-data version: ' + ref_data_ver)
                            self.prepare_ref_data(
                                dockerclient, self.image_name, self.ref_data_base,
                                ref_data_folder, ref_data_ver, basedir, self.temp_dir,
                                self.registration_id, self.token, self.kbase_endpoint)

                self.set_build_step('preparing compilation report')
                self.log('Preparing compilation report.')
                # Trying to extract compilation report with line numbers of funcdefs
                # from docker image. There is a "report" entry-point command
                # responsible for that. In case there are any errors we just skip it.
                compilation_report = self.prepare_compilation_report(
                    dockerclient, self.image_name, basedir, self.temp_dir,
                    self.registration_id, self.token, self.kbase_endpoint)
                if compilation_report is None:
                    raise ValueError(
                        'Unable to generate a compilation report, which is now '
                        'required, so your registration cannot continue. If you have '
                        'been successfully registering this module already, this '
                        'means that you may need to update to the latest version of '
                        'the KBase SDK and rebuild your makefile.')
                self.local_function_reader.finish_validation(compilation_report)
                self.log('Report complete')

                self.set_build_step('pushing docker image to registry')
                self.push_docker_image(dockerclient, self.image_name)
            else:
                self.log('IN TEST MODE!! SKIPPING DOCKER BUILD AND DOCKER '
                         'REGISTRY UPDATE!!')

            # 4 - Update the DB
            self.set_build_step('updating the catalog')
            self.update_the_catalog(basedir, ref_data_folder, ref_data_ver,
                                    compilation_report)

            self.build_is_complete()

        except Exception as e:
            # set the build state to error and log it
            self.set_build_error(str(e))
            self.log(traceback.format_exc(), is_error=True)
            self.log('BUILD_ERROR: ' + str(e), is_error=True)
            if self.prev_dev_version:
                self.log('Reverting dev version to git_commit_hash=' +
                         self.prev_dev_version['git_commit_hash'] + ', version=' +
                         self.prev_dev_version['version'] + ', git_commit_message=' +
                         self.prev_dev_version['git_commit_message'])
                self.db.update_dev_version(self.prev_dev_version,
                                           git_url=self.git_url)
        finally:
            self.flush_log_to_db()
            self.logfile.close()
            self.cleanup()

    def sanity_checks_and_parse(self, basedir, git_commit_hash):
        # check that files exist
        yaml_filename = 'kbase.yaml'
        if not os.path.isfile(os.path.join(basedir, 'kbase.yaml')):
            if not os.path.isfile(os.path.join(basedir, 'kbase.yml')):
                raise ValueError(
                    'kbase.yaml file does not exist in repo, but is required!')
            else:
                yaml_filename = 'kbase.yml'

        # parse some stuff, and check for things
        with codecs.open(os.path.join(basedir, yaml_filename), 'r', "utf-8",
                         errors='ignore') as kb_yaml_file:
            kb_yaml_string = kb_yaml_file.read()
        self.kb_yaml = yaml.load(kb_yaml_string)
        self.log('=====kbase.yaml parse:')
        self.log(pprint.pformat(self.kb_yaml))
        self.log('=====end kbase.yaml')

        module_name = self.get_required_field_as_string(
            self.kb_yaml, 'module-name').strip()
        module_description = self.get_required_field_as_string(
            self.kb_yaml, 'module-description').strip()
        version = self.get_required_field_as_string(
            self.kb_yaml, 'module-version').strip()
        # must be a semantic version
        if not semantic_version.validate(version):
            raise ValueError('Invalid version string in kbase.yaml - must be in '
                             'semantic version format. See http://semver.org')
        service_language = self.get_required_field_as_string(
            self.kb_yaml, 'service-language').strip()
        owners = self.get_required_field_as_list(self.kb_yaml, 'owners')
        service_config = self.get_optional_field_as_dict(self.kb_yaml,
                                                         'service-config')
        if service_config:
            # validate service_config parameters
            if 'dynamic-service' in service_config:
                if not isinstance(service_config['dynamic-service'], bool):
                    raise ValueError(
                        'Invalid service-config in kbase.yaml - "dynamic-service" '
                        'property must be a boolean "true" or "false".')

        # module_name must match what exists (unless it is not yet defined)
        if 'module_name' in self.module_details:
            if self.module_details['module_name'] != module_name:
                raise ValueError(
                    'kbase.yaml file module_name field has changed since last '
                    'version! Module names are permanent- if this is a problem, '
                    'contact a kbase admin.')
        else:
            # This must be the first registration, so the module must not exist yet
            self.check_that_module_name_is_valid(module_name)

        # associate the module_name with the log file for easier searching (if we
        # fail sooner, then the module name cannot be used to lookup this log)
        self.db.set_build_log_module_name(self.registration_id, module_name)

        # you can't remove yourself from the owners list, or register something
        # that you are not an owner of
        if self.username not in owners and self.is_admin is False:
            raise ValueError('Your kbase username (' + self.username + ') must be '
                             'in the owners list in the kbase.yaml file.')
        # OPTIONAL TODO: check if all the users are on the owners list? not
        # necessarily required, because we do a check during registration of the
        # person who started the registration...

        # TODO: check for directory structure, method spec format, documentation, version
        self.validate_method_specs(basedir)

        # initial validation of the local function specifications
        lf_report = self.local_function_reader.parse_and_basic_validation(
            basedir, self.module_details, module_name, version, git_commit_hash)
        self.log(self.local_function_reader.report_to_string_for_log(lf_report))
        if len(lf_report['functions_errored']) > 0:
            raise ValueError('Errors exist in local function specifications.')

        # return the parse so we can figure things out later
        return self.kb_yaml

    def check_that_module_name_is_valid(self, module_name):
        if self.db.is_registered(module_name=module_name):
            raise ValueError('Module name (in kbase.yaml) is already registered. '
                             'Please specify a different name and try again.')
        if self.db.module_name_lc_exists(module_name_lc=module_name.lower()):
            raise ValueError('The case-insensitive module name (in kbase.yaml) is '
                             'not unique. Please specify a different name.')
        # only allow alphanumeric and underscore
        if not re.match(r'^[A-Za-z0-9_]+$', module_name):
            raise ValueError('Module names must be alphanumeric characters '
                             '(including underscores) only, with no spaces.')

    def update_the_catalog(self, basedir, ref_data_folder, ref_data_ver,
                           compilation_report):
        # get the basic info that we need
        commit_hash = str(
            subprocess.check_output(['git', 'log', '--pretty=%H', '-n', '1'],
                                    cwd=basedir)).rstrip()
        commit_message = str(
            subprocess.check_output(['git', 'log', '--pretty=%B', '-n', '1'],
                                    cwd=basedir)).rstrip()

        module_name = self.get_required_field_as_string(self.kb_yaml, 'module-name')
        module_description = self.get_required_field_as_string(
            self.kb_yaml, 'module-description')
        version = self.get_required_field_as_string(self.kb_yaml, 'module-version')
        service_language = self.get_required_field_as_string(
            self.kb_yaml, 'service-language')
        owners = self.get_required_field_as_list(self.kb_yaml, 'owners')
        service_config = self.get_optional_field_as_dict(self.kb_yaml,
                                                         'service-config')

        # first update the module name, which is now permanent, if we haven't already
        if ('module_name' not in self.module_details) or \
                ('module_name_lc' not in self.module_details):
            error = self.db.set_module_name(self.git_url, module_name)
            if error is not None:
                raise ValueError('Unable to set module_name - there was an '
                                 'internal database error.' + error)

        # TODO: Could optimize by combining all these things into one mongo call,
        # but for now this is easier. Combining it into one call would just mean
        # that this update happens as a single transaction, but a partial update
        # for now that fails midstream is probably not a huge issue- we can always
        # reregister.

        # next update the basic information
        info = {
            'description': module_description,
            'language': service_language
        }
        if service_config and service_config['dynamic-service']:
            info['dynamic_service'] = 1
        else:
            info['dynamic_service'] = 0
        local_functions = self.local_function_reader.extract_lf_records()
        if len(local_functions) > 0:
            info['local_functions'] = 1
        else:
            info['local_functions'] = 0
        self.log('new info: ' + pprint.pformat(info))
        error = self.db.set_module_info(info, git_url=self.git_url)
        if error is not None:
            raise ValueError('Unable to set module info - there was an internal '
                             'database error: ' + str(error))

        # next update the owners
        ownersListForUpdate = []
        for o in owners:
            # TODO: add some validation that the username is a valid kbase user
            ownersListForUpdate.append({'kb_username': o})
        self.log('new owners list: ' + pprint.pformat(ownersListForUpdate))
        error = self.db.set_module_owners(ownersListForUpdate, git_url=self.git_url)
        if error is not None:
            raise ValueError('Unable to set module owners - there was an internal '
                             'database error: ' + str(error))

        # finally update the actual dev version info
        narrative_methods = []
        if os.path.isdir(os.path.join(basedir, 'ui', 'narrative', 'methods')):
            for m in os.listdir(os.path.join(basedir, 'ui', 'narrative', 'methods')):
                if os.path.isdir(os.path.join(basedir, 'ui', 'narrative',
                                              'methods', m)):
                    narrative_methods.append(m)

        if len(local_functions) > 0:
            self.log('Saving local function specs:')
            self.log(pprint.pformat(local_functions))
            error = self.db.save_local_function_specs(local_functions)
            if error is not None:
                raise ValueError('There was an error saving local function specs, '
                                 'DB says: ' + str(error))

        new_version = {
            'module_name': module_name.strip(),
            'module_name_lc': module_name.strip().lower(),
            'module_description': module_description,
            'released': 0,
            'released_timestamp': None,
            'notes': '',
            'timestamp': self.timestamp,
            'registration_id': self.registration_id,
            'version': version,
            'git_commit_hash': commit_hash,
            'git_commit_message': commit_message,
            'narrative_methods': narrative_methods,
            'local_functions': self.local_function_reader.extract_lf_names(),
            'docker_img_name': self.image_name,
            'compilation_report': compilation_report
        }
        if ref_data_ver:
            new_version['data_folder'] = ref_data_folder
            new_version['data_version'] = ref_data_ver
        if service_config and service_config['dynamic-service']:
            new_version['dynamic_service'] = 1
        else:
            new_version['dynamic_service'] = 0
        self.log('new dev version object: ' + pprint.pformat(new_version))
        error = self.db.update_dev_version(new_version)
        if error is not None:
            raise ValueError('Unable to update dev version - there was an internal '
                             'database error: ' + str(error))

        # push to NMS
        self.log('registering specs with NMS')
        self.nms.register_repo({
            'git_url': self.git_url,
            'git_commit_hash': commit_hash
        })

        self.log('\ndone')
        # done!!!

    def validate_method_specs(self, basedir):
        self.log('validating narrative method specifications')
        if os.path.isdir(os.path.join(basedir, 'ui', 'narrative', 'methods')):
            for m in os.listdir(os.path.join(basedir, 'ui', 'narrative', 'methods')):
                if os.path.isdir(os.path.join(basedir, 'ui', 'narrative',
                                              'methods', m)):
                    self.log(' - validating method: ' + m)
                    # first grab the spec and display files, which are required
                    method_path = os.path.join(basedir, 'ui', 'narrative',
                                               'methods', m)
                    if not os.path.isfile(os.path.join(method_path, 'spec.json')):
                        raise ValueError('Invalid narrative method specification (' +
                                         m + '): No spec.json file defined.')
                    if not os.path.isfile(os.path.join(method_path, 'display.yaml')):
                        raise ValueError('Invalid narrative method specification (' +
                                         m + '): No display.yaml file defined.')
                    with codecs.open(os.path.join(method_path, 'spec.json'), 'r',
                                     "utf-8", errors='ignore') as spec_json_file:
                        spec_json = spec_json_file.read()
                    with codecs.open(os.path.join(method_path, 'display.yaml'), 'r',
                                     "utf-8", errors='ignore') as display_yaml_file:
                        display_yaml = display_yaml_file.read()

                    # gather any extra html files
                    extraFiles = {}
                    for extra_file_name in os.listdir(method_path):
                        if not os.path.isfile(os.path.join(method_path,
                                                           extra_file_name)):
                            continue
                        if not extra_file_name.endswith('.html'):
                            continue
                        with codecs.open(os.path.join(method_path, extra_file_name),
                                         'r', "utf-8",
                                         errors='ignore') as extra_file:
                            extraFiles[extra_file_name] = extra_file.read()

                    # validate against the NMS target endpoint
                    result = self.nms.validate_method({
                        'id': m,
                        'spec_json': spec_json,
                        'display_yaml': display_yaml,
                        'extra_files': extraFiles
                    })

                    # inspect results
                    if result['is_valid'] > 0:
                        self.log(' - valid!')
                        if 'warnings' in result:
                            if result['warnings']:
                                for w in result['warnings']:
                                    self.log('   - warning: ' + w)
                    else:
                        self.log(' - not valid!', is_error=True)
                        if 'errors' in result:
                            if result['errors']:
                                for e in result['errors']:
                                    self.log('   - error: ' + e, is_error=True)
                        else:
                            self.log('   - error is undefined!', is_error=True)
                        raise ValueError('Invalid narrative method specification (' +
                                         m + ')')
        else:
            self.log(' - no ui/narrative/methods directory found, so no narrative '
                     'methods will be deployed')

    def get_required_field_as_string(self, kb_yaml, field_name):
        if field_name not in kb_yaml:
            raise ValueError('kbase.yaml file missing "' + field_name +
                             '" required field')
        value = kb_yaml[field_name].strip()
        if not value:
            raise ValueError('kbase.yaml file missing value for "' + field_name +
                             '" required field')
        return value

    def get_required_field_as_list(self, kb_yaml, field_name):
        if field_name not in kb_yaml:
            raise ValueError('kbase.yaml file missing "' + field_name +
                             '" required field')
        value = kb_yaml[field_name]
        if not type(value) is list:
            raise ValueError('kbase.yaml file "' + field_name +
                             '" required field must be a list')
        return value

    def get_optional_field_as_dict(self, kb_yaml, field_name):
        if field_name not in kb_yaml:
            return None
        value = kb_yaml[field_name]
        if not type(value) is dict:
            raise ValueError('kbase.yaml file "' + field_name +
                             '" optional field must be a dict')
        return value

    def log(self, message, no_end_line=False, is_error=False):
        if no_end_line:
            content = message
        else:
            content = message + '\n'
        self.logfile.write(content)
        self.logfile.flush()

        lines = content.splitlines()
        for l in lines:
            # add each line to the buffer
            if len(l) > 1000:
                l = l[0:1000] + ' ... truncated to 1k characters of ' + str(len(l))
            self.log_buffer.append({'content': l + '\n', 'error': is_error})

        # save the buffer to mongo if enough time has elapsed, or the buffer is
        # more than 1000 lines
        if (time.time() - self.last_log_time > self.log_interval) or \
                (len(self.log_buffer) > 1000):
            self.flush_log_to_db()

    def flush_log_to_db(self):
        # todo: if we lose log lines, that's ok. Make sure we handle the case
        # where the log is larger than the mongo doc size.
        self.db.append_to_build_log(self.registration_id, self.log_buffer)
        self.log_buffer = []  # clear the buffer
        self.last_log_time = time.time()  # reset the log timer

    def set_build_step(self, step):
        self.db.set_module_registration_state(git_url=self.git_url,
                                              new_state='building: ' + step)
        self.db.set_build_log_state(self.registration_id, 'building: ' + step)

    def set_build_error(self, error_message):
        self.db.set_module_registration_state(git_url=self.git_url,
                                              new_state='error',
                                              error_message=error_message)
        self.db.set_build_log_state(self.registration_id, 'error',
                                    error_message=error_message)

    def build_is_complete(self):
        self.db.set_module_registration_state(git_url=self.git_url,
                                              new_state='complete')
        self.db.set_build_log_state(self.registration_id, 'complete')

    def cleanup(self):
        if os.path.isdir(os.path.join(self.temp_dir, self.registration_id)):
            shutil.rmtree(os.path.join(self.temp_dir, self.registration_id))

    def build_docker_image(self, docker_client, image_name, basedir):
        self.log('\nBuilding the docker image for ' + image_name)

        # examine stream to determine success/failure of build
        imageId = None
        for lines in docker_client.build(path=basedir, rm=True, tag=image_name,
                                         pull=False):
            for line in lines.strip().splitlines():
                line_parse = json.loads(line.strip())
                if 'stream' in line_parse:
                    self.log(line_parse['stream'], no_end_line=True)
                if 'errorDetail' in line_parse:
                    self.log(str(line_parse), no_end_line=True)
                    raise ValueError('Docker build failed: ' +
                                     str(line_parse['errorDetail']))

        imageId = docker_client.inspect_image(image_name)['Id']
        self.log('Docker build successful.')
        self.log('    Image Id:   ' + str(imageId))
        self.log('    Image Name: ' + str(image_name) + '\n\n')
        return imageId

    def push_docker_image(self, docker_client, image_name):
        self.log('\nPushing docker image to registry for ' + image_name)
        # This logic supports images with a "host:port/" prefix for a private registry
        colon_pos = image_name.rfind(':')
        image = image_name[:colon_pos]
        tag = image_name[colon_pos + 1:]
        # response = [line for line in docker_client.push(image, tag=tag, stream=True)]
        # response_stream = response
        # self.log(str(response_stream))
        # to do: examine stream to determine success/failure of build
        if self.docker_push_allow_insecure:
            print("Docker push: insecure_registry: " +
                  str(self.docker_push_allow_insecure))
        else:
            print("Docker push: insecure_registry: None")
        for lines in docker_client.push(
                image, tag=tag, stream=True,
                insecure_registry=self.docker_push_allow_insecure):
            for line in lines.strip().splitlines():
                # example line:
                # '{"status":"Pushing","progressDetail":{"current":32,"total":32},"progress":"[==================================================\\u003e] 32 B/32 B","id":"da200da4256c"}'
                line_parse = json.loads(line)
                log_line = ''
                if 'id' in line_parse:
                    log_line += line_parse['id'] + ' - '
                if 'status' in line_parse:
                    log_line += line_parse['status']
                if 'progress' in line_parse:
                    log_line += ' - ' + line_parse['progress']
                # if 'progressDetail' in line_parse:
                #     self.log(' - ' + str(line_parse['progressDetail']), no_end_line=True)

                # catch anything unexpected; we should probably throw an error here
                for key in line_parse:
                    if key not in ['id', 'status', 'progress', 'progressDetail']:
                        log_line += '[' + key + '=' + str(line_parse[key]) + '] '
                self.log(log_line)

                if 'error' in line_parse:
                    self.log(str(line_parse), no_end_line=True)
                    raise ValueError('Docker push failed: ' +
                                     str(line_parse['error']))

        self.log('done pushing docker image to registry for ' + image_name + '\n')

    def run_docker_container(self, dockerclient, image_name, token, kbase_endpoint,
                             binds, work_dir, command, print_details=False):
        cnt_id = None
        try:
            token_file = os.path.join(work_dir, "token")
            with open(token_file, "w") as file:
                file.write(token)
            config_file = os.path.join(work_dir, "config.properties")
            with open(config_file, "w") as file:
                file.write("[global]\n" +
                           "job_service_url = " + kbase_endpoint +
                           "/userandjobstate\n" +
                           "workspace_url = " + kbase_endpoint + "/ws\n" +
                           "shock_url = " + kbase_endpoint + "/shock-api\n" +
                           "kbase_endpoint = " + kbase_endpoint + "\n")
            if not binds:
                binds = {}
            binds[work_dir] = {"bind": "/kb/module/work", "mode": "rw"}
            container = dockerclient.create_container(
                image=image_name, command=command, tty=True,
                host_config=dockerclient.create_host_config(binds=binds))
            cnt_id = container.get('Id')
            self.log('Running "' + command + '" entry-point command, '
                     'container Id=' + cnt_id)
            if print_details:
                self.log("Command details:")
                self.log("  Image name: " + image_name)
                self.log("  Binds: " + str(binds))
                self.log("  KBase-endpoint: " + kbase_endpoint)
                self.log("  Necessary files in '" + work_dir + "': 'token', "
                         "'config.properties'")
                self.log("  Tty: True")
                self.log("  Docker command: " + command)
            dockerclient.start(container=cnt_id)
            stream = dockerclient.logs(container=cnt_id, stdout=True, stderr=True,
                                       stream=True)
            line = ""
            for char in stream:
                if char == '\r':
                    continue
                if char == '\n':
                    self.log(line)
                    line = ""
                else:
                    line += char
            if len(line) > 0:
                self.log(line)
        finally:
            # cleaning up the container
            try:
                if cnt_id:
                    dockerclient.remove_container(container=cnt_id, v=True,
                                                  force=True)
                    self.log("Docker container (Id=" + cnt_id + ") was cleaned up")
            except Exception:
                pass

    def prepare_ref_data(self, dockerclient, image_name, ref_data_base,
                         ref_data_folder, ref_data_ver, basedir, temp_dir,
                         registration_id, token, kbase_endpoint):
        self.log('\nReference data: creating docker container for initialization')
        if not os.path.exists(ref_data_base):
            raise ValueError("Reference data network folder doesn't exist: " +
                             ref_data_base)
        upper_target_dir = os.path.join(ref_data_base, ref_data_folder)
        if not os.path.exists(upper_target_dir):
            os.mkdir(upper_target_dir)
        temp_ref_data_dir = os.path.join(upper_target_dir,
                                         "temp_" + registration_id)
        try:
            repo_data_dir = os.path.join(basedir, "data")
            os.mkdir(temp_ref_data_dir)
            binds = {
                temp_ref_data_dir: {"bind": "/data", "mode": "rw"},
                repo_data_dir: {"bind": "/kb/module/data", "mode": "rw"}
            }
            temp_work_dir = os.path.join(temp_dir, registration_id,
                                         'ref_data_workdir')
            os.mkdir(temp_work_dir)
            self.run_docker_container(dockerclient, image_name, token,
                                      kbase_endpoint, binds, temp_work_dir, 'init',
                                      print_details=True)
            ready_file = os.path.join(temp_ref_data_dir, "__READY__")
            if os.path.exists(ready_file):
                target_dir = os.path.join(upper_target_dir, ref_data_ver)
                os.rename(temp_ref_data_dir, target_dir)
                self.log("Reference data was successfully deployed into " +
                         target_dir)
            else:
                raise ValueError("__READY__ file is not detected in reference data "
                                 "folder, produced data will be discarded")
        finally:
            # cleaning up temporary ref-data (if not renamed into permanent after success)
            try:
                if os.path.exists(temp_ref_data_dir):
                    shutil.rmtree(temp_ref_data_dir)
            except Exception:
                pass

    def prepare_compilation_report(self, dockerclient, image_name, basedir,
                                   temp_dir, registration_id, token,
                                   kbase_endpoint):
        self.log('\nCompilation report: creating docker container')
        try:
            temp_work_dir = os.path.join(temp_dir, registration_id,
                                         'report_workdir')
            os.mkdir(temp_work_dir)
            self.run_docker_container(dockerclient, image_name, token,
                                      kbase_endpoint, None, temp_work_dir, 'report')
            report_file = os.path.join(temp_work_dir, 'compile_report.json')
            if not os.path.exists(report_file):
                self.log("Report file doesn't exist: " + report_file)
                return None
            else:
                with codecs.open(report_file, 'r', 'utf-8', errors='ignore') as f:
                    return json.load(f)
        except Exception as e:
            self.log("Error preparing compilation log: " + str(e))
            return None
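# Minimal sketch (hedged) of the buffered-logging pattern used by Registrar.log and
# flush_log_to_db above: lines accumulate in memory and are flushed to the DB once
# log_interval seconds have passed or the buffer exceeds 1000 lines. FakeDB is a
# hypothetical stand-in for the MongoCatalogDBI interface, not a real class.
import time


class FakeDB:
    def append_to_build_log(self, registration_id, buffer):
        print('flushing %d line(s) for %s' % (len(buffer), registration_id))


class BufferedLog:
    def __init__(self, db, registration_id, log_interval=1.0):
        self.db = db
        self.registration_id = registration_id
        self.log_interval = log_interval
        self.log_buffer = []
        self.last_log_time = time.time()

    def log(self, message, is_error=False):
        for l in (message + '\n').splitlines():
            if len(l) > 1000:  # same 1k truncation rule as Registrar.log
                l = l[0:1000] + ' ... truncated to 1k characters of ' + str(len(l))
            self.log_buffer.append({'content': l + '\n', 'error': is_error})
        if time.time() - self.last_log_time > self.log_interval or \
                len(self.log_buffer) > 1000:
            self.flush()

    def flush(self):
        self.db.append_to_build_log(self.registration_id, self.log_buffer)
        self.log_buffer = []  # clear the buffer
        self.last_log_time = time.time()  # reset the log timer


log = BufferedLog(FakeDB(), 'reg-123')
log.log('step one')
log.flush()  # mirrors the flush_log_to_db() call in the finally block above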
class Registrar:
    # params is passed in from the controller, should be the same as passed into the spec
    # db is a reference to the Catalog DB interface (usually a MongoCatalogDBI instance)
    def __init__(self, params, registration_id, timestamp, username, token, db,
                 temp_dir, docker_base_url, docker_registry_host, nms_url,
                 nms_admin_user, nms_admin_psswd, module_details):
        self.db = db
        self.params = params
        # at this point, we assume git_url has been checked
        self.git_url = params['git_url']
        self.registration_id = registration_id
        self.timestamp = timestamp
        self.username = username
        self.token = token
        self.temp_dir = temp_dir
        self.docker_base_url = docker_base_url
        self.docker_registry_host = docker_registry_host
        self.nms_url = nms_url
        self.nms_admin_user = nms_admin_user
        self.nms_admin_psswd = nms_admin_psswd
        self.nms = NarrativeMethodStore(self.nms_url, user_id=self.nms_admin_user,
                                        password=self.nms_admin_psswd)
        # (most of) the mongo document for this module snapshot before this registration
        self.module_details = module_details

    def start_registration(self):
        try:
            self.logfile = open(
                self.temp_dir + '/registration.log.' + self.registration_id, 'w')
            self.log('Registration started on ' + str(datetime.datetime.now()) +
                     ' by ' + self.username)
            self.log('Registration ID: ' + str(self.registration_id))
            self.log('Registration Parameters: ' + str(self.params))

            ##############################
            # 1 - clone the repo into the temp directory that should already be reserved for us
            self.set_build_step('cloning git repo')
            if not os.path.isdir(os.path.join(self.temp_dir, self.registration_id)):
                raise ValueError(
                    'Directory for the git clone was not allocated! This is an '
                    'internal catalog server error, please report this problem.')
            basedir = os.path.join(self.temp_dir, self.registration_id,
                                   'module_repo')
            self.log('Attempting to clone into: ' + basedir)
            self.log('git clone ' + self.git_url)
            repo = git.Repo.clone_from(self.git_url, basedir)
            # try to get hash from repo
            self.log('current commit hash at HEAD: ' +
                     str(repo.heads.master.commit))
            git_commit_hash = repo.heads.master.commit
            if 'git_commit_hash' in self.params:
                if self.params['git_commit_hash']:
                    self.log('git checkout ' +
                             self.params['git_commit_hash'].strip())
                    repo.git.checkout(self.params['git_commit_hash'].strip())
                    git_commit_hash = self.params['git_commit_hash'].strip()

            ##############################
            # 2 - sanity check (things parse, files exist, module_name matches, etc)
            self.set_build_step('reading files and performing basic checks')
            self.sanity_checks_and_parse(repo, basedir)

            ##############################
            # 3 - docker build - in progress
            # perhaps make this a self attr?
            module_name_lc = self.get_required_field_as_string(
                self.kb_yaml, 'module-name').strip().lower()
            self.image_name = self.docker_registry_host + '/' + module_name_lc + \
                ':' + str(git_commit_hash)
            if not Registrar._TEST_WITHOUT_DOCKER:
                dockerclient = DockerClient(base_url=str(self.docker_base_url),
                                            timeout=360)
                # look for docker image
                # this tosses cookies if image doesn't exist, so wrap in try, and
                # build if try reports "not found"
                # self.log(str(dockerclient.inspect_image(repo_name)))

                # if image does not exist, build and set state
                self.set_build_step('building the docker image')
                # imageId is not yet populated properly
                imageId = self.build_docker_image(dockerclient, self.image_name,
                                                  basedir)

                self.set_build_step('pushing docker image to registry')
                self.push_docker_image(dockerclient, self.image_name)
                # self.log(str(dockerClient.containers()))
            else:
                self.log('IN TEST MODE!! SKIPPING DOCKER BUILD AND DOCKER '
                         'REGISTRY UPDATE!!')

            # 4 - Update the DB
            self.set_build_step('updating the catalog')
            self.update_the_catalog(repo, basedir)

            self.build_is_complete()

        except Exception as e:
            # set the build state to error and log it
            self.set_build_error(str(e))
            self.log(traceback.format_exc())
            self.log('BUILD_ERROR: ' + str(e))
        finally:
            self.logfile.close()
            self.cleanup()

    def sanity_checks_and_parse(self, repo, basedir):
        # check that files exist
        yaml_filename = 'kbase.yaml'
        if not os.path.isfile(os.path.join(basedir, 'kbase.yaml')):
            if not os.path.isfile(os.path.join(basedir, 'kbase.yml')):
                raise ValueError(
                    'kbase.yaml file does not exist in repo, but is required!')
            else:
                yaml_filename = 'kbase.yml'

        # parse some stuff, and check for things
        with open(os.path.join(basedir, yaml_filename)) as kb_yaml_file:
            kb_yaml_string = kb_yaml_file.read()
        self.kb_yaml = yaml.load(kb_yaml_string)
        self.log('=====kbase.yaml parse:')
        self.log(pprint.pformat(self.kb_yaml))
        self.log('=====end kbase.yaml')

        module_name = self.get_required_field_as_string(
            self.kb_yaml, 'module-name').strip()
        module_description = self.get_required_field_as_string(
            self.kb_yaml, 'module-description').strip()
        version = self.get_required_field_as_string(
            self.kb_yaml, 'module-version').strip()
        service_language = self.get_required_field_as_string(
            self.kb_yaml, 'service-language').strip()
        owners = self.get_required_field_as_list(self.kb_yaml, 'owners')

        # module_name must match what exists (unless it is not yet defined)
        if 'module_name' in self.module_details:
            if self.module_details['module_name'] != module_name:
                raise ValueError(
                    'kbase.yaml file module_name field has changed since last '
                    'version! Module names are permanent- if this is a problem, '
                    'contact a kbase admin.')
        else:
            # This must be the first registration, so the module must not exist yet
            self.check_that_module_name_is_valid(module_name)

        # you can't remove yourself from the owners list, or register something
        # that you are not an owner of
        if self.username not in owners:
            raise ValueError('Your kbase username (' + self.username + ') must be '
                             'in the owners list in the kbase.yaml file.')
        # OPTIONAL TODO: check if all the users are on the owners list? not
        # necessarily required, because we do a check during registration of the
        # person who started the registration...

        # TODO: check for directory structure, method spec format, documentation, version
        self.validate_method_specs(basedir)

        # return the parse so we can figure things out later
        return self.kb_yaml

    def check_that_module_name_is_valid(self, module_name):
        if self.db.is_registered(module_name=module_name):
            raise ValueError('Module name (in kbase.yaml) is already registered. '
                             'Please specify a different name and try again.')
        if self.db.module_name_lc_exists(module_name_lc=module_name.lower()):
            raise ValueError('The case-insensitive module name (in kbase.yaml) is '
                             'not unique. Please specify a different name.')
        # only allow alphanumeric and underscore
        if not re.match(r'^[A-Za-z0-9_]+$', module_name):
            raise ValueError('Module names must be alphanumeric characters '
                             '(including underscores) only, with no spaces.')

    def update_the_catalog(self, repo, basedir):
        # get the basic info that we need
        commit_hash = repo.head.commit.hexsha
        commit_message = repo.head.commit.message

        module_name = self.get_required_field_as_string(self.kb_yaml, 'module-name')
        module_description = self.get_required_field_as_string(
            self.kb_yaml, 'module-description')
        version = self.get_required_field_as_string(self.kb_yaml, 'module-version')
        service_language = self.get_required_field_as_string(
            self.kb_yaml, 'service-language')
        owners = self.get_required_field_as_list(self.kb_yaml, 'owners')

        # first update the module name, which is now permanent, if we haven't already
        if ('module_name' not in self.module_details) or \
                ('module_name_lc' not in self.module_details):
            error = self.db.set_module_name(self.git_url, module_name)
            if error is not None:
                raise ValueError('Unable to set module_name - there was an '
                                 'internal database error.' + error)

        # TODO: Could optimize by combining all these things into one mongo call,
        # but for now this is easier. Combining it into one call would just mean
        # that this update happens as a single transaction, but a partial update
        # for now that fails midstream is probably not a huge issue- we can always
        # reregister.

        # next update the basic information
        info = {
            'description': module_description,
            'language': service_language
        }
        self.log('new info: ' + pprint.pformat(info))
        error = self.db.set_module_info(info, git_url=self.git_url)
        if error is not None:
            raise ValueError('Unable to set module info - there was an internal '
                             'database error: ' + error)

        # next update the owners
        ownersListForUpdate = []
        for o in owners:
            # TODO: add some validation that the username is a valid kbase user
            ownersListForUpdate.append({'kb_username': o})
        self.log('new owners list: ' + pprint.pformat(ownersListForUpdate))
        error = self.db.set_module_owners(ownersListForUpdate, git_url=self.git_url)
        if error is not None:
            raise ValueError('Unable to set module owners - there was an internal '
                             'database error: ' + error)

        # finally update the actual dev version info
        narrative_methods = []
        if os.path.isdir(os.path.join(basedir, 'ui', 'narrative', 'methods')):
            for m in os.listdir(os.path.join(basedir, 'ui', 'narrative', 'methods')):
                if os.path.isdir(os.path.join(basedir, 'ui', 'narrative',
                                              'methods', m)):
                    narrative_methods.append(m)

        new_version = {
            'timestamp': self.timestamp,
            'version': version,
            'git_commit_hash': commit_hash,
            'git_commit_message': commit_message,
            'narrative_methods': narrative_methods,
            'docker_img_name': self.image_name
        }
        self.log('new dev version object: ' + pprint.pformat(new_version))
        error = self.db.update_dev_version(new_version, git_url=self.git_url)
        if error is not None:
            raise ValueError('Unable to update dev version - there was an internal '
                             'database error: ' + error)

        # push to NMS
        self.log('registering specs with NMS')
        self.nms.register_repo({
            'git_url': self.git_url,
            'git_commit_hash': commit_hash
        })

        self.log('\ndone')
        # done!!!

    def validate_method_specs(self, basedir):
        self.log('validating narrative method specifications')
        if os.path.isdir(os.path.join(basedir, 'ui', 'narrative', 'methods')):
            for m in os.listdir(os.path.join(basedir, 'ui', 'narrative', 'methods')):
                if os.path.isdir(os.path.join(basedir, 'ui', 'narrative',
                                              'methods', m)):
                    self.log(' - validating method: ' + m)
                    # first grab the spec and display files, which are required
                    method_path = os.path.join(basedir, 'ui', 'narrative',
                                               'methods', m)
                    if not os.path.isfile(os.path.join(method_path, 'spec.json')):
                        raise ValueError('Invalid narrative method specification (' +
                                         m + '): No spec.json file defined.')
                    if not os.path.isfile(os.path.join(method_path, 'display.yaml')):
                        raise ValueError('Invalid narrative method specification (' +
                                         m + '): No display.yaml file defined.')
                    with open(os.path.join(method_path, 'spec.json')) as spec_json_file:
                        spec_json = spec_json_file.read()
                    with open(os.path.join(method_path,
                                           'display.yaml')) as display_yaml_file:
                        display_yaml = display_yaml_file.read()

                    # gather any extra html files
                    extraFiles = {}
                    for extra_file_name in os.listdir(method_path):
                        if not os.path.isfile(os.path.join(method_path,
                                                           extra_file_name)):
                            continue
                        if not extra_file_name.endswith('.html'):
                            continue
                        with open(os.path.join(method_path,
                                               extra_file_name)) as extra_file:
                            extraFiles[extra_file_name] = extra_file.read()

                    # validate against the NMS target endpoint
                    result = self.nms.validate_method({
                        'id': m,
                        'spec_json': spec_json,
                        'display_yaml': display_yaml,
                        'extra_files': extraFiles
                    })

                    # inspect results
                    if result['is_valid'] > 0:
                        self.log(' - valid!')
                        if 'warnings' in result:
                            if result['warnings']:
                                for w in result['warnings']:
                                    self.log('   - warning: ' + w)
                    else:
                        self.log(' - not valid!')
                        if 'errors' in result:
                            if result['errors']:
                                for e in result['errors']:
                                    self.log('   - error: ' + e)
                        else:
                            self.log('   - error is undefined!')
                        raise ValueError('Invalid narrative method specification (' +
                                         m + ')')
        else:
            self.log(' - no ui/narrative/methods directory found, so no narrative '
                     'methods will be deployed')

    def get_required_field_as_string(self, kb_yaml, field_name):
        if field_name not in kb_yaml:
            raise ValueError('kbase.yaml file missing "' + field_name +
                             '" required field')
        value = kb_yaml[field_name].strip()
        if not value:
            raise ValueError('kbase.yaml file missing value for "' + field_name +
                             '" required field')
        return value

    def get_required_field_as_list(self, kb_yaml, field_name):
        if field_name not in kb_yaml:
            raise ValueError('kbase.yaml file missing "' + field_name +
                             '" required field')
        value = kb_yaml[field_name]
        if not type(value) is list:
            raise ValueError('kbase.yaml file "' + field_name +
                             '" required field must be a list')
        return value

    def log(self, message, no_end_line=False):
        if no_end_line:
            self.logfile.write(message)
        else:
            self.logfile.write(message + '\n')
        self.logfile.flush()

    def set_build_step(self, step):
        self.db.set_module_registration_state(git_url=self.git_url,
                                              new_state='building: ' + step)

    def set_build_error(self, error_message):
        self.db.set_module_registration_state(git_url=self.git_url,
                                              new_state='error',
                                              error_message=error_message)

    def build_is_complete(self):
        self.db.set_module_registration_state(git_url=self.git_url,
                                              new_state='complete')

    def cleanup(self):
        if os.path.isdir(os.path.join(self.temp_dir, self.registration_id)):
            shutil.rmtree(os.path.join(self.temp_dir, self.registration_id))

    def build_docker_image(self, docker_client, image_name, basedir):
        self.log('\nBuilding the docker image for ' + image_name)
        # response = [line for line in docker_client.build(path=basedir, rm=True, tag=image_name)]
        # response_stream = response
        # imageId = response_stream[-1]
        # self.log(str(response_stream[-1]))

        # examine stream to determine success/failure of build
        imageId = None
        last = {}
        for line in docker_client.build(path=basedir, rm=True, tag=image_name):
            line_parse = json.loads(line)
            if 'stream' in line_parse:
                self.log(line_parse['stream'], no_end_line=True)
            if 'errorDetail' in line_parse:
                self.log(str(line_parse), no_end_line=True)
                raise ValueError('Docker build failed: ' +
                                 str(line_parse['errorDetail']))
            last = line_parse
        if 'stream' in last and last['stream'][:19] == 'Successfully built ':
            imageId = docker_client.inspect_image(image_name)['Id']
        self.log('Docker build successful.')
        self.log('    Image Id:   ' + str(imageId))
        self.log('    Image Name: ' + image_name + '\n\n')
        return imageId

    def push_docker_image(self, docker_client, image_name):
        self.log('\nPushing docker image to registry for ' + image_name)
        (image, tag) = image_name.split(':')
        # response = [line for line in docker_client.push(image, tag=tag, stream=True)]
        # response_stream = response
        # self.log(str(response_stream))
        # to do: examine stream to determine success/failure of build
        for line in docker_client.push(image, tag=tag, stream=True):
            # example line:
            # '{"status":"Pushing","progressDetail":{"current":32,"total":32},"progress":"[==================================================\\u003e] 32 B/32 B","id":"da200da4256c"}'
            line_parse = json.loads(line)
            if 'id' in line_parse:
                self.log(line_parse['id'] + ' - ', no_end_line=True)
            if 'status' in line_parse:
                self.log(line_parse['status'], no_end_line=True)
            if 'progress' in line_parse:
                self.log(' - ' + line_parse['progress'], no_end_line=True)
            # if 'progressDetail' in line_parse:
            #     self.log(' - ' + str(line_parse['progressDetail']), no_end_line=True)
            self.log('')  # add an endline
        # check for errors here somehow!
        self.log('done pushing docker image to registry for ' + image_name + '\n')

    # Temporary flag to test everything except docker; we should remove this once
    # the test rig can fully support docker and an NMS
    _TEST_WITHOUT_DOCKER = False
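# Minimal sketch (hedged) of the stream handling used in build_docker_image above:
# docker-py's low-level build() yields JSON lines where 'stream' entries carry
# progress output and 'errorDetail' entries signal failure. The sample lines below
# are illustrative, not captured from a real build.
import json


def scan_build_stream(stream):
    last = {}
    for line in stream:
        line_parse = json.loads(line)
        if 'stream' in line_parse:
            print(line_parse['stream'], end='')
        if 'errorDetail' in line_parse:
            raise ValueError('Docker build failed: ' +
                             str(line_parse['errorDetail']))
        last = line_parse
    # same success test as build_docker_image: the final 'stream' line reports the id
    return 'stream' in last and last['stream'].startswith('Successfully built ')


fake_stream = [
    '{"stream": "Step 1/2 : FROM scratch\\n"}',
    '{"stream": "Successfully built 0123456789ab\\n"}',
]
print(scan_build_stream(fake_stream))  # True when the stream ends in success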
class Registrar:
    # params is passed in from the controller, should be the same as passed into the spec
    # db is a reference to the Catalog DB interface (usually a MongoCatalogDBI instance)
    def __init__(self, params, registration_id, timestamp, username, is_admin, token,
                 db, temp_dir, docker_base_url, docker_registry_host,
                 docker_push_allow_insecure, nms_url, nms_admin_token, module_details,
                 ref_data_base, kbase_endpoint, prev_dev_version):
        self.db = db
        self.params = params
        # at this point, we assume git_url has been checked
        self.git_url = params['git_url']
        self.registration_id = registration_id
        self.timestamp = timestamp
        self.username = username
        self.is_admin = is_admin
        self.token = token
        self.temp_dir = temp_dir
        self.docker_base_url = docker_base_url
        self.docker_registry_host = docker_registry_host
        self.docker_push_allow_insecure = docker_push_allow_insecure
        self.nms_url = nms_url
        self.nms = NarrativeMethodStore(self.nms_url, token=nms_admin_token)
        self.local_function_reader = LocalFunctionReader()
        # (most of) the mongo document for this module snapshot before this registration
        self.module_details = module_details
        self.log_buffer = []
        self.last_log_time = time.time()  # in seconds
        self.log_interval = 1.0  # save log to mongo every second
        self.ref_data_base = ref_data_base
        self.kbase_endpoint = kbase_endpoint
        self.prev_dev_version = prev_dev_version

    def start_registration(self):
        try:
            self.logfile = open(
                self.temp_dir + '/registration.log.' + self.registration_id, 'w')
            self.log('Registration started on ' + str(datetime.datetime.now()) +
                     ' by ' + self.username)
            self.log('Registration ID: ' + str(self.registration_id))
            self.log('Registration Parameters: ' + str(self.params))

            ##############################
            # 1 - clone the repo into the temp directory that should already be reserved for us
            self.set_build_step('cloning git repo')
            if not os.path.isdir(os.path.join(self.temp_dir, self.registration_id)):
                raise ValueError(
                    'Directory for the git clone was not allocated! This is an '
                    'internal catalog server error, please report this problem.')
            basedir = os.path.join(self.temp_dir, self.registration_id,
                                   'module_repo')
            parsed_url = urlparse(self.git_url)

            self.log('Attempting to clone into: ' + basedir)
            self.log('git clone ' + self.git_url)
            subprocess.check_call(['git', 'clone', self.git_url, basedir])
            # try to get hash from repo
            git_commit_hash = str(
                subprocess.check_output(['git', 'log', '--pretty=%H', '-n', '1'],
                                        cwd=basedir)).rstrip()
            self.log('current commit hash at HEAD: ' + git_commit_hash)
            if 'git_commit_hash' in self.params:
                if self.params['git_commit_hash']:
                    self.log('git checkout ' +
                             self.params['git_commit_hash'].strip())
                    subprocess.check_call(
                        ['git', 'checkout', '--quiet',
                         self.params['git_commit_hash']],
                        cwd=basedir)
                    git_commit_hash = self.params['git_commit_hash'].strip()

            # check if this was a git_commit_hash that was already released- if so,
            # we abort for now (we could just update the dev tag in the future)
            for r in self.module_details['release_version_list']:
                if r['git_commit_hash'] == git_commit_hash:
                    raise ValueError(
                        'The specified commit is already released. You cannot '
                        'reregister that commit version or image.')
            # do the same for beta versions for now
            if 'beta' in self.module_details['current_versions'] and \
                    self.module_details['current_versions']['beta'] is not None:
                if self.module_details['current_versions']['beta']['git_commit_hash'] \
                        == git_commit_hash:
                    raise ValueError(
                        'The specified commit is already registered and in beta. You '
                        'cannot reregister that commit version or image.')

            ##############################
            # 2 - sanity check (things parse, files exist, module_name matches, etc)
            self.set_build_step('reading files and performing basic checks')
            self.sanity_checks_and_parse(basedir, git_commit_hash)

            ##############################
            # 2.5 - dealing with git releases .git/config.lock, if it still exists after 5s then kill it
            ###### should no longer need this after switching to subprocess
            # git_config_lock_file = os.path.join(basedir, ".git", "config.lock")
            # if os.path.exists(git_config_lock_file):
            #     self.log('.git/config.lock exists, waiting 5s for it to release')
            #     time.sleep(5)
            #     if os.path.exists(git_config_lock_file):
            #         self.log('.git/config.lock file still there, we are just going to delete it....')
            #         os.remove(git_config_lock_file)

            ##############################
            # 3 - docker build - in progress
            # perhaps make this a self attr?
            module_name_lc = self.get_required_field_as_string(
                self.kb_yaml, 'module-name').strip().lower()
            self.image_name = self.docker_registry_host + '/kbase:' + \
                module_name_lc + '.' + str(git_commit_hash)
            ref_data_folder = None
            ref_data_ver = None
            compilation_report = None
            if not Registrar._TEST_WITHOUT_DOCKER:
                # timeout set to 30 min because we often get timeouts if multiple
                # people try to push at the same time
                dockerclient = None
                docker_timeout = 1800
                if len(str(self.docker_base_url)) > 0:
                    dockerclient = DockerClient(base_url=str(self.docker_base_url),
                                                timeout=docker_timeout)
                else:
                    # docker base URL is not set in config, let's use Docker-related
                    # env-vars in this case
                    docker_host = os.environ['DOCKER_HOST']
                    if docker_host is None or len(docker_host) == 0:
                        raise ValueError(
                            'Docker host should be defined either in configuration '
                            '(docker-base-url property) or in DOCKER_HOST '
                            'environment variable')
                    docker_tls_verify = os.environ['DOCKER_TLS_VERIFY']
                    if docker_host.startswith('tcp://'):
                        docker_protocol = "http"
                        if (docker_tls_verify is not None) and \
                                docker_tls_verify == '1':
                            docker_protocol = "https"
                        docker_host = docker_host.replace('tcp://',
                                                          docker_protocol + '://')
                    docker_cert_path = os.environ['DOCKER_CERT_PATH']
                    docker_tls = False
                    if (docker_cert_path is not None) and len(docker_cert_path) > 0:
                        docker_tls = DockerTLSConfig(
                            verify=False,
                            client_cert=(docker_cert_path + '/cert.pem',
                                         docker_cert_path + '/key.pem'))
                    self.log("Docker settings from environment variables are used: "
                             "docker-host = " + docker_host + ", docker_cert_path = " +
                             str(docker_cert_path))
                    dockerclient = DockerClient(base_url=docker_host,
                                                timeout=docker_timeout,
                                                version='auto', tls=docker_tls)
                # look for docker image
                # this tosses cookies if image doesn't exist, so wrap in try, and
                # build if try reports "not found"
                # self.log(str(dockerclient.inspect_image(repo_name)))

                # if image does not exist, build and set state
                self.set_build_step('building the docker image')
                # imageId is not yet populated properly
                imageId = self.build_docker_image(dockerclient, self.image_name,
                                                  basedir)

                # check if reference data version is defined in kbase.yml
                if 'data-version' in self.kb_yaml:
                    ref_data_ver = str(self.kb_yaml['data-version']).strip()
                    if ref_data_ver:
                        ref_data_folder = module_name_lc
                        target_ref_data_dir = os.path.join(
                            self.ref_data_base, ref_data_folder, ref_data_ver)
                        if os.path.exists(target_ref_data_dir):
                            self.log("Reference data for " + ref_data_folder + "/" +
                                     ref_data_ver + " was already prepared, "
                                     "initialization step is skipped")
                        else:
                            self.set_build_step(
                                'preparing reference data (running init entry-point), '
                                'ref-data version: ' + ref_data_ver)
                            self.prepare_ref_data(
                                dockerclient, self.image_name, self.ref_data_base,
                                ref_data_folder, ref_data_ver, basedir, self.temp_dir,
                                self.registration_id, self.token, self.kbase_endpoint)

                self.set_build_step('preparing compilation report')
                self.log('Preparing compilation report.')
                # Trying to extract compilation report with line numbers of funcdefs
                # from docker image. There is a "report" entry-point command
                # responsible for that. In case there are any errors we just skip it.
                compilation_report = self.prepare_compilation_report(
                    dockerclient, self.image_name, basedir, self.temp_dir,
                    self.registration_id, self.token, self.kbase_endpoint)
                if compilation_report is None:
                    raise ValueError(
                        'Unable to generate a compilation report, which is now '
                        'required, so your registration cannot continue. If you have '
                        'been successfully registering this module already, this '
                        'means that you may need to update to the latest version of '
                        'the KBase SDK and rebuild your makefile.')
                self.local_function_reader.finish_validation(compilation_report)
                self.log('Report complete')

                self.set_build_step('pushing docker image to registry')
                self.push_docker_image(dockerclient, self.image_name)
            else:
                self.log('IN TEST MODE!! SKIPPING DOCKER BUILD AND DOCKER '
                         'REGISTRY UPDATE!!')

            # 4 - Update the DB
            self.set_build_step('updating the catalog')
            self.update_the_catalog(basedir, ref_data_folder, ref_data_ver,
                                    compilation_report)

            self.build_is_complete()

        except Exception as e:
            # set the build state to error and log it
            self.set_build_error(str(e))
            self.log(traceback.format_exc(), is_error=True)
            self.log('BUILD_ERROR: ' + str(e), is_error=True)
            if self.prev_dev_version:
                self.log('Reverting dev version to git_commit_hash=' +
                         self.prev_dev_version['git_commit_hash'] + ', version=' +
                         self.prev_dev_version['version'] + ', git_commit_message=' +
                         self.prev_dev_version['git_commit_message'])
                self.db.update_dev_version(self.prev_dev_version,
                                           git_url=self.git_url)
        finally:
            self.flush_log_to_db()
            self.logfile.close()
            self.cleanup()

    def sanity_checks_and_parse(self, basedir, git_commit_hash):
        # check that files exist
        yaml_filename = 'kbase.yaml'
        if not os.path.isfile(os.path.join(basedir, 'kbase.yaml')):
            if not os.path.isfile(os.path.join(basedir, 'kbase.yml')):
                raise ValueError(
                    'kbase.yaml file does not exist in repo, but is required!')
            else:
                yaml_filename = 'kbase.yml'

        # parse some stuff, and check for things
        with codecs.open(os.path.join(basedir, yaml_filename), 'r', "utf-8",
                         errors='ignore') as kb_yaml_file:
            kb_yaml_string = kb_yaml_file.read()
        self.kb_yaml = yaml.load(kb_yaml_string)
        self.log('=====kbase.yaml parse:')
        self.log(pprint.pformat(self.kb_yaml))
        self.log('=====end kbase.yaml')

        module_name = self.get_required_field_as_string(
            self.kb_yaml, 'module-name').strip()
        module_description = self.get_required_field_as_string(
            self.kb_yaml, 'module-description').strip()
        version = self.get_required_field_as_string(
            self.kb_yaml, 'module-version').strip()
        # must be a semantic version
        if not semantic_version.validate(version):
            raise ValueError('Invalid version string in kbase.yaml - must be in '
                             'semantic version format. See http://semver.org')
        service_language = self.get_required_field_as_string(
            self.kb_yaml, 'service-language').strip()
        owners = self.get_required_field_as_list(self.kb_yaml, 'owners')
        service_config = self.get_optional_field_as_dict(self.kb_yaml,
                                                         'service-config')
        if service_config:
            # validate service_config parameters
            if 'dynamic-service' in service_config:
                if not isinstance(service_config['dynamic-service'], bool):
                    raise ValueError(
                        'Invalid service-config in kbase.yaml - "dynamic-service" '
                        'property must be a boolean "true" or "false".')

        # module_name must match what exists (unless it is not yet defined)
        if 'module_name' in self.module_details:
            if self.module_details['module_name'] != module_name:
                raise ValueError(
                    'kbase.yaml file module_name field has changed since last '
                    'version! Module names are permanent- if this is a problem, '
                    'contact a kbase admin.')
        else:
            # This must be the first registration, so the module must not exist yet
            self.check_that_module_name_is_valid(module_name)

        # associate the module_name with the log file for easier searching (if we
        # fail sooner, then the module name cannot be used to lookup this log)
        self.db.set_build_log_module_name(self.registration_id, module_name)

        # you can't remove yourself from the owners list, or register something
        # that you are not an owner of
        if self.username not in owners and self.is_admin is False:
            raise ValueError('Your kbase username (' + self.username + ') must be '
                             'in the owners list in the kbase.yaml file.')
        # OPTIONAL TODO: check if all the users are on the owners list? not
        # necessarily required, because we do a check during registration of the
        # person who started the registration...

        # TODO: check for directory structure, method spec format, documentation, version
        self.validate_method_specs(basedir)

        # initial validation of the local function specifications
        lf_report = self.local_function_reader.parse_and_basic_validation(
            basedir, self.module_details, module_name, version, git_commit_hash)
        self.log(self.local_function_reader.report_to_string_for_log(lf_report))
        if len(lf_report['functions_errored']) > 0:
            raise ValueError('Errors exist in local function specifications.')

        # return the parse so we can figure things out later
        return self.kb_yaml

    def check_that_module_name_is_valid(self, module_name):
        if self.db.is_registered(module_name=module_name):
            raise ValueError('Module name (in kbase.yaml) is already registered. '
                             'Please specify a different name and try again.')
        if self.db.module_name_lc_exists(module_name_lc=module_name.lower()):
            raise ValueError('The case-insensitive module name (in kbase.yaml) is '
                             'not unique. Please specify a different name.')
        # only allow alphanumeric and underscore
        if not re.match(r'^[A-Za-z0-9_]+$', module_name):
            raise ValueError('Module names must be alphanumeric characters '
                             '(including underscores) only, with no spaces.')
Please specify a different name.') # only allow alphanumeric and underscore if not re.match(r'^[A-Za-z0-9_]+$', module_name): raise ValueError('Module names must be alphanumeric characters (including underscores) only, with no spaces.') def update_the_catalog(self, basedir, ref_data_folder, ref_data_ver, compilation_report): # get the basic info that we need commit_hash = str( subprocess.check_output ( ['git','log', '--pretty=%H', '-n', '1' ], cwd=basedir ) ).rstrip() commit_message = str( subprocess.check_output ( ['git','log', '--pretty=%B', '-n', '1' ], cwd=basedir ) ).rstrip() module_name = self.get_required_field_as_string(self.kb_yaml,'module-name') module_description = self.get_required_field_as_string(self.kb_yaml,'module-description') version = self.get_required_field_as_string(self.kb_yaml,'module-version') service_language = self.get_required_field_as_string(self.kb_yaml,'service-language') owners = self.get_required_field_as_list(self.kb_yaml,'owners') service_config = self.get_optional_field_as_dict(self.kb_yaml, 'service-config') # first update the module name, which is now permanent, if we haven't already if ('module_name' not in self.module_details) or ('module_name_lc' not in self.module_details): error = self.db.set_module_name(self.git_url, module_name) if error is not None: raise ValueError('Unable to set module_name - there was an internal database error.' +error) # TODO: Could optimize by combining all these things into one mongo call, but for now this is easier. # Combining it into one call would just mean that this update happens as a single transaction, but a partial # update for now that fails midstream is probably not a huge issue- we can always reregister. # next update the basic information info = { 'description': module_description, 'language' : service_language } if service_config and service_config['dynamic-service']: info['dynamic_service'] = 1 else: info['dynamic_service'] = 0 local_functions = self.local_function_reader.extract_lf_records() if len(local_functions)>0: info['local_functions'] = 1 else: info['local_functions'] = 0 self.log('new info: '+pprint.pformat(info)) error = self.db.set_module_info(info, git_url=self.git_url) if error is not None: raise ValueError('Unable to set module info - there was an internal database error: '+str(error)) # next update the owners ownersListForUpdate = [] for o in owners: # TODO: add some validation that the username is a valid kbase user ownersListForUpdate.append({'kb_username':o}) self.log('new owners list: '+pprint.pformat(ownersListForUpdate)) error = self.db.set_module_owners(ownersListForUpdate, git_url=self.git_url) if error is not None: raise ValueError('Unable to set module owners - there was an internal database error: '+str(error)) # finally update the actual dev version info narrative_methods = [] if os.path.isdir(os.path.join(basedir,'ui','narrative','methods')) : for m in os.listdir(os.path.join(basedir,'ui','narrative','methods')): if os.path.isdir(os.path.join(basedir,'ui','narrative','methods',m)): narrative_methods.append(m) if len(local_functions) > 0: self.log('Saving local function specs:') self.log(pprint.pformat(local_functions)) error = self.db.save_local_function_specs(local_functions) if error is not None: raise ValueError('There was an error saving local function specs, DB says: '+str(error)) new_version = { 'module_name': module_name.strip(), 'module_name_lc': module_name.strip().lower(), 'module_description': module_description, 'released':0, 'released_timestamp':None, 'notes': '', 
'timestamp':self.timestamp, 'registration_id':self.registration_id, 'version' : version, 'git_commit_hash': commit_hash, 'git_commit_message': commit_message, 'narrative_methods': narrative_methods, 'local_functions' : self.local_function_reader.extract_lf_names(), 'docker_img_name': self.image_name, 'compilation_report': compilation_report } if ref_data_ver: new_version['data_folder'] = ref_data_folder new_version['data_version'] = ref_data_ver if service_config and service_config['dynamic-service']: new_version['dynamic_service'] = 1 else: new_version['dynamic_service'] = 0 self.log('new dev version object: '+pprint.pformat(new_version)) error = self.db.update_dev_version(new_version) if error is not None: raise ValueError('Unable to update dev version - there was an internal database error: '+str(error)) #push to NMS self.log('registering specs with NMS') self.nms.register_repo({'git_url':self.git_url, 'git_commit_hash':commit_hash}) self.log('\ndone') # done!!! def validate_method_specs(self, basedir): self.log('validating narrative method specifications') if os.path.isdir(os.path.join(basedir,'ui','narrative','methods')) : for m in os.listdir(os.path.join(basedir,'ui','narrative','methods')): if os.path.isdir(os.path.join(basedir,'ui','narrative','methods',m)): self.log(' - validating method: '+m) # first grab the spec and display files, which are required method_path = os.path.join(basedir,'ui','narrative','methods',m) if not os.path.isfile(os.path.join(method_path,'spec.json')): raise ValueError('Invalid narrative method specification ('+m+'): No spec.json file defined.') if not os.path.isfile(os.path.join(method_path,'display.yaml')): raise ValueError('Invalid narrative method specification ('+m+'): No spec.json file defined.') with codecs.open(os.path.join(method_path,'spec.json'), 'r', "utf-8", errors='ignore') as spec_json_file: spec_json = spec_json_file.read() with codecs.open(os.path.join(method_path,'display.yaml'), 'r', "utf-8", errors='ignore') as display_yaml_file: display_yaml = display_yaml_file.read() # gather any extra html files extraFiles = {} for extra_file_name in os.listdir(os.path.join(method_path)): if not os.path.isfile(os.path.join(method_path,extra_file_name)): break if not extra_file_name.endswith('.html'): break with codecs.open(oos.path.join(method_path,extra_file_name), 'r', "utf-8", errors='ignore') as extra_file: extrafiles[extra_file_name] = extra_file.read() # validate against the NMS target endpoint result = self.nms.validate_method({'id':m, 'spec_json':spec_json, 'display_yaml':display_yaml, 'extra_files':extraFiles}); # inspect results if result['is_valid']>0: self.log(' - valid!') if 'warnings' in result: if result['warnings']: for w in result['warnings']: self.log(' - warning: '+w) else: self.log(' - not valid!', is_error=True) if 'errors' in result: if result['errors']: for e in result['errors']: self.log(' - error: '+e, is_error=True) else: self.log(' - error is undefined!'+e, is_error=True) raise ValueError('Invalid narrative method specification ('+m+')') else: self.log(' - no ui/narrative/methods directory found, so no narrative methods will be deployed') def get_required_field_as_string(self, kb_yaml, field_name): if field_name not in kb_yaml: raise ValueError('kbase.yaml file missing "'+field_name+'" required field') value = kb_yaml[field_name].strip() if not value: raise ValueError('kbase.yaml file missing value for "'+field_name+'" required field') return value def get_required_field_as_list(self, kb_yaml, field_name): if field_name not 
in kb_yaml: raise ValueError('kbase.yaml file missing "'+field_name+'" required field') value = kb_yaml[field_name] if not type(value) is list: raise ValueError('kbase.yaml file "'+field_name+'" required field must be a list') return value def get_optional_field_as_dict(self, kb_yaml, field_name): if field_name not in kb_yaml: return None value = kb_yaml[field_name] if not type(value) is dict: raise ValueError('kbase.yaml file "'+field_name+'" optional field must be a dict') return value def log(self, message, no_end_line=False, is_error=False): if no_end_line: content = message else: content = message + '\n' self.logfile.write(content) self.logfile.flush() lines = content.splitlines(); for l in lines: # add each line to the buffer if len(l)>1000 : l = l[0:1000] + ' ... truncated to 1k characters of ' + str(len(l)) self.log_buffer.append({'content':l+'\n', 'error':is_error}) # save the buffer to mongo if enough time has elapsed, or the buffer is more than 1000 lines if (time.time() - self.last_log_time > self.log_interval) or (len(self.log_buffer)>1000): self.flush_log_to_db(); def flush_log_to_db(self): # todo: if we lose log lines, that's ok. Make sure we handle case if log is larger than mongo doc size self.db.append_to_build_log(self.registration_id, self.log_buffer) self.log_buffer = [] #clear the buffer self.last_log_time = time.time() # reset the log timer def set_build_step(self, step): self.db.set_module_registration_state(git_url=self.git_url, new_state='building: '+step) self.db.set_build_log_state(self.registration_id, 'building: '+step) def set_build_error(self, error_message): self.db.set_module_registration_state(git_url=self.git_url, new_state='error', error_message=error_message) self.db.set_build_log_state(self.registration_id, 'error', error_message=error_message) def build_is_complete(self): self.db.set_module_registration_state(git_url=self.git_url, new_state='complete') self.db.set_build_log_state(self.registration_id, 'complete') def cleanup(self): if os.path.isdir(os.path.join(self.temp_dir,self.registration_id)): shutil.rmtree(os.path.join(self.temp_dir,self.registration_id)) def build_docker_image(self, docker_client, image_name, basedir): self.log('\nBuilding the docker image for ' + image_name); # examine stream to determine success/failure of build imageId=None last={} for line in docker_client.build(path=basedir,rm=True,tag=image_name, pull=False): line_parse = json.loads(line) log_line = '' if 'stream' in line_parse: self.log(line_parse['stream'],no_end_line=True) if 'errorDetail' in line_parse: self.log(str(line_parse),no_end_line=True) raise ValueError('Docker build failed: '+str(line_parse['errorDetail'])) last=line_parse if 'stream' in last and last['stream'][:19]=='Successfully built ': imageId = docker_client.inspect_image(image_name)['Id'] self.log('Docker build successful.') self.log(' Image Id: ' + imageId) self.log(' Image Name: ' + image_name+'\n\n') return imageId def push_docker_image(self, docker_client, image_name): self.log('\nPushing docker image to registry for ' + image_name); colon_pos = image_name.rfind(':') # This logic supports images with "host:port/" prefix for private registry image=image_name[:colon_pos] tag=image_name[colon_pos+1:] #response = [ line for line in docker_client.push(image, tag=tag, stream=True) ] #response_stream = response #self.log(str(response_stream)) # to do: examine stream to determine success/failure of build if self.docker_push_allow_insecure: print("Docker push: insecure_registry: "+ 
str(self.docker_push_allow_insecure)) else: print("Docker push: insecure_registry: None") for line in docker_client.push(image, tag=tag, stream=True, insecure_registry = self.docker_push_allow_insecure): # example line: #'{"status":"Pushing","progressDetail":{"current":32,"total":32},"progress":"[==================================================\\u003e] 32 B/32 B","id":"da200da4256c"}' line_parse = json.loads(line) log_line = '' if 'id' in line_parse: log_line += line_parse['id']+' - '; if 'status' in line_parse: log_line += line_parse['status'] if 'progress' in line_parse: log_line += ' - ' + line_parse['progress'] #if 'progressDetail' in line_parse: # self.log(' - ' + str(line_parse['progressDetail']),no_end_line=True) # catch anything unexpected, we should probably throw an error here for key in line_parse: if key not in ['id','status','progress','progressDetail']: log_line += '['+key+'='+str(line_parse[key])+'] ' self.log(log_line) if 'error' in line_parse: self.log(str(line_parse),no_end_line=True) raise ValueError('Docker push failed: '+str(line_parse['error'])) self.log('done pushing docker image to registry for ' + image_name+'\n'); def run_docker_container(self, dockerclient, image_name, token, kbase_endpoint, binds, work_dir, command): cnt_id = None try: token_file = os.path.join(work_dir, "token") with open(token_file, "w") as file: file.write(token) config_file = os.path.join(work_dir, "config.properties") with open(config_file, "w") as file: file.write("[global]\n" + "job_service_url = " + kbase_endpoint + "/userandjobstate\n" + "workspace_url = " + kbase_endpoint + "/ws\n" + "shock_url = " + kbase_endpoint + "/shock-api\n" + "kbase_endpoint = " + kbase_endpoint + "\n") if not binds: binds = {} binds[work_dir] = {"bind": "/kb/module/work", "mode": "rw"} container = dockerclient.create_container(image=image_name, command=command, tty=True, host_config=dockerclient.create_host_config(binds=binds)) cnt_id = container.get('Id') self.log('Running "' + command + '" entry-point command, container Id=' + cnt_id) dockerclient.start(container=cnt_id) stream = dockerclient.logs(container=cnt_id, stdout=True, stderr=True, stream=True) line = "" for char in stream: if char == '\r': continue if char == '\n': self.log(line) line = "" else: line += char if len(line) > 0: self.log(line) finally: # cleaning up the container try: if cnt_id: dockerclient.remove_container(container=cnt_id, v=True, force=True) self.log("Docker container (Id=" + cnt_id + ") was cleaned up") except: pass def prepare_ref_data(self, dockerclient, image_name, ref_data_base, ref_data_folder, ref_data_ver, basedir, temp_dir, registration_id, token, kbase_endpoint): self.log('\nReference data: creating docker container for initialization') if not os.path.exists(ref_data_base): raise ValueError("Reference data network folder doesn't exist: " + ref_data_base) upper_target_dir = os.path.join(ref_data_base, ref_data_folder) if not os.path.exists(upper_target_dir): os.mkdir(upper_target_dir) temp_ref_data_dir = os.path.join(upper_target_dir, "temp_" + registration_id) try: repo_data_dir = os.path.join(basedir, "data") os.mkdir(temp_ref_data_dir) binds = {temp_ref_data_dir: {"bind": "/data", "mode": "rw"}, repo_data_dir: {"bind": "/kb/module/data", "mode": "rw"}} temp_work_dir = os.path.join(temp_dir,registration_id,'ref_data_workdir') os.mkdir(temp_work_dir) self.run_docker_container(dockerclient, image_name, token, kbase_endpoint, binds, temp_work_dir, 'init') ready_file = os.path.join(temp_ref_data_dir, "__READY__") if 
os.path.exists(ready_file): target_dir = os.path.join(upper_target_dir, ref_data_ver) os.rename(temp_ref_data_dir, target_dir) self.log("Reference data was successfully deployed into " + target_dir) else: raise ValueError("__READY__ file is not detected in reference data folder, produced data will be discarded") finally: # cleaning up temporary ref-data (if not renamed into permanent after success) try: if os.path.exists(temp_ref_data_dir): shutil.rmtree(temp_ref_data_dir) except: pass def prepare_compilation_report(self, dockerclient, image_name, basedir, temp_dir, registration_id, token, kbase_endpoint): self.log('\nCompilation report: creating docker container') try: temp_work_dir = os.path.join(temp_dir,registration_id,'report_workdir') os.mkdir(temp_work_dir) self.run_docker_container(dockerclient, image_name, token, kbase_endpoint, None, temp_work_dir, 'report') report_file = os.path.join(temp_work_dir, 'compile_report.json') if not os.path.exists(report_file): self.log("Report file doesn't exist: " + report_file) return None else: with open(report_file) as f: return json.load(f) except Exception, e: self.log("Error preparing compilation log: " + str(e)) return None
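# --- Hedged illustration (added; not part of the original source) ---
# A minimal sketch of the kbase.yaml shape that sanity_checks_and_parse() and the
# get_required_field_* helpers above expect. The field values and the helper name
# parse_example are hypothetical; only the keys and types are implied by the
# parsing code, so treat this as an assumption, not a specification.
import yaml

EXAMPLE_KB_YAML = """
module-name: ExampleModule
module-description: An illustrative module description.
module-version: 0.0.1
service-language: python
owners: [some_dev, another_dev]
service-config:
    dynamic-service: false
"""

def parse_example():
    # mirrors the safe_load plus required-field type checks done during registration
    kb_yaml = yaml.safe_load(EXAMPLE_KB_YAML)
    assert isinstance(kb_yaml['owners'], list)          # get_required_field_as_list
    assert isinstance(kb_yaml['service-config'], dict)  # get_optional_field_as_dict
    return kb_yaml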
# GetAppStats #

import requests
import os
import datetime, time
import mysql.connector as mysql
from biokbase.catalog.Client import Catalog
from biokbase.narrative_method_store.client import NarrativeMethodStore

requests.packages.urllib3.disable_warnings()
catalog = Catalog(url=os.environ['CATALOG_URL'], token=os.environ['METRICS_USER_TOKEN'])
nms = NarrativeMethodStore(url=os.environ['NARRATIVE_METHOD_STORE'])
sql_host = os.environ['SQL_HOST']
query_on = os.environ['QUERY_ON']

# Ensures all finish times are within the last day.
yesterday = (datetime.date.today() - datetime.timedelta(days=1))


def get_user_app_stats(
        start_date=datetime.datetime.combine(yesterday, datetime.datetime.min.time()),
        end_date=datetime.datetime.combine(yesterday, datetime.datetime.max.time())):
    """
    Gets a data dump from the app catalog for a certain date window.
    If no start and end date are entered, it will default to the last 15 calendar
    days (UTC TIME). It is 15 days because it uses an underlying method that
    filters by creation_time and not finish_time
    """
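# --- Hedged aside (added; not part of the original source) ---
# The default start_date/end_date above bracket the whole of "yesterday"; a caller
# wanting an explicit window can build one the same way. yesterday_window is a
# hypothetical helper shown only to make the default construction concrete.
def yesterday_window():
    day = datetime.date.today() - datetime.timedelta(days=1)
    start = datetime.datetime.combine(day, datetime.datetime.min.time())
    end = datetime.datetime.combine(day, datetime.datetime.max.time())
    return start, end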
def update_the_catalog(self, repo, basedir):
    # get the basic info that we need
    commit_hash = repo.head.commit.hexsha
    commit_message = repo.head.commit.message
    module_name = self.get_required_field_as_string(self.kb_yaml, 'module-name')
    module_description = self.get_required_field_as_string(self.kb_yaml, 'module-description')
    version = self.get_required_field_as_string(self.kb_yaml, 'module-version')
    service_language = self.get_required_field_as_string(self.kb_yaml, 'service-language')
    owners = self.get_required_field_as_list(self.kb_yaml, 'owners')

    # first update the module name, which is now permanent, if we haven't already
    if ('module_name' not in self.module_details) or \
            ('module_name_lc' not in self.module_details):
        error = self.db.set_module_name(self.git_url, module_name)
        if error is not None:
            raise ValueError('Unable to set module_name - there was an internal '
                             'database error.' + error)

    # TODO: Could optimize by combining all these things into one mongo call, but for
    # now this is easier. Combining it into one call would just mean that this update
    # happens as a single transaction, but a partial update for now that fails
    # midstream is probably not a huge issue - we can always reregister.

    # next update the basic information
    info = {
        'description': module_description,
        'language': service_language
    }
    self.log('new info: ' + pprint.pformat(info))
    error = self.db.set_module_info(info, git_url=self.git_url)
    if error is not None:
        raise ValueError('Unable to set module info - there was an internal database '
                         'error: ' + error)

    # next update the owners
    ownersListForUpdate = []
    for o in owners:
        # TODO: add some validation that the username is a valid kbase user
        ownersListForUpdate.append({'kb_username': o})
    self.log('new owners list: ' + pprint.pformat(ownersListForUpdate))
    error = self.db.set_module_owners(ownersListForUpdate, git_url=self.git_url)
    if error is not None:
        raise ValueError('Unable to set module owners - there was an internal database '
                         'error: ' + error)

    # finally update the actual dev version info
    narrative_methods = []
    if os.path.isdir(os.path.join(basedir, 'ui', 'narrative', 'methods')):
        for m in os.listdir(os.path.join(basedir, 'ui', 'narrative', 'methods')):
            if os.path.isdir(os.path.join(basedir, 'ui', 'narrative', 'methods', m)):
                narrative_methods.append(m)

    new_version = {
        'timestamp': self.timestamp,
        'version': version,
        'git_commit_hash': commit_hash,
        'git_commit_message': commit_message,
        'narrative_methods': narrative_methods,
        'docker_img_name': self.image_name
    }
    self.log('new dev version object: ' + pprint.pformat(new_version))
    error = self.db.update_dev_version(new_version, git_url=self.git_url)
    if error is not None:
        raise ValueError('Unable to update dev version - there was an internal database '
                         'error: ' + error)

    # push to NMS
    if Registrar._TEST_WITHOUT_NMS:
        self.log('IN TEST MODE!! SKIPPING NMS UPDATE!!')
        return
    nms = NarrativeMethodStore(self.nms_url, user_id=self.nms_admin_user,
                               password=self.nms_admin_psswd)
    nms.register_repo({'git_url': self.git_url, 'git_commit_hash': commit_hash})
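# --- Hedged aside (added; not part of the original source) ---
# This older update_the_catalog variant reads the HEAD commit through GitPython
# (repo.head.commit), while the newer Registrar code shells out to git. Assuming a
# cloned working tree at basedir, the subprocess form reads the same two facts;
# head_commit_via_subprocess is a hypothetical name for this sketch.
import subprocess

def head_commit_via_subprocess(basedir):
    # mirrors the subprocess calls used in the newer Registrar.update_the_catalog
    commit_hash = subprocess.check_output(
        ['git', 'log', '--pretty=%H', '-n', '1'], cwd=basedir).decode('utf-8').strip()
    commit_message = subprocess.check_output(
        ['git', 'log', '--pretty=%B', '-n', '1'], cwd=basedir).decode('utf-8').strip()
    return commit_hash, commit_message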
class CatalogController:

    def __init__(self, config):
        # first grab the admin list
        self.adminList = []
        if 'admin-users' in config:
            tokens = config['admin-users'].split(',')
            for t in tokens:
                if t.strip():
                    self.adminList.append(t.strip())
        if not self.adminList:
            warnings.warn('no "admin-users" are set in config of CatalogController.')

        # make sure the minimal mongo settings are in place
        if 'mongodb-host' not in config:
            raise ValueError('"mongodb-host" config variable must be defined to start a CatalogController!')
        if 'mongodb-database' not in config:
            raise ValueError('"mongodb-database" config variable must be defined to start a CatalogController!')

        # give warnings if no mongo user information is set
        if 'mongodb-user' not in config:
            warnings.warn('"mongodb-user" is not set in config of CatalogController.')
            config['mongodb-user'] = ''
            config['mongodb-pwd'] = ''
        if 'mongodb-pwd' not in config:
            warnings.warn('"mongodb-pwd" is not set in config of CatalogController.')
            config['mongodb-pwd'] = ''

        # instantiate the mongo client
        self.db = MongoCatalogDBI(
            config['mongodb-host'],
            config['mongodb-database'],
            config['mongodb-user'],
            config['mongodb-pwd'])

        # check for the temp directory and make sure it exists and is writable
        if 'temp-dir' not in config:
            raise ValueError('"temp-dir" config variable must be defined to start a CatalogController!')
        self.temp_dir = config['temp-dir']
        if not os.path.exists(self.temp_dir):
            raise ValueError('"temp-dir" does not exist! It is required for registration to work!')
        if not os.access(self.temp_dir, os.W_OK):
            raise ValueError('"temp-dir" not writable! Writable space is required for registration to work!')

        if 'docker-base-url' not in config:
            raise ValueError('"docker-base-url" config variable must be defined to start a CatalogController!')
        self.docker_base_url = config['docker-base-url']
        print(self.docker_base_url)

        if 'docker-registry-host' not in config:
            raise ValueError('"docker-registry-host" config variable must be defined to start a CatalogController!')
        self.docker_registry_host = config['docker-registry-host']
        print(self.docker_registry_host)

        if 'nms-url' not in config:
            raise ValueError('"nms-url" config variable must be defined to start a CatalogController!')
        self.nms_url = config['nms-url']
        if 'nms-admin-user' not in config:
            raise ValueError('"nms-admin-user" config variable must be defined to start a CatalogController!')
        self.nms_admin_user = config['nms-admin-user']
        if 'nms-admin-psswd' not in config:
            raise ValueError('"nms-admin-psswd" config variable must be defined to start a CatalogController!')
        self.nms_admin_psswd = config['nms-admin-psswd']

        self.nms = NarrativeMethodStore(self.nms_url, user_id=self.nms_admin_user,
                                        password=self.nms_admin_psswd)

    def register_repo(self, params, username, token):
        if 'git_url' not in params:
            raise ValueError('git_url not defined, but is required for registering a repository')
        git_url = params['git_url']
        if not bool(urlparse(git_url).netloc):
            raise ValueError('The git url provided is not a valid URL.')

        # generate a unique registration ID based on a timestamp in ms + 4 random digits
        timestamp = int((datetime.utcnow() - datetime.utcfromtimestamp(0)).total_seconds() * 1000)
        registration_id = str(timestamp) + '_' + str(random.randint(1000, 9999))
        tries = 20
        for t in range(tries):
            try:
                # keep trying to make the directory until it works
                os.mkdir(os.path.join(self.temp_dir, registration_id))
                break
            except OSError:
                # if we fail, wait a bit and try again with a fresh id
                time.sleep(0.002)
                timestamp = int((datetime.utcnow() - datetime.utcfromtimestamp(0)).total_seconds() * 1000)
                registration_id = str(timestamp) + '_' + str(random.randint(1000, 9999))
        # if we couldn't reserve a spot for this registration, then quit
        if not os.path.isdir(os.path.join(self.temp_dir, registration_id)):
            raise ValueError('Unable to allocate a directory for building. Try again, '
                             'and if the problem persists contact us.')

        # 0) Make sure the submitter is on the list
        if not self.is_approved_developer([username])[0]:
            raise ValueError('You are not an approved developer. Contact us to request approval.')

        # 1) If the repo does not yet exist, then create it. No additional permission
        # checks needed
        if not self.db.is_registered(git_url=git_url):
            self.db.register_new_module(git_url, username, timestamp)
            module_details = self.db.get_module_details(git_url=git_url)
        # 2) If it has already been registered, make sure the user has permission to
        # update, and that the module is in a state where it can be registered
        else:
            module_details = self.db.get_module_details(git_url=git_url)
            # 2a) Make sure the user has permission to register this URL
            if self.has_permission(username, module_details['owners']):
                # 2b) Make sure the current registration state is either 'complete' or 'error'
                state = module_details['state']
                registration_state = state['registration']
                if registration_state == 'complete' or registration_state == 'error':
                    error = self.db.set_module_registration_state(
                        git_url=git_url, new_state='started',
                        last_state=registration_state)
                    if error is not None:
                        # we can fail if the registration state changed between when we
                        # first checked and now. This is important to ensure we only
                        # ever kick off one registration thread at a time
                        raise ValueError('Registration failed for git repo (' + git_url +
                                         ') - registration state was modified before '
                                         'build could begin: ' + error)
                    # we know we are the only operation working, so we can clear the
                    # dev version and update the timestamp
                    self.db.update_dev_version({'timestamp': timestamp}, git_url=git_url)
                else:
                    raise ValueError('Registration already in progress for this git repo (' + git_url + ')')
            else:
                raise ValueError('You (' + username + ') are an approved developer, but '
                                 'do not have permission to register this repo (' + git_url + ')')

        # 3) Ok, kick off the registration thread
        #   - This will check out the repo, attempt to build the image, run some tests,
        #     store the image
        #   - If all went well, and during the process, it will update the registration
        #     state of the module and finally update the dev version
        #   - If things failed, it will set the error state, and set an error message.

        # first set the dev current_release timestamp
        t = threading.Thread(target=_start_registration,
                             args=(params, registration_id, timestamp, username, token,
                                   self.db, self.temp_dir, self.docker_base_url,
                                   self.docker_registry_host, self.nms_url,
                                   self.nms_admin_user, self.nms_admin_psswd,
                                   module_details))
        t.start()

        # 4) provide the registration_id so the caller can poll the build log
        return registration_id

    def set_registration_state(self, params, username):
        # first some error handling
        if not self.is_admin(username):
            raise ValueError('You do not have permission to modify the registration state of this module/repo.')
        params = self.filter_module_or_repo_selection(params)
        if 'registration_state' not in params:
            raise ValueError('Update failed - no registration state indicated.')
        # TODO: possibly check for empty states or that the state is a valid state here
        # if not params['registration_state']:
        error_message = ''
        if params['registration_state'] == 'error':
            if 'error_message' not in params:
                raise ValueError('Update failed - if state is "error", you must also set an "error_message".')
            if not params['error_message']:
                raise ValueError('Update failed - if state is "error", you must also set an "error_message".')
            error_message = params['error_message']

        # then we update the state
        error = self.db.set_module_registration_state(
            git_url=params['git_url'],
            module_name=params['module_name'],
            new_state=params['registration_state'],
            error_message=error_message)
        if error is not None:
            raise ValueError('Registration failed for git repo (' + params['git_url'] +
                             ') - some unknown database error: ' + error)

    def push_dev_to_beta(self, params, username):
        # first make sure everything exists and we have permissions
        params = self.filter_module_or_repo_selection(params)
        module_details = self.db.get_module_details(module_name=params['module_name'],
                                                    git_url=params['git_url'])
        # Make sure the submitter is still an approved developer
        if not self.is_approved_developer([username])[0]:
            raise ValueError('You are not an approved developer. Contact us to request approval.')
        if not self.has_permission(username, module_details['owners']):
            raise ValueError('You do not have permission to modify this module/repo.')
        # next make sure the state of the module is ok (it must be active, no pending
        # registrations or release requests)
        if not module_details['state']['active']:
            raise ValueError('Cannot push dev to beta - module/repo is no longer active.')
        if module_details['state']['registration'] != 'complete':
            raise ValueError('Cannot push dev to beta - last registration is in progress or has an error.')
        if module_details['state']['release_approval'] == 'under_review':
            raise ValueError('Cannot push dev to beta - last release request of beta is still pending.')
        # ok, do it.
        self.nms.push_repo_to_tag({'module_name': module_details['module_name'],
                                   'tag': 'beta'})
        error = self.db.push_dev_to_beta(module_name=params['module_name'],
                                         git_url=params['git_url'])
        if error is not None:
            raise ValueError('Update operation failed - some unknown database error: ' + error)

    def request_release(self, params, username):
        # first make sure everything exists and we have permissions
        params = self.filter_module_or_repo_selection(params)
        module_details = self.db.get_module_details(module_name=params['module_name'],
                                                    git_url=params['git_url'])
        # Make sure the submitter is still an approved developer
        if not self.is_approved_developer([username])[0]:
            raise ValueError('You are not an approved developer. Contact us to request approval.')
        if not self.has_permission(username, module_details['owners']):
            raise ValueError('You do not have permission to modify this module/repo.')
        # next make sure the state of the module is ok (it must be active, no pending
        # release requests)
        if not module_details['state']['active']:
            raise ValueError('Cannot request release - module/repo is no longer active.')
        if module_details['state']['release_approval'] == 'under_review':
            raise ValueError('Cannot request release - last release request of beta is still pending.')
        # beta version must exist
        if not module_details['current_versions']['beta']:
            raise ValueError('Cannot request release - no beta version has been created yet.')
        # beta version must be different than the release version (if one exists)
        if module_details['current_versions']['release']:
            if module_details['current_versions']['beta']['timestamp'] == \
                    module_details['current_versions']['release']['timestamp']:
                raise ValueError('Cannot request release - beta version is identical to released version.')
        # ok, do it.
        error = self.db.set_module_release_state(
            module_name=params['module_name'], git_url=params['git_url'],
            new_state='under_review',
            last_state=module_details['state']['release_approval'])
        if error is not None:
            raise ValueError('Release request failed - some unknown database error: ' + error)

    def list_requested_releases(self):
        query = {'state.release_approval': 'under_review'}
        results = self.db.find_current_versions_and_owners(query)
        requested_releases = []
        for r in results:
            owners = []
            for o in r['owners']:
                owners.append(o['kb_username'])
            beta = r['current_versions']['beta']
            timestamp = beta['timestamp']
            requested_releases.append({
                'module_name': r['module_name'],
                'git_url': r['git_url'],
                'timestamp': timestamp,
                'git_commit_hash': beta['git_commit_hash'],
                'git_commit_message': beta['git_commit_message'],
                'owners': owners
            })
        return requested_releases

    def review_release_request(self, review, username):
        if not self.is_admin(username):
            raise ValueError('You do not have permission to review a release request.')
        review = self.filter_module_or_repo_selection(review)
        module_details = self.db.get_module_details(module_name=review['module_name'],
                                                    git_url=review['git_url'])
        if module_details['state']['release_approval'] != 'under_review':
            raise ValueError('Cannot review request - module/repo is not under review!')
        if not module_details['state']['active']:
            raise ValueError('Cannot review request - module/repo is no longer active.')

        if 'decision' not in review:
            raise ValueError('Cannot set review - no "decision" was provided!')
        if not review['decision']:
            raise ValueError('Cannot set review - no "decision" was provided!')
        if review['decision'] == 'denied':
            if 'review_message' not in review:
                raise ValueError('Cannot set review - if denied, you must set a "review_message"!')
            if not review['review_message'].strip():
                raise ValueError('Cannot set review - if denied, you must set a "review_message"!')
        if 'review_message' not in review:
            review['review_message'] = ''
        if review['decision'] not in ['approved', 'denied']:
            raise ValueError('Cannot set review - decision must be "approved" or "denied"')

        # ok, do it.
        # if the state is approved, then we need to save the beta version over the
        # release version and stash a new entry. The DBI will handle that for us. (note
        # that concurrency issues don't really matter here because if this is done
        # twice (for instance, before the release_state is set to approved in the
        # document in the next call) there won't be any problems.) I like nested
        # parentheses.
        if review['decision'] == 'approved':
            self.nms.push_repo_to_tag({'module_name': module_details['module_name'],
                                       'tag': 'release'})
            error = self.db.push_beta_to_release(module_name=review['module_name'],
                                                 git_url=review['git_url'])

        # Now we can update the release state...
        error = self.db.set_module_release_state(
            module_name=review['module_name'], git_url=review['git_url'],
            new_state=review['decision'],
            last_state=module_details['state']['release_approval'],
            review_message=review['review_message'])
        if error is not None:
            raise ValueError('Release review update failed - some unknown database error. ' + error)

    def get_module_state(self, params):
        params = self.filter_module_or_repo_selection(params)
        return self.db.get_module_state(module_name=params['module_name'],
                                        git_url=params['git_url'])

    def get_module_info(self, params):
        params = self.filter_module_or_repo_selection(params)
        details = self.db.get_module_details(module_name=params['module_name'],
                                             git_url=params['git_url'])
        owners = []
        for o in details['owners']:
            owners.append(o['kb_username'])
        info = {
            'module_name': details['module_name'],
            'git_url': details['git_url'],
            'description': details['info']['description'],
            'language': details['info']['language'],
            'owners': owners,
            'release': details['current_versions']['release'],
            'beta': details['current_versions']['beta'],
            'dev': details['current_versions']['dev']
        }
        return info

    def get_version_info(self, params):
        params = self.filter_module_or_repo_selection(params)
        current_version = self.db.get_module_current_versions(
            module_name=params['module_name'], git_url=params['git_url'])
        if not current_version:
            return None

        # TODO: can make this more efficient and flexible by putting in some indices
        # and doing the query on the mongo side. right now, we require a module name /
        # git url, and request a specific version based on selectors. in the future we
        # could, for instance, get all versions that match a particular git commit
        # hash, or timestamp...

        # If version is in params, it should be one of dev, beta, release
        if 'version' in params:
            if params['version'] not in ['dev', 'beta', 'release']:
                raise ValueError('invalid version selection, valid versions are: "dev" | "beta" | "release"')
            v = current_version[params['version']]
            # if timestamp or git_commit_hash is given, those need to match as well
            if 'timestamp' in params:
                if v['timestamp'] != params['timestamp']:
                    return None
            if 'git_commit_hash' in params:
                if v['git_commit_hash'] != params['git_commit_hash']:
                    return None
            return v

        if 'timestamp' in params:
            # first check in current versions
            for version in ['dev', 'beta', 'release']:
                if current_version[version]['timestamp'] == params['timestamp']:
                    v = current_version[version]
                    if 'git_commit_hash' in params:
                        if v['git_commit_hash'] != params['git_commit_hash']:
                            return None
                    return v
            # if we get here, we have to look in full history
            details = self.db.get_module_full_details(
                module_name=params['module_name'], git_url=params['git_url'])
            all_versions = details['release_versions']
            if str(params['timestamp']) in all_versions:
                v = all_versions[str(params['timestamp'])]
                if 'git_commit_hash' in params:
                    if v['git_commit_hash'] != params['git_commit_hash']:
                        return None
                return v
            return None

        # if we get here, version and timestamp are not defined, so just look for the
        # commit hash
        if 'git_commit_hash' in params:
            # check current versions
            for version in ['dev', 'beta', 'release']:
                if current_version[version]['git_commit_hash'] == params['git_commit_hash']:
                    v = current_version[version]
                    return v
            # if we get here, we have to look in full history
            details = self.db.get_module_full_details(
                module_name=params['module_name'], git_url=params['git_url'])
            all_versions = details['release_versions']
            for timestamp, v in all_versions.items():
                if v['git_commit_hash'] == params['git_commit_hash']:
                    return v
            return None

        # didn't find anything, so return
        return None

    def list_released_versions(self, params):
        params = self.filter_module_or_repo_selection(params)
        details = self.db.get_module_full_details(module_name=params['module_name'],
                                                  git_url=params['git_url'])
        return sorted(details['release_versions'].values(), key=lambda v: v['timestamp'])

    def is_registered(self, params):
        if 'git_url' not in params:
            params['git_url'] = ''
        if 'module_name' not in params:
            params['module_name'] = ''
        if self.db.is_registered(module_name=params['module_name'],
                                 git_url=params['git_url']):
            return True
        return False

    # note: maybe a little too mongo centric, but ok for now...
    def list_basic_module_info(self, params):
        query = {'state.active': True, 'state.released': True}

        if 'include_disabled' in params:
            if params['include_disabled'] > 0:
                query.pop('state.active', None)

        if 'include_released' not in params:
            params['include_released'] = 1
        if 'include_unreleased' not in params:
            params['include_unreleased'] = 0

        # figure out released/unreleased options so we can get just the unreleased if
        # needed (the default, if none of these match, is to list only released)
        if params['include_released'] <= 0 and params['include_unreleased'] <= 0:
            return []  # don't include anything...
        elif params['include_released'] <= 0 and params['include_unreleased'] > 0:
            # minor change that could be removed eventually: check for released=False or missing
            query.pop('state.released', None)
            query['$or'] = [{'state.released': False},
                            {'state.released': {'$exists': False}}]
            # query['state.released'] = False
            # include only unreleased (only works if everything has this flag)
        elif params['include_released'] > 0 and params['include_unreleased'] > 0:
            query.pop('state.released', None)  # include everything

        if 'owners' in params:
            if params['owners']:
                # might want to filter out empty strings in the future
                query['owners.kb_username'] = {'$in': params['owners']}

        return self.db.find_basic_module_info(query)

    def set_module_active_state(self, active, params, username):
        params = self.filter_module_or_repo_selection(params)
        if not self.is_admin(username):
            raise ValueError('Only Admin users can set a module to be active/inactive.')
        error = self.db.set_module_active_state(active,
                                                module_name=params['module_name'],
                                                git_url=params['git_url'])
        if error is not None:
            raise ValueError('Update operation failed - some unknown database error: ' + error)

    def approve_developer(self, developer, username):
        if not developer:
            raise ValueError('No username provided')
        if not developer.strip():
            raise ValueError('No username provided')
        if not self.is_admin(username):
            raise ValueError('Only Admin users can approve or revoke developers.')
        self.db.approve_developer(developer)

    def revoke_developer(self, developer, username):
        if not developer:
            raise ValueError('No username provided')
        if not developer.strip():
            raise ValueError('No username provided')
        if not self.is_admin(username):
            raise ValueError('Only Admin users can approve or revoke developers.')
        self.db.revoke_developer(developer)

    def is_approved_developer(self, usernames):
        if not usernames:
            return []
        return self.db.is_approved_developer(usernames)

    def list_approved_developers(self):
        dev_list = self.db.list_approved_developers()
        simple_kbase_dev_list = []
        for d in dev_list:
            simple_kbase_dev_list.append(d['kb_username'])
        return sorted(simple_kbase_dev_list)

    def get_build_log(self, registration_id):
        try:
            with open(self.temp_dir + '/registration.log.' + str(registration_id)) as log_file:
                log = log_file.read()
        except Exception:
            log = '[log not found - registration_id is invalid or the log has been deleted]'
        return log

    def delete_module(self, params, username):
        if not self.is_admin(username):
            raise ValueError('Only Admin users can delete modules.')
        if 'module_name' not in params and 'git_url' not in params:
            raise ValueError('You must specify the "module_name" or "git_url" of the module to delete.')
        params = self.filter_module_or_repo_selection(params)
        error = self.db.delete_module(module_name=params['module_name'],
                                      git_url=params['git_url'])
        if error is not None:
            raise ValueError('Delete operation failed - some unknown database error: ' + error)

    def migrate_module_to_new_git_url(self, params, username):
        if not self.is_admin(username):
            raise ValueError('Only Admin users can migrate module git urls.')
        if 'module_name' not in params:
            raise ValueError('You must specify the "module_name" of the module to modify.')
        if 'current_git_url' not in params:
            raise ValueError('You must specify the "current_git_url" of the module to modify.')
        if 'new_git_url' not in params:
            raise ValueError('You must specify the "new_git_url" of the module to modify.')
        if not bool(urlparse(params['new_git_url']).netloc):
            raise ValueError('The new git url is not a valid URL.')
        error = self.db.migrate_module_to_new_git_url(params['module_name'],
                                                      params['current_git_url'],
                                                      params['new_git_url'])
        if error is not None:
            raise ValueError('Update operation failed - some unknown database error: ' + error)

    # Some utility methods

    def filter_module_or_repo_selection(self, params):
        if 'git_url' not in params:
            params['git_url'] = ''
        if 'module_name' not in params:
            params['module_name'] = ''
        if not self.db.is_registered(module_name=params['module_name'],
                                     git_url=params['git_url']):
            raise ValueError('Operation failed - module/repo is not registered.')
        return params

    # always true if the user is in the admin list
    def has_permission(self, username, owners):
        if self.is_admin(username):
            return True
        for owner in owners:
            if username == owner['kb_username']:
                return True
        return False

    def is_admin(self, username):
        if username in self.adminList:
            return True
        return False

    def version(self):
        return biokbase.catalog.version.CATALOG_VERSION
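# --- Hedged usage sketch (added; not part of the original source) ---
# A minimal illustration of driving a registration through the controller, under
# the assumption that `controller` is a fully configured CatalogController (live
# mongo, docker, and NMS behind it). register_and_poll_log is a hypothetical
# helper name; only register_repo and get_build_log are taken from the code above.
def register_and_poll_log(controller, git_url, username, token):
    # register_repo validates permissions and state, spawns the build thread, and
    # returns immediately with the registration_id used to name the build log
    registration_id = controller.register_repo({'git_url': git_url}, username, token)
    # the log can be polled while the background Registrar thread is still running
    return registration_id, controller.get_build_log(registration_id)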