Ejemplo n.º 1
0
    def __init__(self, params, registration_id, timestamp, username, is_admin,
                 token, db, temp_dir, docker_base_url, docker_registry_host,
                 docker_push_allow_insecure, nms_url, nms_admin_token,
                 module_details, ref_data_base, kbase_endpoint,
                 prev_dev_version):
        """Store the settings for one registration run on the instance.

        Every argument except ``nms_admin_token`` is kept as an attribute of
        the same name. ``nms_admin_token`` is only used to build the
        NarrativeMethodStore client stored in ``self.nms``. ``git_url`` is
        read from ``params`` and the log-buffer state is initialised.
        """
        self.db = db
        self.params = params
        # git_url is assumed to have been checked before we get here
        self.git_url = params['git_url']

        self.registration_id = registration_id
        self.timestamp = timestamp
        self.username = username
        self.is_admin = is_admin
        self.token = token

        self.temp_dir = temp_dir
        self.docker_base_url = docker_base_url
        self.docker_registry_host = docker_registry_host
        self.docker_push_allow_insecure = docker_push_allow_insecure

        # (most) of the mongo document for this module snapshot before this registration
        self.module_details = module_details

        self.ref_data_base = ref_data_base
        self.kbase_endpoint = kbase_endpoint
        self.prev_dev_version = prev_dev_version

        self.nms_url = nms_url
        self.nms = NarrativeMethodStore(nms_url, token=nms_admin_token)

        self.local_function_reader = LocalFunctionReader()

        self.log_buffer = []
        self.last_log_time = time.time()  # in seconds
        self.log_interval = 1.0  # save log to mongo every second
def create_app_dictionary():
    """Group the ids of all released apps by category.

    Lists every method tagged ``release`` from the Narrative Method Store
    (URL read from the ``NARRATIVE_METHOD_STORE`` environment variable),
    passes the listing through ``data_configure`` and collects, for each
    entry of an app's ``categories`` list, the ids of the apps carrying it.

    Returns:
        dict: category name -> list of the unique app ids in that category.

    Raises:
        KeyError: if ``CATALOG_URL`` or ``NARRATIVE_METHOD_STORE`` is missing
            from the environment.
    """
    requests.packages.urllib3.disable_warnings()
    # NOTE(review): this client is never used below. It is still constructed
    # so that a missing CATALOG_URL keeps failing here -- confirm whether it
    # can be dropped.
    catalog = Catalog(url=os.environ['CATALOG_URL'])
    nms = NarrativeMethodStore(url=os.environ['NARRATIVE_METHOD_STORE'])

    apps = nms.list_methods({"tag": "release"})
    apps_datastruc = pd.DataFrame.from_dict(apps)
    ModDfApps = data_configure(apps_datastruc)
    ModDfApps.drop([
        'app_type', 'authors', 'git_commit_hash', 'icon', 'input_types',
        'module_name', 'name', 'namespace', 'output_types', 'subtitle',
        'tooltip', 'ver'
    ],
                   axis=1,
                   inplace=True)

    # Collect ids per category in a dict used as a set: duplicates collapse
    # on insertion, so the list never has to be re-deduplicated after each
    # append. Walking the two columns in lockstep avoids label-based lookups,
    # which misbehave when the index is not unique.
    ids_by_category = {}
    for app_categories, app_id in zip(ModDfApps["categories"],
                                      ModDfApps["id"]):
        for category in app_categories:
            ids_by_category.setdefault(category, {})[app_id] = None

    return {
        category: list(app_ids)
        for category, app_ids in ids_by_category.items()
    }
Ejemplo n.º 3
0
    def __init__(self, params, registration_id, timestamp, username, is_admin,
                 token, db, temp_dir, docker_base_url, docker_registry_host,
                 docker_push_allow_insecure, nms_url, nms_admin_token,
                 module_details, ref_data_base, kbase_endpoint,
                 prev_dev_version):
        """Store the settings for one registration run on the instance.

        Every argument except ``nms_admin_token`` is kept as an attribute of
        the same name. ``nms_admin_token`` is only used to build the
        NarrativeMethodStore client stored in ``self.nms``. ``git_url`` is
        read from ``params`` and the log-buffer state is initialised.
        """
        self.db = db
        self.params = params
        # git_url is assumed to have been checked before we get here
        self.git_url = params['git_url']

        self.registration_id = registration_id
        self.timestamp = timestamp
        self.username = username
        self.is_admin = is_admin
        self.token = token

        self.temp_dir = temp_dir
        self.docker_base_url = docker_base_url
        self.docker_registry_host = docker_registry_host
        self.docker_push_allow_insecure = docker_push_allow_insecure

        # (most) of the mongo document for this module snapshot before this registration
        self.module_details = module_details

        self.ref_data_base = ref_data_base
        self.kbase_endpoint = kbase_endpoint
        self.prev_dev_version = prev_dev_version

        self.nms_url = nms_url
        self.nms = NarrativeMethodStore(nms_url, token=nms_admin_token)

        self.local_function_reader = LocalFunctionReader()

        self.log_buffer = []
        self.last_log_time = time.time()  # in seconds
        self.log_interval = 1.0  # save log to mongo every second
Ejemplo n.º 4
0
def __init_client(client_name):
    """Create the service client known as ``client_name`` and remember it.

    The new client is stored in the module-level ``__clients`` mapping under
    ``client_name`` and then returned.

    Raises:
        ValueError: if ``client_name`` is not one of the recognised names.
    """
    # Each factory is wrapped in a lambda so that only the requested client
    # is ever constructed.
    factories = (
        ('workspace', lambda: Workspace(URLS.workspace)),
        ('job_service', lambda: NarrativeJobService(URLS.job_service)),
        ('narrative_method_store',
         lambda: NarrativeMethodStore(URLS.narrative_method_store)),
        ('user_and_job_state',
         lambda: UserAndJobState(URLS.user_and_job_state)),
        ('catalog', lambda: Catalog(URLS.catalog)),
    )

    for known_name, make_client in factories:
        if client_name == known_name:
            client = make_client()
            break
    else:
        raise ValueError('Unknown client name "%s"' % client_name)

    __clients[client_name] = client
    return client
Ejemplo n.º 5
0
    def setUpClass(cls):
        """Create the shared catalog, test utility and NMS client for the tests.

        Switches ``Registrar`` into its no-docker test mode, sets up a
        ``CatalogTestUtil`` rooted at '.', builds a ``Catalog`` from its
        config, approves the admin and regular test users as developers, and
        creates a NarrativeMethodStore client from the configured 'nms-url'.
        """
        print('++++++++++++ RUNNING core_registration_test.py +++++++++++')

        # hack for testing!! remove when docker and NMS components can be tested
        from biokbase.catalog.registrar import Registrar
        Registrar._TEST_WITHOUT_DOCKER = True

        # TODO: pass in test directory from outside
        test_util = CatalogTestUtil('.')
        cls.cUtil = test_util
        test_util.setUp()
        cls.catalog = Catalog(test_util.getCatalogConfig())

        # approve developers we will use: the admin first, then the regular
        # user, each approved with a fresh admin context
        for make_ctx in (test_util.admin_ctx, test_util.user_ctx):
            cls.catalog.approve_developer(test_util.admin_ctx(),
                                          make_ctx()['user_id'])

        nms_url = test_util.getCatalogConfig()['nms-url']
        cls.nms = NarrativeMethodStore(nms_url)
Ejemplo n.º 6
0
def __init_client(client_name, token=None):
    """Build and return a new client for the service named ``client_name``.

    ``token`` is forwarded to every client constructor except the job
    service mock. Several names are aliases: 'service' and 'service_wizard'
    give a ServiceClient; 'execution_engine2', 'execution_engine' and
    'job_service' all give an execution_engine2 client.

    Raises:
        ValueError: if ``client_name`` is not recognised.
    """
    if client_name == 'workspace':
        return Workspace(URLS.workspace, token=token)

    if client_name == 'narrative_method_store':
        return NarrativeMethodStore(URLS.narrative_method_store, token=token)

    if client_name == 'user_and_job_state':
        return UserAndJobState(URLS.user_and_job_state, token=token)

    if client_name == 'catalog':
        return Catalog(URLS.catalog, token=token)

    if client_name in ('service', 'service_wizard'):
        return ServiceClient(URLS.service_wizard,
                             use_url_lookup=True,
                             token=token)

    if client_name in ('execution_engine2', 'execution_engine',
                       'job_service'):
        return execution_engine2(URLS.execution_engine2, token=token)

    if client_name == 'job_service_mock':
        return JobServiceMock()

    raise ValueError('Unknown client name "%s"' % client_name)
Ejemplo n.º 7
0
def __init_client(client_name, token=None):
    """Build and return a new client for the service named ``client_name``.

    Recognised names are "workspace", "execution_engine2",
    "narrative_method_store", "service" and "catalog"; ``token`` is
    forwarded to the chosen client's constructor.

    Raises:
        ValueError: if ``client_name`` is not recognised. The message lists
            the names that are.
    """
    if client_name == "workspace":
        return Workspace(URLS.workspace, token=token)

    if client_name == "execution_engine2":
        return execution_engine2(URLS.execution_engine2, token=token)

    if client_name == "narrative_method_store":
        return NarrativeMethodStore(URLS.narrative_method_store, token=token)

    if client_name == "service":
        return ServiceClient(URLS.service_wizard,
                             use_url_lookup=True,
                             token=token)

    if client_name == "catalog":
        return Catalog(URLS.catalog, token=token)

    recognised_names_help = (
        "The following client names are recognised:\n"
        'Catalog: "catalog"\n'
        'Execution Engine 2: "execution_engine2"\n'
        'NMS: "narrative_method_store"\n'
        'Service Wizard: "service"\n'
        'Workspace: "workspace"'
    )
    raise ValueError('Unknown client name "%s"\n' % client_name +
                     recognised_names_help)
Ejemplo n.º 8
0
    def __init__(self, params, registration_id, timestamp, username, token,
                 db, temp_dir, docker_base_url, docker_registry_host, nms_url,
                 nms_admin_user, nms_admin_psswd, module_details):
        """Store the settings for one registration run on the instance.

        Every argument is kept as an attribute of the same name. ``git_url``
        is read from ``params``, and a NarrativeMethodStore client
        authenticated with the NMS admin user/password is stored in
        ``self.nms``.
        """
        self.db = db
        self.params = params
        # git_url is assumed to have been checked before we get here
        self.git_url = params['git_url']

        self.registration_id = registration_id
        self.timestamp = timestamp
        self.username = username
        self.token = token

        self.temp_dir = temp_dir
        self.docker_base_url = docker_base_url
        self.docker_registry_host = docker_registry_host

        self.nms_url = nms_url
        self.nms_admin_user = nms_admin_user
        self.nms_admin_psswd = nms_admin_psswd
        self.nms = NarrativeMethodStore(nms_url,
                                        user_id=nms_admin_user,
                                        password=nms_admin_psswd)

        # (most) of the mongo document for this module snapshot before this registration
        self.module_details = module_details
Ejemplo n.º 9
0
# Create App Dictionary: module-level setup (imports and shared service clients).
import requests

# Turn off urllib3's warnings before any request is made through `requests`.
requests.packages.urllib3.disable_warnings()
from biokbase.catalog.Client import Catalog
from biokbase.narrative_method_store.client import NarrativeMethodStore

# Module-level clients, created once at import time against hard-coded
# kbase.us service URLs. `nms` is used by create_app_dictionary() below.
catalog = Catalog(url="https://kbase.us/services/catalog")
nms = NarrativeMethodStore(
    url="https://kbase.us/services/narrative_method_store/rpc")
from data_configure import data_configure

import pandas as pd


def create_app_dictionary():
    """Prepare an empty category -> app-list mapping from the released apps.

    Lists the methods tagged ``release`` through the module-level ``nms``
    client, runs the listing through ``data_configure``, drops a fixed set of
    columns, and creates ``app_dict`` with one empty list per distinct
    category found in the ``categories`` column.

    NOTE(review): as written the function stops here and returns None --
    ``app_dict`` is never filled or returned. This excerpt looks truncated;
    confirm against the full source before relying on it.
    """
    release_apps = nms.list_methods({"tag": "release"})
    apps_frame = pd.DataFrame.from_dict(release_apps)
    ModDfApps = data_configure(apps_frame)

    dropped_columns = [
        'app_type', 'authors', 'git_commit_hash', 'icon', 'input_types',
        'module_name', 'name', 'namespace', 'output_types', 'subtitle',
        'tooltip', 'ver'
    ]
    ModDfApps.drop(dropped_columns, axis=1, inplace=True)

    # every category mentioned by at least one app, without duplicates
    distinct_categories = set()
    for app_categories in list(ModDfApps.categories):
        distinct_categories.update(app_categories)

    app_dict = {category: [] for category in distinct_categories}
Ejemplo n.º 10
0
class Registrar:

    # params is passed in from the controller, should be the same as passed into the spec
    # db is a reference to the Catalog DB interface (usually a MongoCatalogDBI instance)
    def __init__(self, params, registration_id, timestamp, username, is_admin,
                 token, db, temp_dir, docker_base_url, docker_registry_host,
                 docker_push_allow_insecure, nms_url, nms_admin_token,
                 module_details, ref_data_base, kbase_endpoint,
                 prev_dev_version):
        """Store the settings for one registration run on the instance.

        Every argument except ``nms_admin_token`` is kept as an attribute of
        the same name. ``nms_admin_token`` is only used to build the
        NarrativeMethodStore client stored in ``self.nms``. ``git_url`` is
        read from ``params`` and the log-buffer state is initialised.
        """
        self.db = db
        self.params = params
        # git_url is assumed to have been checked before we get here
        self.git_url = params['git_url']

        # identity of this registration and of the user running it
        self.registration_id = registration_id
        self.timestamp = timestamp
        self.username = username
        self.is_admin = is_admin
        self.token = token

        # scratch space and docker settings
        self.temp_dir = temp_dir
        self.docker_base_url = docker_base_url
        self.docker_registry_host = docker_registry_host
        self.docker_push_allow_insecure = docker_push_allow_insecure

        # (most) of the mongo document for this module snapshot before this registration
        self.module_details = module_details

        self.ref_data_base = ref_data_base
        self.kbase_endpoint = kbase_endpoint
        self.prev_dev_version = prev_dev_version

        self.nms_url = nms_url
        self.nms = NarrativeMethodStore(nms_url, token=nms_admin_token)

        self.local_function_reader = LocalFunctionReader()

        self.log_buffer = []
        self.last_log_time = time.time()  # in seconds
        self.log_interval = 1.0  # save log to mongo every second

    def start_registration(self):
        try:
            self.logfile = codecs.open(
                self.temp_dir + '/registration.log.' + self.registration_id,
                'w', 'utf-8')
            self.log('Registration started on ' +
                     str(datetime.datetime.now()) + ' by ' + self.username)
            self.log('Registration ID: ' + str(self.registration_id))
            self.log('Registration Parameters: ' + str(self.params))

            ##############################
            # 1 - clone the repo into the temp directory that should already be reserved for us
            self.set_build_step('cloning git repo')
            if not os.path.isdir(
                    os.path.join(self.temp_dir, self.registration_id)):
                raise (
                    'Directory for the git clone was not allocated!  This is an internal catalog server error, please report this problem.'
                )

            basedir = os.path.join(self.temp_dir, self.registration_id,
                                   'module_repo')

            parsed_url = urlparse(self.git_url)

            self.log('Attempting to clone into: ' + basedir)
            self.log('git clone ' + self.git_url)
            subprocess.check_call(['git', 'clone', self.git_url, basedir])
            # try to get hash from repo
            git_commit_hash = str(
                subprocess.check_output(
                    ['git', 'log', '--pretty=%H', '-n', '1'],
                    cwd=basedir)).rstrip()
            self.log('current commit hash at HEAD:' + git_commit_hash)
            if 'git_commit_hash' in self.params:
                if self.params['git_commit_hash']:
                    self.log('git checkout ' +
                             self.params['git_commit_hash'].strip())
                    subprocess.check_call([
                        'git', 'checkout', '--quiet',
                        self.params['git_commit_hash']
                    ],
                                          cwd=basedir)
                    git_commit_hash = self.params['git_commit_hash'].strip()

            # check if this was a git_commit_hash that was already released- if so, we abort for now (we could just update the dev tag in the future)
            for r in self.module_details['release_version_list']:
                if r['git_commit_hash'] == git_commit_hash:
                    raise ValueError(
                        'The specified commit is already released.  You cannot reregister that commit version or image.'
                    )

            # do the same for beta versions for now
            if 'beta' in self.module_details[
                    'current_versions'] and self.module_details[
                        'current_versions']['beta'] is not None:
                if self.module_details['current_versions']['beta'][
                        'git_commit_hash'] == git_commit_hash:
                    raise ValueError(
                        'The specified commit is already registered and in beta.  You cannot reregister that commit version or image.'
                    )

            ##############################
            # 2 - sanity check (things parse, files exist, module_name matches, etc)
            self.set_build_step('reading files and performing basic checks')
            self.sanity_checks_and_parse(basedir, git_commit_hash)

            ##############################
            # 2.5 - dealing with git releases .git/config.lock, if it still exists after 5s then kill it
            ###### should no longer need this after switching to subprocess
            #            git_config_lock_file = os.path.join(basedir, ".git", "config.lock")
            #            if os.path.exists(git_config_lock_file):
            #                self.log('.git/config.lock exists, waiting 5s for it to release')
            #                time.sleep(5)
            #                if os.path.exists(git_config_lock_file):
            #                    self.log('.git/config.lock file still there, we are just going to delete it....')
            #                    os.remove(git_config_lock_file)

            ##############################
            # 3 docker build - in progress
            # perhaps make this a self attr?
            module_name_lc = self.get_required_field_as_string(
                self.kb_yaml, 'module-name').strip().lower()
            self.image_name = self.docker_registry_host + '/kbase:' + module_name_lc + '.' + str(
                git_commit_hash)
            ref_data_folder = None
            ref_data_ver = None
            compilation_report = None
            if not Registrar._TEST_WITHOUT_DOCKER:
                # timeout set to 24 hours because we often get timeouts if multiple people try to push at the same time
                dockerclient = None
                docker_timeout = 86400
                if len(str(self.docker_base_url)) > 0:
                    dockerclient = DockerClient(base_url=str(
                        self.docker_base_url),
                                                timeout=docker_timeout)
                else:
                    # docker base URL is not set in config, let's use Docker-related env-vars in this case
                    docker_host = os.environ['DOCKER_HOST']
                    if docker_host is None or len(docker_host) == 0:
                        raise ValueError(
                            'Docker host should be defined either in configuration '
                            '(docker-base-url property) or in DOCKER_HOST environment variable'
                        )
                    docker_tls_verify = os.environ['DOCKER_TLS_VERIFY']
                    if docker_host.startswith('tcp://'):
                        docker_protocol = "http"
                        if (docker_tls_verify
                                is not None) and docker_tls_verify == '1':
                            docker_protocol = "https"
                        docker_host = docker_host.replace(
                            'tcp://', docker_protocol + '://')
                    docker_cert_path = os.environ['DOCKER_CERT_PATH']
                    docker_tls = False
                    if (docker_cert_path
                            is not None) and len(docker_cert_path) > 0:
                        docker_tls = DockerTLSConfig(
                            verify=False,
                            client_cert=(docker_cert_path + '/cert.pem',
                                         docker_cert_path + '/key.pem'))
                    self.log(
                        "Docker settings from environment variables are used: docker-host = "
                        + docker_host + ", docker_cert_path = " +
                        str(docker_cert_path))
                    dockerclient = DockerClient(base_url=docker_host,
                                                timeout=docker_timeout,
                                                version='auto',
                                                tls=docker_tls)
                # look for docker image
                # this tosses cookies if image doesn't exist, so wrap in try, and build if try reports "not found"
                #self.log(str(dockerclient.inspect_image(repo_name)))
                # if image does not exist, build and set state
                self.set_build_step('building the docker image')
                # imageId is not yet populated properly
                imageId = self.build_docker_image(dockerclient,
                                                  self.image_name, basedir)

                # check if reference data version is defined in kbase.yml
                if 'data-version' in self.kb_yaml:
                    ref_data_ver = str(self.kb_yaml['data-version']).strip()
                    if ref_data_ver:
                        ref_data_folder = module_name_lc
                        target_ref_data_dir = os.path.join(
                            self.ref_data_base, ref_data_folder, ref_data_ver)
                        if os.path.exists(target_ref_data_dir):
                            self.log(
                                "Reference data for " + ref_data_folder + "/" +
                                ref_data_ver + " was " +
                                "already prepared, initialization step is skipped"
                            )
                        else:
                            self.set_build_step(
                                'preparing reference data (running init entry-point), '
                                + 'ref-data version: ' + ref_data_ver)
                            self.prepare_ref_data(
                                dockerclient, self.image_name,
                                self.ref_data_base, ref_data_folder,
                                ref_data_ver, basedir, self.temp_dir,
                                self.registration_id, self.token,
                                self.kbase_endpoint)

                self.set_build_step('preparing compilation report')
                self.log('Preparing compilation report.')

                # Trying to extract compilation report with line numbers of funcdefs from docker image.
                # There is "report" entry-point command responsible for that. In case there are any
                # errors we just skip it.
                compilation_report = self.prepare_compilation_report(
                    dockerclient, self.image_name, basedir, self.temp_dir,
                    self.registration_id, self.token, self.kbase_endpoint)

                if compilation_report is None:
                    raise ValueError(
                        'Unable to generate a compilation report, which is now required, so your registration cannot continue.  '
                        +
                        'If you have been successfully registering this module already, this means that you may need to update '
                        +
                        'to the latest version of the KBase SDK and rebuild your makefile.'
                    )

                self.local_function_reader.finish_validation(
                    compilation_report)

                self.log('Report complete')

                self.set_build_step('pushing docker image to registry')
                self.push_docker_image(dockerclient, self.image_name)

            else:
                self.log(
                    'IN TEST MODE!! SKIPPING DOCKER BUILD AND DOCKER REGISTRY UPDATE!!'
                )

            # 4 - Update the DB
            self.set_build_step('updating the catalog')
            self.update_the_catalog(basedir, ref_data_folder, ref_data_ver,
                                    compilation_report)

            self.build_is_complete()

        except Exception as e:
            # set the build state to error and log it
            self.set_build_error(str(e))
            self.log(traceback.format_exc(), is_error=True)
            self.log('BUILD_ERROR: ' + str(e), is_error=True)
            if self.prev_dev_version:
                self.log('Reverting dev version to git_commit_hash=' +
                         self.prev_dev_version['git_commit_hash'] +
                         ', version=' + self.prev_dev_version['version'] +
                         ', git_commit_message=' +
                         self.prev_dev_version['git_commit_message'])
                self.db.update_dev_version(self.prev_dev_version,
                                           git_url=self.git_url)
        finally:
            self.flush_log_to_db()
            self.logfile.close()
            self.cleanup()

    def sanity_checks_and_parse(self, basedir, git_commit_hash):
        """Parse the module's kbase.yaml and run the basic registration checks.

        Args:
            basedir: path of the cloned module repository.
            git_commit_hash: commit being registered; handed to the local
                function reader.

        Returns:
            The parsed kbase.yaml content, which is also stored as
            ``self.kb_yaml``.

        Raises:
            ValueError: if neither kbase.yaml nor kbase.yml exists, the
                module version is not a semantic version, "dynamic-service"
                is not a boolean, the module name differs from the one
                already recorded, the user is neither an owner nor an admin,
                or the local function specs contain errors. The helper
                methods called here may raise as well.
        """
        # check that files exist; 'kbase.yaml' is preferred over 'kbase.yml'
        for yaml_filename in ('kbase.yaml', 'kbase.yml'):
            if os.path.isfile(os.path.join(basedir, yaml_filename)):
                break
        else:
            raise ValueError(
                'kbase.yaml file does not exist in repo, but is required!')

        # parse some stuff, and check for things
        with codecs.open(os.path.join(basedir, yaml_filename),
                         'r',
                         "utf-8",
                         errors='ignore') as kb_yaml_file:
            kb_yaml_string = kb_yaml_file.read()
        # SECURITY: this file comes from a user-supplied git repository.
        # yaml.load() without an explicit Loader can construct arbitrary
        # Python objects on older PyYAML releases and is rejected outright by
        # newer ones, so only plain data types are accepted here.
        self.kb_yaml = yaml.safe_load(kb_yaml_string)
        self.log('=====kbase.yaml parse:')
        self.log(pprint.pformat(self.kb_yaml))
        self.log('=====end kbase.yaml')

        module_name = self.get_required_field_as_string(
            self.kb_yaml, 'module-name').strip()
        # value not needed here; the call is kept so that the field is still
        # looked up as a required one at this point
        self.get_required_field_as_string(self.kb_yaml, 'module-description')
        version = self.get_required_field_as_string(self.kb_yaml,
                                                    'module-version').strip()

        # must be a semantic version
        if not semantic_version.validate(version):
            raise ValueError(
                'Invalid version string in kbase.yaml - must be in semantic version format.  See http://semver.org'
            )

        # value not needed here either; kept as a required-field lookup
        self.get_required_field_as_string(self.kb_yaml, 'service-language')
        owners = self.get_required_field_as_list(self.kb_yaml, 'owners')

        service_config = self.get_optional_field_as_dict(
            self.kb_yaml, 'service-config')
        # validate service_config parameters
        if service_config and 'dynamic-service' in service_config:
            if not isinstance(service_config['dynamic-service'], bool):
                raise ValueError(
                    'Invalid service-config in kbase.yaml - "dynamic-service" property must be a boolean "true" or "false".'
                )

        # module_name must match what exists (unless it is not yet defined)
        if 'module_name' in self.module_details:
            if self.module_details['module_name'] != module_name:
                raise ValueError(
                    'kbase.yaml file module_name field has changed since last version! '
                    +
                    'Module names are permanent- if this is a problem, contact a kbase admin.'
                )
        else:
            # This must be the first registration, so the module must not exist yet
            self.check_that_module_name_is_valid(module_name)

        # associate the module_name with the log file for easier searching (if we fail sooner, then the module name
        # cannot be used to lookup this log)
        self.db.set_build_log_module_name(self.registration_id, module_name)

        # you can't remove yourself from the owners list, or register something that you are not an owner of
        if self.username not in owners and self.is_admin is False:
            raise ValueError(
                'Your kbase username (' + self.username +
                ') must be in the owners list in the kbase.yaml file.')

        # OPTIONAL TODO: check if all the users are on the owners list?  not necessarily required, because we
        # do a check during registration of the person who started the registration...

        # TODO: check for directory structure, method spec format, documentation, version
        self.validate_method_specs(basedir)

        # initial validation of the local function specifications
        lf_report = self.local_function_reader.parse_and_basic_validation(
            basedir, self.module_details, module_name, version,
            git_commit_hash)
        self.log(
            self.local_function_reader.report_to_string_for_log(lf_report))

        if len(lf_report['functions_errored']) > 0:
            raise ValueError('Errors exist in local function specifications.')

        # return the parse so we can figure things out later
        return self.kb_yaml

    def check_that_module_name_is_valid(self, module_name):
        if self.db.is_registered(module_name=module_name):
            raise ValueError(
                'Module name (in kbase.yaml) is already registered.  Please specify a different name and try again.'
            )
        if self.db.module_name_lc_exists(module_name_lc=module_name.lower()):
            raise ValueError(
                'The case-insensitive module name (in kbase.yaml) is not unique.  Please specify a different name.'
            )
        # only allow alphanumeric and underscore
        if not re.match(r'^[A-Za-z0-9_]+$', module_name):
            raise ValueError(
                'Module names must be alphanumeric characters (including underscores) only, with no spaces.'
            )

    def update_the_catalog(self, basedir, ref_data_folder, ref_data_ver,
                           compilation_report):
        # get the basic info that we need
        commit_hash = str(
            subprocess.check_output(['git', 'log', '--pretty=%H', '-n', '1'],
                                    cwd=basedir)).rstrip()
        commit_message = str(
            subprocess.check_output(['git', 'log', '--pretty=%B', '-n', '1'],
                                    cwd=basedir)).rstrip()

        module_name = self.get_required_field_as_string(
            self.kb_yaml, 'module-name')
        module_description = self.get_required_field_as_string(
            self.kb_yaml, 'module-description')
        version = self.get_required_field_as_string(self.kb_yaml,
                                                    'module-version')
        service_language = self.get_required_field_as_string(
            self.kb_yaml, 'service-language')
        owners = self.get_required_field_as_list(self.kb_yaml, 'owners')
        service_config = self.get_optional_field_as_dict(
            self.kb_yaml, 'service-config')

        # first update the module name, which is now permanent, if we haven't already
        if ('module_name'
                not in self.module_details) or ('module_name_lc'
                                                not in self.module_details):
            error = self.db.set_module_name(self.git_url, module_name)
            if error is not None:
                raise ValueError(
                    'Unable to set module_name - there was an internal database error.'
                    + error)

        # TODO: Could optimize by combining all these things into one mongo call, but for now this is easier.
        # Combining it into one call would just mean that this update happens as a single transaction, but a partial
        # update for now that fails midstream is probably not a huge issue- we can always reregister.

        # next update the basic information
        info = {
            'description': module_description,
            'language': service_language
        }
        if service_config and service_config['dynamic-service']:
            info['dynamic_service'] = 1
        else:
            info['dynamic_service'] = 0

        local_functions = self.local_function_reader.extract_lf_records()
        if len(local_functions) > 0:
            info['local_functions'] = 1
        else:
            info['local_functions'] = 0

        self.log('new info: ' + pprint.pformat(info))
        error = self.db.set_module_info(info, git_url=self.git_url)
        if error is not None:
            raise ValueError(
                'Unable to set module info - there was an internal database error: '
                + str(error))

        # next update the owners
        ownersListForUpdate = []
        for o in owners:
            # TODO: add some validation that the username is a valid kbase user
            ownersListForUpdate.append({'kb_username': o})
        self.log('new owners list: ' + pprint.pformat(ownersListForUpdate))
        error = self.db.set_module_owners(ownersListForUpdate,
                                          git_url=self.git_url)
        if error is not None:
            raise ValueError(
                'Unable to set module owners - there was an internal database error: '
                + str(error))

        # finally update the actual dev version info
        narrative_methods = []
        if os.path.isdir(os.path.join(basedir, 'ui', 'narrative', 'methods')):
            for m in os.listdir(
                    os.path.join(basedir, 'ui', 'narrative', 'methods')):
                if os.path.isdir(
                        os.path.join(basedir, 'ui', 'narrative', 'methods',
                                     m)):
                    narrative_methods.append(m)

        if len(local_functions) > 0:
            self.log('Saving local function specs:')
            self.log(pprint.pformat(local_functions))
            error = self.db.save_local_function_specs(local_functions)
            if error is not None:
                raise ValueError(
                    'There was an error saving local function specs, DB says: '
                    + str(error))

        new_version = {
            'module_name': module_name.strip(),
            'module_name_lc': module_name.strip().lower(),
            'module_description': module_description,
            'released': 0,
            'released_timestamp': None,
            'notes': '',
            'timestamp': self.timestamp,
            'registration_id': self.registration_id,
            'version': version,
            'git_commit_hash': commit_hash,
            'git_commit_message': commit_message,
            'narrative_methods': narrative_methods,
            'local_functions': self.local_function_reader.extract_lf_names(),
            'docker_img_name': self.image_name,
            'compilation_report': compilation_report
        }
        if ref_data_ver:
            new_version['data_folder'] = ref_data_folder
            new_version['data_version'] = ref_data_ver
        if service_config and service_config['dynamic-service']:
            new_version['dynamic_service'] = 1
        else:
            new_version['dynamic_service'] = 0

        self.log('new dev version object: ' + pprint.pformat(new_version))
        error = self.db.update_dev_version(new_version)
        if error is not None:
            raise ValueError(
                'Unable to update dev version - there was an internal database error: '
                + str(error))

        #push to NMS
        self.log('registering specs with NMS')
        self.nms.register_repo({
            'git_url': self.git_url,
            'git_commit_hash': commit_hash
        })

        self.log('\ndone')

        # done!!!

    def validate_method_specs(self, basedir):
        """Validate every narrative method found under ui/narrative/methods.

        Each sub-directory of ``<basedir>/ui/narrative/methods`` is treated as
        one method.  Its ``spec.json``, its ``display.yaml`` and every
        ``*.html`` file stored next to them are passed to
        ``self.nms.validate_method``; warnings and errors from the reply are
        written to the build log.

        :param basedir: root directory of the module repository checkout.
        :raises ValueError: if a method lacks ``spec.json`` or
            ``display.yaml``, or if the reply marks the method as not valid.
        """

        def read_text(path):
            # undecodable bytes are dropped (errors='ignore') instead of
            # aborting the whole validation
            with codecs.open(path, 'r', "utf-8", errors='ignore') as handle:
                return handle.read()

        self.log('validating narrative method specifications')
        methods_dir = os.path.join(basedir, 'ui', 'narrative', 'methods')
        if not os.path.isdir(methods_dir):
            self.log(
                '    - no ui/narrative/methods directory found, so no narrative methods will be deployed'
            )
            return

        for m in os.listdir(methods_dir):
            method_path = os.path.join(methods_dir, m)
            if not os.path.isdir(method_path):
                continue
            self.log('    - validating method: ' + m)

            # first grab the spec and display files, which are required
            spec_path = os.path.join(method_path, 'spec.json')
            display_path = os.path.join(method_path, 'display.yaml')
            if not os.path.isfile(spec_path):
                raise ValueError(
                    'Invalid narrative method specification (' + m +
                    '): No spec.json file defined.')
            if not os.path.isfile(display_path):
                raise ValueError(
                    'Invalid narrative method specification (' + m +
                    '): No display.yaml file defined.')
            spec_json = read_text(spec_path)
            display_yaml = read_text(display_path)

            # gather any extra html files; other entries are skipped, they
            # must not end the scan (os.listdir order is arbitrary)
            extra_files = {}
            for extra_file_name in os.listdir(method_path):
                extra_path = os.path.join(method_path, extra_file_name)
                if not os.path.isfile(extra_path):
                    continue
                if not extra_file_name.endswith('.html'):
                    continue
                extra_files[extra_file_name] = read_text(extra_path)

            # validate against the NMS target endpoint
            result = self.nms.validate_method({
                'id': m,
                'spec_json': spec_json,
                'display_yaml': display_yaml,
                'extra_files': extra_files
            })

            # inspect results
            if result['is_valid'] > 0:
                self.log('        - valid!')
                if 'warnings' in result and result['warnings']:
                    for w in result['warnings']:
                        self.log('        - warning: ' + w)
                continue

            self.log('        - not valid!', is_error=True)
            if 'errors' in result:
                if result['errors']:
                    for e in result['errors']:
                        self.log('        - error: ' + e, is_error=True)
            else:
                self.log('        - error is undefined!', is_error=True)

            raise ValueError('Invalid narrative method specification (' + m +
                             ')')

    def get_required_field_as_string(self, kb_yaml, field_name):
        """Return the stripped string stored under a required field.

        :param kb_yaml: mapping holding the parsed kbase.yaml content.
        :param field_name: key that must be present in ``kb_yaml``.
        :returns: the value with surrounding whitespace removed.
        :raises ValueError: if the key is absent, the value is None or blank,
            or the value is not a string.
        """
        if field_name not in kb_yaml:
            raise ValueError('kbase.yaml file missing "' + field_name +
                             '" required field')
        raw_value = kb_yaml[field_name]
        if raw_value is None:
            # presumably what an entry with no value parses to; report it the
            # same way as a blank string instead of crashing on .strip()
            raise ValueError('kbase.yaml file missing value for "' +
                             field_name + '" required field')
        try:
            value = raw_value.strip()
        except AttributeError:
            # numbers, lists, dicts...: anything without str-like .strip()
            raise ValueError('kbase.yaml file "' + field_name +
                             '" required field must be a string')
        if not value:
            raise ValueError('kbase.yaml file missing value for "' +
                             field_name + '" required field')
        return value

    def get_required_field_as_list(self, kb_yaml, field_name):
        """Return the list stored under a required field.

        :param kb_yaml: mapping holding the parsed kbase.yaml content.
        :param field_name: key that must be present in ``kb_yaml``.
        :returns: the stored list object itself (not a copy).
        :raises ValueError: if the key is absent or the value is not a list.
        """
        if field_name not in kb_yaml:
            raise ValueError('kbase.yaml file missing "' + field_name +
                             '" required field')
        value = kb_yaml[field_name]
        # isinstance (rather than an exact type comparison) also accepts
        # list subclasses
        if not isinstance(value, list):
            raise ValueError('kbase.yaml file "' + field_name +
                             '" required field must be a list')
        return value

    def get_optional_field_as_dict(self, kb_yaml, field_name):
        """Return the dict stored under an optional field, or None if absent.

        :param kb_yaml: mapping holding the parsed kbase.yaml content.
        :param field_name: key to look up.
        :returns: the stored dict object itself, or None when the key is not
            in ``kb_yaml``.
        :raises ValueError: if the key is present but its value is not a dict.
        """
        if field_name not in kb_yaml:
            return None
        value = kb_yaml[field_name]
        # isinstance (rather than an exact type comparison) also accepts
        # dict subclasses such as OrderedDict
        if not isinstance(value, dict):
            raise ValueError('kbase.yaml file "' + field_name +
                             '" optional field must be a dict')
        return value

    def log(self, message, no_end_line=False, is_error=False):
        """Write a message to the log file and queue it for the build log.

        The text is written and flushed to ``self.logfile`` right away.  It is
        also split into lines, each appended to ``self.log_buffer`` as a
        ``{'content', 'error'}`` record; lines over 1000 characters are cut
        and annotated with their original length.  The buffer is handed to
        ``flush_log_to_db`` once ``self.log_interval`` seconds have passed
        since ``self.last_log_time`` or it holds more than 1000 records.

        :param message: text to log.
        :param no_end_line: when true, no newline is appended before writing.
        :param is_error: stored as the ``error`` flag of every queued record.
        """
        content = message if no_end_line else message + '\n'
        self.logfile.write(content)
        self.logfile.flush()

        # add each line to the buffer
        for raw_line in content.splitlines():
            entry = raw_line
            if len(raw_line) > 1000:
                entry = (raw_line[:1000] +
                         ' ... truncated to 1k characters of ' +
                         str(len(raw_line)))
            self.log_buffer.append({'content': entry + '\n', 'error': is_error})

        # save the buffer if enough time has elapsed, or it has grown past 1000 lines
        interval_elapsed = time.time() - self.last_log_time > self.log_interval
        if interval_elapsed or len(self.log_buffer) > 1000:
            self.flush_log_to_db()

    def flush_log_to_db(self):
        """Send the buffered log records to the database and reset the buffer.

        Passes ``self.log_buffer`` to ``self.db.append_to_build_log`` under
        ``self.registration_id``, then starts a fresh buffer and restarts the
        flush timer.
        """
        # todo: if we lose log lines, that's ok.  Make sure we handle case if log is larger than mongo doc size
        pending = self.log_buffer
        self.db.append_to_build_log(self.registration_id, pending)
        # start over with an empty buffer and a fresh timestamp
        self.log_buffer = []
        self.last_log_time = time.time()

    def set_build_step(self, step):
        """Record the current build step on the module and on the build log.

        :param step: short description; stored as ``'building: ' + step``.
        """
        state = 'building: ' + step
        self.db.set_module_registration_state(git_url=self.git_url,
                                              new_state=state)
        self.db.set_build_log_state(self.registration_id, state)

    def set_build_error(self, error_message):
        """Mark both the module registration and the build log as failed.

        :param error_message: text stored alongside the ``'error'`` state.
        """
        failed_state = 'error'
        self.db.set_module_registration_state(
            git_url=self.git_url,
            new_state=failed_state,
            error_message=error_message)
        self.db.set_build_log_state(
            self.registration_id,
            failed_state,
            error_message=error_message)

    def build_is_complete(self):
        """Mark both the module registration and the build log as complete."""
        done_state = 'complete'
        self.db.set_module_registration_state(git_url=self.git_url,
                                              new_state=done_state)
        self.db.set_build_log_state(self.registration_id, done_state)

    def cleanup(self):
        """Delete ``<temp_dir>/<registration_id>`` if that directory exists."""
        scratch_dir = os.path.join(self.temp_dir, self.registration_id)
        if not os.path.isdir(scratch_dir):
            return
        shutil.rmtree(scratch_dir)

    def build_docker_image(self, docker_client, image_name, basedir):
        """Build a docker image from ``basedir`` and return its image Id.

        Every chunk yielded by ``docker_client.build`` is split into lines and
        each line is parsed as JSON.  ``stream`` entries are copied to the
        build log; the first ``errorDetail`` entry aborts the build.

        :param docker_client: object providing ``build`` and ``inspect_image``.
        :param image_name: tag to give the image.
        :param basedir: directory used as the build path.
        :returns: the ``Id`` reported by ``inspect_image`` for ``image_name``.
        :raises ValueError: if the build output contains an ``errorDetail``.
        """
        self.log('\nBuilding the docker image for ' + image_name)

        build_output = docker_client.build(path=basedir,
                                           rm=True,
                                           tag=image_name,
                                           pull=False)
        # examine stream to determine success/failure of build
        for chunk in build_output:
            for raw_record in chunk.strip().splitlines():
                record = json.loads(raw_record.strip())
                if 'stream' in record:
                    self.log(record['stream'], no_end_line=True)
                if 'errorDetail' in record:
                    self.log(str(record), no_end_line=True)
                    raise ValueError('Docker build failed: ' +
                                     str(record['errorDetail']))

        image_id = docker_client.inspect_image(image_name)['Id']

        self.log('Docker build successful.')
        self.log('    Image Id:   ' + str(image_id))
        self.log('    Image Name: ' + str(image_name) + '\n\n')
        return image_id

    def push_docker_image(self, docker_client, image_name):
        """Push ``image_name`` to its registry, mirroring progress to the log.

        ``image_name`` is split into repository and tag at the last ``:``,
        but only when that colon comes after the last ``/``.  A colon before
        the last slash belongs to a ``host:port/`` registry prefix, so a name
        without a tag is pushed whole with ``tag=None``.

        :param docker_client: object providing ``push``.
        :param image_name: image reference, optionally ending in ``:tag``.
        :raises ValueError: if a line of the push output contains ``error``.
        """
        self.log('\nPushing docker image to registry for ' + image_name)
        colon_pos = image_name.rfind(':')
        if colon_pos > image_name.rfind('/'):
            image = image_name[:colon_pos]
            tag = image_name[colon_pos + 1:]
        else:
            # no tag present (the only colon, if any, is the registry port)
            image = image_name
            tag = None

        # to do: examine stream to determine success/failure of build
        if self.docker_push_allow_insecure:
            print("Docker push: insecure_registry: " +
                  str(self.docker_push_allow_insecure))
        else:
            print("Docker push: insecure_registry: None")

        known_keys = ('id', 'status', 'progress', 'progressDetail')
        for lines in docker_client.push(
                image,
                tag=tag,
                stream=True,
                insecure_registry=self.docker_push_allow_insecure):
            for line in lines.strip().splitlines():
                # example line:
                #'{"status":"Pushing","progressDetail":{"current":32,"total":32},"progress":"[==================================================\\u003e]     32 B/32 B","id":"da200da4256c"}'
                line_parse = json.loads(line)
                log_line = ''
                if 'id' in line_parse:
                    log_line += line_parse['id'] + ' - '
                if 'status' in line_parse:
                    log_line += line_parse['status']
                if 'progress' in line_parse:
                    log_line += ' - ' + line_parse['progress']

                # catch anything unexpected, we should probably throw an error here
                for key in line_parse:
                    if key not in known_keys:
                        log_line += '[' + key + '=' + str(
                            line_parse[key]) + '] '

                self.log(log_line)

                if 'error' in line_parse:
                    self.log(str(line_parse), no_end_line=True)
                    raise ValueError('Docker push failed: ' +
                                     str(line_parse['error']))

        self.log('done pushing docker image to registry for ' + image_name +
                 '\n')

    def run_docker_container(self,
                             dockerclient,
                             image_name,
                             token,
                             kbase_endpoint,
                             binds,
                             work_dir,
                             command,
                             print_details=False):
        """Run ``command`` in a fresh container and copy its output to the log.

        Before the container is created, ``token`` and ``config.properties``
        files are written into ``work_dir``, which is then bound read-write at
        ``/kb/module/work``.  The container output is consumed one item at a
        time and logged line by line.  The container is force-removed
        afterwards, whether or not the run succeeded.

        :param dockerclient: object providing ``create_container``,
            ``create_host_config``, ``start``, ``logs`` and
            ``remove_container``.
        :param image_name: image to start.
        :param token: text written verbatim to ``<work_dir>/token``.
        :param kbase_endpoint: base URL used to build ``config.properties``.
        :param binds: extra bind mounts, or None; never modified.
        :param work_dir: existing host directory for the generated files.
        :param command: entry-point command passed to the container.
        :param print_details: when true, log the run configuration first.
        """
        cnt_id = None
        try:
            token_file = os.path.join(work_dir, "token")
            with open(token_file, "w") as token_handle:
                token_handle.write(token)
            config_file = os.path.join(work_dir, "config.properties")
            with open(config_file, "w") as config_handle:
                config_handle.write(
                    "[global]\n" + "job_service_url = " + kbase_endpoint +
                    "/userandjobstate\n" + "workspace_url = " +
                    kbase_endpoint + "/ws\n" + "shock_url = " +
                    kbase_endpoint + "/shock-api\n" + "kbase_endpoint = " +
                    kbase_endpoint + "\n")
            # work on a copy so the caller's dict is left untouched
            binds = dict(binds) if binds else {}
            binds[work_dir] = {"bind": "/kb/module/work", "mode": "rw"}
            container = dockerclient.create_container(
                image=image_name,
                command=command,
                tty=True,
                host_config=dockerclient.create_host_config(binds=binds))
            cnt_id = container.get('Id')
            self.log('Running "' + command +
                     '" entry-point command, container Id=' + cnt_id)
            if print_details:
                self.log("Command details:")
                self.log("    Image name: " + image_name)
                self.log("    Binds: " + str(binds))
                self.log("    KBase-endpoint: " + kbase_endpoint)
                self.log("    Necessary files in '" + work_dir +
                         "': 'token', 'config.properties'")
                self.log("    Tty: True")
                self.log("    Docker command: " + command)
            dockerclient.start(container=cnt_id)
            stream = dockerclient.logs(container=cnt_id,
                                       stdout=True,
                                       stderr=True,
                                       stream=True)
            # NOTE(review): assumes the stream yields one character per item
            # - confirm against the docker client version in use
            line = ""
            for char in stream:
                if char == '\r':
                    continue
                if char == '\n':
                    self.log(line)
                    line = ""
                else:
                    line += char
            if len(line) > 0:
                self.log(line)
        finally:
            # cleaning up the container; nothing to do if none was created.
            # A failed removal is reported but must not replace the outcome
            # of the run itself.
            if cnt_id:
                try:
                    dockerclient.remove_container(container=cnt_id,
                                                  v=True,
                                                  force=True)
                    self.log("Docker container (Id=" + cnt_id +
                             ") was cleaned up")
                except Exception as e:
                    self.log("Docker container (Id=" + cnt_id +
                             ") could not be cleaned up: " + str(e),
                             is_error=True)

    def prepare_ref_data(self, dockerclient, image_name, ref_data_base,
                         ref_data_folder, ref_data_ver, basedir, temp_dir,
                         registration_id, token, kbase_endpoint):
        """Run the image's ``init`` command to produce reference data.

        A temporary folder ``<ref_data_base>/<ref_data_folder>/temp_<id>`` is
        bound at ``/data`` and ``<basedir>/data`` at ``/kb/module/data``.
        When the run leaves a ``__READY__`` file in the temporary folder, the
        folder is renamed to ``<ref_data_base>/<ref_data_folder>/<ref_data_ver>``.
        Otherwise the produced data is discarded.

        :param dockerclient: passed through to ``run_docker_container``.
        :param image_name: image to run.
        :param ref_data_base: existing root folder for all reference data.
        :param ref_data_folder: per-module folder, created if missing.
        :param ref_data_ver: name of the final version folder.
        :param basedir: module repository checkout.
        :param temp_dir: scratch root; ``<temp_dir>/<registration_id>`` must
            already exist.
        :param registration_id: id used to name the temporary folders.
        :param token: passed through to ``run_docker_container``.
        :param kbase_endpoint: passed through to ``run_docker_container``.
        :raises ValueError: if ``ref_data_base`` does not exist or no
            ``__READY__`` file was produced.
        """
        self.log(
            '\nReference data: creating docker container for initialization')
        if not os.path.exists(ref_data_base):
            raise ValueError("Reference data network folder doesn't exist: " +
                             ref_data_base)
        upper_target_dir = os.path.join(ref_data_base, ref_data_folder)
        if not os.path.exists(upper_target_dir):
            os.mkdir(upper_target_dir)
        temp_ref_data_dir = os.path.join(upper_target_dir,
                                         "temp_" + registration_id)
        try:
            repo_data_dir = os.path.join(basedir, "data")
            os.mkdir(temp_ref_data_dir)
            binds = {
                temp_ref_data_dir: {
                    "bind": "/data",
                    "mode": "rw"
                },
                repo_data_dir: {
                    "bind": "/kb/module/data",
                    "mode": "rw"
                }
            }
            temp_work_dir = os.path.join(temp_dir, registration_id,
                                         'ref_data_workdir')
            os.mkdir(temp_work_dir)
            self.run_docker_container(dockerclient,
                                      image_name,
                                      token,
                                      kbase_endpoint,
                                      binds,
                                      temp_work_dir,
                                      'init',
                                      print_details=True)
            ready_file = os.path.join(temp_ref_data_dir, "__READY__")
            if not os.path.exists(ready_file):
                raise ValueError(
                    "__READY__ file is not detected in reference data folder, produced data will be discarded"
                )
            target_dir = os.path.join(upper_target_dir, ref_data_ver)
            os.rename(temp_ref_data_dir, target_dir)
            self.log("Reference data was successfully deployed into " +
                     target_dir)
        finally:
            # cleaning up temporary ref-data (if not renamed into permanent
            # after success).  Failure to clean up is reported but must not
            # replace the outcome of the run itself.
            if os.path.exists(temp_ref_data_dir):
                try:
                    shutil.rmtree(temp_ref_data_dir)
                except Exception as e:
                    self.log("Unable to remove temporary reference data folder " +
                             temp_ref_data_dir + ": " + str(e),
                             is_error=True)

    def prepare_compilation_report(self, dockerclient, image_name, basedir,
                                   temp_dir, registration_id, token,
                                   kbase_endpoint):
        """Run the image's ``report`` command and load the report it writes.

        The container runs with ``<temp_dir>/<registration_id>/report_workdir``
        as its work directory; the report is then read from
        ``compile_report.json`` in that directory.  This step is best-effort:
        any failure is logged and turned into a ``None`` result.

        :param dockerclient: passed through to ``run_docker_container``.
        :param image_name: image to run.
        :param basedir: not used by this method.
        :param temp_dir: scratch root; ``<temp_dir>/<registration_id>`` must
            already exist.
        :param registration_id: id naming the scratch sub-directory.
        :param token: passed through to ``run_docker_container``.
        :param kbase_endpoint: passed through to ``run_docker_container``.
        :returns: the parsed JSON report, or None if it is missing or any
            step failed.
        """
        self.log('\nCompilation report: creating docker container')
        try:
            temp_work_dir = os.path.join(temp_dir, registration_id,
                                         'report_workdir')
            os.mkdir(temp_work_dir)
            self.run_docker_container(dockerclient, image_name, token,
                                      kbase_endpoint, None, temp_work_dir,
                                      'report')
            report_file = os.path.join(temp_work_dir, 'compile_report.json')
            if not os.path.exists(report_file):
                self.log("Report file doesn't exist: " + report_file)
                return None
            with codecs.open(report_file, 'r', 'utf-8',
                             errors='ignore') as f:
                return json.load(f)
        except Exception as e:
            # deliberately broad: a missing report must not fail the build
            self.log("Error preparing compilation log: " + str(e))
        return None
Ejemplo n.º 11
0
class Registrar:

    # params is passed in from the controller, should be the same as passed into the spec
    # db is a reference to the Catalog DB interface (usually a MongoCatalogDBI instance)
    def __init__(self,
                 params,
                 registration_id,
                 timestamp,
                 username,
                 token,
                 db,
                 temp_dir,
                 docker_base_url,
                 docker_registry_host,
                 nms_url,
                 nms_admin_user,
                 nms_admin_psswd,
                 module_details):
        """Store the registration settings and create the NMS client.

        ``params`` must contain ``git_url``; every other argument is kept
        unchanged on the instance under the same name.  A
        ``NarrativeMethodStore`` client is built from the NMS URL and admin
        credentials and stored as ``self.nms``.
        """
        # what is being registered, by whom and when
        self.params = params
        # at this point, we assume git_url has been checked
        self.git_url = params['git_url']
        self.registration_id = registration_id
        self.timestamp = timestamp
        self.username = username
        self.token = token

        # storage and scratch space
        self.db = db
        self.temp_dir = temp_dir

        # docker settings
        self.docker_base_url = docker_base_url
        self.docker_registry_host = docker_registry_host

        # narrative method store settings and client
        self.nms_url = nms_url
        self.nms_admin_user = nms_admin_user
        self.nms_admin_psswd = nms_admin_psswd
        self.nms = NarrativeMethodStore(nms_url,
                                        user_id=nms_admin_user,
                                        password=nms_admin_psswd)

        # (most) of the mongo document for this module snapshot before this registration
        self.module_details = module_details

    def start_registration(self):
        """Run the whole registration: clone, check, build, push, catalog.

        Progress is written to ``<temp_dir>/registration.log.<registration_id>``
        and reported through ``set_build_step``.  Any exception raised along
        the way is recorded with ``set_build_error`` and logged rather than
        propagated.  The log file is closed and ``cleanup`` is called in
        every case.
        """
        try:
            self.logfile = open(self.temp_dir + '/registration.log.' + self.registration_id, 'w')
            self.log('Registration started on ' + str(datetime.datetime.now()) + ' by ' + self.username)
            self.log('Registration ID: ' + str(self.registration_id))
            self.log('Registration Parameters: ' + str(self.params))

            ##############################
            # 1 - clone the repo into the temp directory that should already be reserved for us
            self.set_build_step('cloning git repo')
            if not os.path.isdir(os.path.join(self.temp_dir, self.registration_id)):
                raise ValueError('Directory for the git clone was not allocated!  This is an internal catalog server error, please report this problem.')

            basedir = os.path.join(self.temp_dir, self.registration_id, 'module_repo')

            self.log('Attempting to clone into: ' + basedir)
            self.log('git clone ' + self.git_url)
            repo = git.Repo.clone_from(self.git_url, basedir)
            # take the hash from HEAD rather than from a branch named
            # "master", so repositories with another default branch work too
            git_commit_hash = repo.head.commit
            self.log('current commit hash at HEAD:' + str(git_commit_hash))
            if 'git_commit_hash' in self.params:
                if self.params['git_commit_hash']:
                    requested_hash = self.params['git_commit_hash'].strip()
                    self.log('git checkout ' + requested_hash)
                    repo.git.checkout(requested_hash)
                    git_commit_hash = requested_hash

            ##############################
            # 2 - sanity check (things parse, files exist, module_name matches, etc)
            self.set_build_step('reading files and performing basic checks')
            self.sanity_checks_and_parse(repo, basedir)

            ##############################
            # 3 docker build - in progress
            # perhaps make this a self attr?
            module_name_lc = self.get_required_field_as_string(self.kb_yaml, 'module-name').strip().lower()
            self.image_name = self.docker_registry_host + '/' + module_name_lc + ':' + str(git_commit_hash)
            if not Registrar._TEST_WITHOUT_DOCKER:
                dockerclient = DockerClient(base_url=str(self.docker_base_url), timeout=360)
                self.set_build_step('building the docker image')
                self.build_docker_image(dockerclient, self.image_name, basedir)

                self.set_build_step('pushing docker image to registry')
                self.push_docker_image(dockerclient, self.image_name)
            else:
                self.log('IN TEST MODE!! SKIPPING DOCKER BUILD AND DOCKER REGISTRY UPDATE!!')

            # 4 - Update the DB
            self.set_build_step('updating the catalog')
            self.update_the_catalog(repo, basedir)

            self.build_is_complete()

        except Exception as e:
            # set the build state to error and log it
            self.set_build_error(str(e))
            self.log(traceback.format_exc())
            self.log('BUILD_ERROR: ' + str(e))
        finally:
            # the log file is missing if open() itself failed; cleanup must
            # still run in that case
            logfile = getattr(self, 'logfile', None)
            if logfile is not None:
                logfile.close()
            self.cleanup()



    def sanity_checks_and_parse(self, repo, basedir):
        """Parse the module's kbase.yaml and run the pre-build checks.

        Looks for ``kbase.yaml`` (falling back to ``kbase.yml``) in
        ``basedir``, stores the parsed document in ``self.kb_yaml``, checks
        the required fields, the module name, the owners list and finally the
        narrative method specs.

        :param repo: not used by this method.
        :param basedir: module repository checkout.
        :returns: the parsed kbase.yaml document.
        :raises ValueError: if the file is missing, the module name differs
            from the stored one, or the registering user is not an owner.
            The helper methods called here may raise as well.
        """
        # check that files exist; kbase.yaml wins over kbase.yml
        yaml_filename = None
        for candidate in ('kbase.yaml', 'kbase.yml'):
            if os.path.isfile(os.path.join(basedir, candidate)):
                yaml_filename = candidate
                break
        if yaml_filename is None:
            raise ValueError('kbase.yaml file does not exist in repo, but is required!')

        # parse some stuff, and check for things
        with open(os.path.join(basedir, yaml_filename)) as kb_yaml_file:
            kb_yaml_string = kb_yaml_file.read()
        # NOTE(review): yaml.load is applied to a file taken from the cloned
        # repository; consider yaml.safe_load if that content is untrusted.
        self.kb_yaml = yaml.load(kb_yaml_string)
        for report_line in ('=====kbase.yaml parse:',
                            pprint.pformat(self.kb_yaml),
                            '=====end kbase.yaml'):
            self.log(report_line)

        module_name = self.get_required_field_as_string(self.kb_yaml, 'module-name').strip()
        # these are read only to make sure they are present and usable
        for required_field in ('module-description', 'module-version', 'service-language'):
            self.get_required_field_as_string(self.kb_yaml, required_field).strip()
        owners = self.get_required_field_as_list(self.kb_yaml, 'owners')

        if 'module_name' not in self.module_details:
            # This must be the first registration, so the module must not exist yet
            self.check_that_module_name_is_valid(module_name)
        elif self.module_details['module_name'] != module_name:
            # module_name must match what exists
            raise ValueError('kbase.yaml file module_name field has changed since last version! ' +
                                'Module names are permanent- if this is a problem, contact a kbase admin.')

        # you can't remove yourself from the owners list, or register something that you are not an owner of
        if self.username not in owners:
            raise ValueError('Your kbase username ('+self.username+') must be in the owners list in the kbase.yaml file.')

        # OPTIONAL TODO: check if all the users are on the owners list?  not necessarily required, because we
        # do a check during registration of the person who started the registration...

        # TODO: check for directory structure, method spec format, documentation, version
        self.validate_method_specs(basedir)

        # return the parse so we can figure things out later
        return self.kb_yaml


    def check_that_module_name_is_valid(self, module_name):
        """Check that ``module_name`` is unused and syntactically acceptable.

        :param module_name: candidate module name.
        :raises ValueError: if the name is already registered, if its
            lower-cased form already exists, or if it contains anything other
            than ASCII letters, digits and underscores.
        """
        if self.db.is_registered(module_name=module_name):
            raise ValueError('Module name (in kbase.yaml) is already registered.  Please specify a different name and try again.')
        if self.db.module_name_lc_exists(module_name_lc=module_name.lower()):
            raise ValueError('The case-insensitive module name (in kbase.yaml) is not unique.  Please specify a different name.')
        # only allow alphanumeric and underscore; \Z (unlike $) does not
        # tolerate a trailing newline
        if not re.match(r'^[A-Za-z0-9_]+\Z', module_name):
            raise ValueError('Module names must be alphanumeric characters (including underscores) only, with no spaces.')


    def update_the_catalog(self, repo, basedir):
        """Store the results of this registration in the catalog DB and NMS.

        In order: sets the module name (only if ``self.module_details`` does
        not have it yet), the basic module info, the owners list and the new
        dev version record, then registers the repository with the NMS.  Each
        DB call is expected to return None on success or an error otherwise.

        :param repo: repository object; ``repo.head.commit`` supplies the
            commit hash and message.
        :param basedir: module repository checkout, scanned for
            ``ui/narrative/methods`` sub-directories.
        :raises ValueError: if any DB call returns an error.
        """
        # get the basic info that we need
        commit_hash = repo.head.commit.hexsha
        commit_message = repo.head.commit.message

        module_name = self.get_required_field_as_string(self.kb_yaml, 'module-name')
        module_description = self.get_required_field_as_string(self.kb_yaml, 'module-description')
        version = self.get_required_field_as_string(self.kb_yaml, 'module-version')
        service_language = self.get_required_field_as_string(self.kb_yaml, 'service-language')
        owners = self.get_required_field_as_list(self.kb_yaml, 'owners')

        # first update the module name, which is now permanent, if we haven't already
        if ('module_name' not in self.module_details) or ('module_name_lc' not in self.module_details):
            error = self.db.set_module_name(self.git_url, module_name)
            if error is not None:
                # str(): the error is not guaranteed to be a string
                raise ValueError('Unable to set module_name - there was an internal database error: ' + str(error))

        # TODO: Could optimize by combining all these things into one mongo call, but for now this is easier.
        # Combining it into one call would just mean that this update happens as a single transaction, but a partial
        # update for now that fails midstream is probably not a huge issue- we can always reregister.

        # next update the basic information
        info = {
            'description': module_description,
            'language': service_language
        }
        self.log('new info: ' + pprint.pformat(info))
        error = self.db.set_module_info(info, git_url=self.git_url)
        if error is not None:
            raise ValueError('Unable to set module info - there was an internal database error: ' + str(error))

        # next update the owners
        # TODO: add some validation that the username is a valid kbase user
        ownersListForUpdate = [{'kb_username': o} for o in owners]
        self.log('new owners list: ' + pprint.pformat(ownersListForUpdate))
        error = self.db.set_module_owners(ownersListForUpdate, git_url=self.git_url)
        if error is not None:
            raise ValueError('Unable to set module owners - there was an internal database error: ' + str(error))

        # finally update the actual dev version info; every sub-directory of
        # ui/narrative/methods counts as one narrative method
        narrative_methods = []
        methods_dir = os.path.join(basedir, 'ui', 'narrative', 'methods')
        if os.path.isdir(methods_dir):
            narrative_methods = [
                m for m in os.listdir(methods_dir)
                if os.path.isdir(os.path.join(methods_dir, m))
            ]

        new_version = {
            'timestamp': self.timestamp,
            'version': version,
            'git_commit_hash': commit_hash,
            'git_commit_message': commit_message,
            'narrative_methods': narrative_methods,
            'docker_img_name': self.image_name
        }
        self.log('new dev version object: ' + pprint.pformat(new_version))
        error = self.db.update_dev_version(new_version, git_url=self.git_url)
        if error is not None:
            raise ValueError('Unable to update dev version - there was an internal database error: ' + str(error))

        # push to NMS
        self.log('registering specs with NMS')
        self.nms.register_repo({'git_url': self.git_url, 'git_commit_hash': commit_hash})

        self.log('\ndone')


    def validate_method_specs(self, basedir):
        """Validate every narrative method spec under ``<basedir>/ui/narrative/methods``.

        Each sub-directory is treated as one method.  Its ``spec.json`` and
        ``display.yaml`` (both required), plus any ``*.html`` files sitting next
        to them, are passed to ``self.nms.validate_method``.  Warnings and
        errors from the response are written to the log.

        Raises:
            ValueError: if a required file is missing, or the validation
                response does not mark the method as valid.
        """
        self.log('validating narrative method specifications')
        methods_dir = os.path.join(basedir, 'ui', 'narrative', 'methods')
        if not os.path.isdir(methods_dir):
            self.log('    - no ui/narrative/methods directory found, so no narrative methods will be deployed')
            return

        for m in os.listdir(methods_dir):
            method_path = os.path.join(methods_dir, m)
            if not os.path.isdir(method_path):
                continue
            self.log('    - validating method: '+m)

            # first grab the spec and display files, which are required
            spec_path = os.path.join(method_path, 'spec.json')
            display_path = os.path.join(method_path, 'display.yaml')
            if not os.path.isfile(spec_path):
                raise ValueError('Invalid narrative method specification ('+m+'): No spec.json file defined.')
            if not os.path.isfile(display_path):
                raise ValueError('Invalid narrative method specification ('+m+'): No display.yaml file defined.')
            with open(spec_path) as spec_json_file:
                spec_json = spec_json_file.read()
            with open(display_path) as display_yaml_file:
                display_yaml = display_yaml_file.read()

            # gather any extra html files; anything else in the directory is
            # skipped (not a reason to stop looking at the remaining entries)
            extraFiles = {}
            for extra_file_name in os.listdir(method_path):
                extra_file_path = os.path.join(method_path, extra_file_name)
                if not os.path.isfile(extra_file_path):
                    continue
                if not extra_file_name.endswith('.html'):
                    continue
                with open(extra_file_path) as extra_file:
                    extraFiles[extra_file_name] = extra_file.read()

            # validate against the NMS target endpoint
            result = self.nms.validate_method({'id':m, 'spec_json':spec_json, 'display_yaml':display_yaml, 'extra_files':extraFiles})

            # inspect results
            if result['is_valid'] > 0:
                self.log('        - valid!')
                if 'warnings' in result and result['warnings']:
                    for w in result['warnings']:
                        self.log('        - warning: '+w)
                continue

            self.log('        - not valid!')
            if 'errors' in result:
                if result['errors']:
                    for e in result['errors']:
                        self.log('        - error: '+e)
            else:
                self.log('        - error is undefined!')

            raise ValueError('Invalid narrative method specification ('+m+')')






    def get_required_field_as_string(self, kb_yaml, field_name):
        """Return the whitespace-stripped string value of a required kbase.yaml field.

        Raises:
            ValueError: if the field is absent, has no value (null or blank),
                or holds something that is not a string.
        """
        if field_name not in kb_yaml:
            raise ValueError('kbase.yaml file missing "'+field_name+'" required field')
        raw_value = kb_yaml[field_name]
        if raw_value is None:
            # `field:` with nothing after it is parsed as null
            raise ValueError('kbase.yaml file missing value for "'+field_name+'" required field')
        try:
            value = raw_value.strip()
        except AttributeError:
            # e.g. an unquoted `module-version: 1.0` is parsed as a float
            raise ValueError('kbase.yaml file "'+field_name+'" required field must be a string')
        if not value:
            raise ValueError('kbase.yaml file missing value for "'+field_name+'" required field')
        return value

    def get_required_field_as_list(self, kb_yaml, field_name):
        """Return the value of a required kbase.yaml field, which must be a list.

        Raises:
            ValueError: if the field is absent or its value is not exactly a list.
        """
        if field_name in kb_yaml:
            entry = kb_yaml[field_name]
            if type(entry) is list:
                return entry
            raise ValueError('kbase.yaml file "'+field_name+'" required field must be a list')
        raise ValueError('kbase.yaml file missing "'+field_name+'" required field')


    def log(self, message, no_end_line=False):
        """Write ``message`` to the registration log file and flush it.

        A newline is appended unless ``no_end_line`` is true.
        """
        text = message if no_end_line else message+'\n'
        logfile = self.logfile
        logfile.write(text)
        logfile.flush()

    def set_build_step(self, step):
        """Record in the DB that this module's registration is building, at ``step``."""
        building_state = 'building: '+step
        self.db.set_module_registration_state(git_url=self.git_url, new_state=building_state)

    def set_build_error(self, error_message):
        """Record in the DB that this module's registration failed with ``error_message``."""
        state_update = {'new_state': 'error', 'error_message': error_message}
        self.db.set_module_registration_state(git_url=self.git_url, **state_update)

    def build_is_complete(self):
        """Record in the DB that this module's registration finished successfully."""
        finished_state = 'complete'
        self.db.set_module_registration_state(git_url=self.git_url, new_state=finished_state)

    def cleanup(self):
        """Remove this registration's directory under ``temp_dir``, if it exists."""
        scratch_dir = os.path.join(self.temp_dir, self.registration_id)
        if not os.path.isdir(scratch_dir):
            return
        shutil.rmtree(scratch_dir)

    def build_docker_image(self, docker_client, image_name, basedir):
        """Build the docker image from ``basedir`` and return its image id.

        The JSON lines produced by ``docker_client.build`` are echoed to the
        log as they arrive.  After a successful build the id is read back with
        ``docker_client.inspect_image``.

        Raises:
            ValueError: if a line carries an ``errorDetail`` entry, or the
                output never contains a 'Successfully built ' line.
        """
        self.log('\nBuilding the docker image for ' + image_name)

        # examine stream to determine success/failure of build
        built = False
        for line in docker_client.build(path=basedir, rm=True, tag=image_name):
            line_parse = json.loads(line)
            if 'stream' in line_parse:
                self.log(line_parse['stream'], no_end_line=True)
                # Check every line rather than only the last one: the daemon may
                # emit further lines (e.g. a 'Successfully tagged ...' line)
                # after reporting the built image.
                if line_parse['stream'].startswith('Successfully built '):
                    built = True
            if 'errorDetail' in line_parse:
                self.log(str(line_parse), no_end_line=True)
                # errorDetail is not guaranteed to be a string, so stringify it
                raise ValueError('Docker build failed: ' + str(line_parse['errorDetail']))

        if not built:
            raise ValueError('Docker build failed: the build output did not report a successfully built image')

        imageId = docker_client.inspect_image(image_name)['Id']

        self.log('Docker build successful.')
        self.log('    Image Id:   ' + imageId)
        self.log('    Image Name: ' + image_name+'\n\n')
        return imageId

    def push_docker_image(self, docker_client, image_name):
        """Push ``image_name`` (``repository:tag``) to the registry, logging progress.

        Raises:
            ValueError: if ``image_name`` contains no ':' separator, or a line
                of the push output carries an ``error``/``errorDetail`` entry.
        """
        self.log('\nPushing docker image to registry for ' + image_name)
        # Split on the LAST colon only: the repository part may itself contain
        # one when the registry host includes a port (host:5000/kbase:tag).
        (image, tag) = image_name.rsplit(':', 1)

        for line in docker_client.push(image, tag=tag, stream=True):
            # example line:
            #'{"status":"Pushing","progressDetail":{"current":32,"total":32},"progress":"[==================================================\\u003e]     32 B/32 B","id":"da200da4256c"}'
            line_parse = json.loads(line)
            if 'errorDetail' in line_parse or 'error' in line_parse:
                # surface a failed push instead of reporting it as done
                self.log(str(line_parse))
                failure = line_parse['errorDetail'] if 'errorDetail' in line_parse else line_parse['error']
                raise ValueError('Docker push failed: ' + str(failure))
            if 'id' in line_parse:
                self.log(line_parse['id']+' - ', no_end_line=True)
            if 'status' in line_parse:
                self.log(line_parse['status'], no_end_line=True)
            if 'progress' in line_parse:
                self.log(' - ' + line_parse['progress'], no_end_line=True)
            self.log('') # add an endline

        self.log('done pushing docker image to registry for ' + image_name+'\n')


    # Temporary flag to test everything except docker.
    # It should be removed once the test rig can fully support docker and an NMS.
    _TEST_WITHOUT_DOCKER = False
Ejemplo n.º 12
0
class Registrar:

    # params is passed in from the controller, should be the same as passed into the spec
    # db is a reference to the Catalog DB interface (usually a MongoCatalogDBI instance)
    def __init__(self, params, registration_id, timestamp, username, is_admin,token, db, temp_dir, docker_base_url, 
                    docker_registry_host, docker_push_allow_insecure, nms_url, nms_admin_token, module_details,
                    ref_data_base, kbase_endpoint, prev_dev_version):
        """Store the registration inputs and create the helper clients.

        ``params`` must already contain a checked ``git_url``.  A
        NarrativeMethodStore client is created for ``nms_url`` using
        ``nms_admin_token``, along with a LocalFunctionReader and an empty
        in-memory log buffer.
        """
        # who is registering what
        self.params = params
        # at this point, we assume git_url has been checked
        self.git_url = params['git_url']
        self.registration_id = registration_id
        self.timestamp = timestamp
        self.username = username
        self.is_admin = is_admin
        self.token = token

        # catalog DB interface and scratch space
        self.db = db
        self.temp_dir = temp_dir

        # docker settings
        self.docker_base_url = docker_base_url
        self.docker_registry_host = docker_registry_host
        self.docker_push_allow_insecure = docker_push_allow_insecure

        # narrative method store client and local function reader
        self.nms_url = nms_url
        self.nms = NarrativeMethodStore(self.nms_url, token=nms_admin_token)
        self.local_function_reader = LocalFunctionReader()

        # (most) of the mongo document for this module snapshot before this registration
        self.module_details = module_details

        # log lines are buffered here before being saved to mongo
        self.log_buffer = []
        self.last_log_time = time.time() # in seconds
        self.log_interval = 1.0 # save log to mongo every second

        self.ref_data_base = ref_data_base
        self.kbase_endpoint = kbase_endpoint
        self.prev_dev_version = prev_dev_version


    def start_registration(self):
        """Run a complete module registration, from git clone to catalog update.

        Steps: clone the repo (checking out the requested commit when one is
        given), refuse commits that are already released or in beta, run the
        sanity checks, build the docker image, prepare reference data and the
        compilation report, push the image, and finally update the catalog.

        Exceptions raised by these steps are caught here: the registration
        state is set to error, the traceback is logged and, when
        ``self.prev_dev_version`` is set, the dev version is reverted to it.
        In all cases the log is flushed, the log file closed and the
        registration's scratch directory removed.
        """
        try:
            self.logfile = open(self.temp_dir+'/registration.log.'+self.registration_id, 'w')
            self.log('Registration started on '+ str(datetime.datetime.now()) + ' by '+self.username)
            self.log('Registration ID: '+str(self.registration_id))
            self.log('Registration Parameters: '+str(self.params))

            ##############################
            # 1 - clone the repo into the temp directory that should already be reserved for us
            self.set_build_step('cloning git repo')
            if not os.path.isdir(os.path.join(self.temp_dir,self.registration_id)):
                # must be a real exception type; raising a bare string is itself a TypeError
                raise ValueError('Directory for the git clone was not allocated!  This is an internal catalog server error, please report this problem.')

            basedir = os.path.join(self.temp_dir,self.registration_id,'module_repo')

            self.log('Attempting to clone into: '+basedir)
            self.log('git clone ' + self.git_url)
            subprocess.check_call( ['git','clone',self.git_url, basedir ] )
            # try to get hash from repo
            git_commit_hash = str( subprocess.check_output ( ['git','log', '--pretty=%H', '-n', '1' ], cwd=basedir ) ).rstrip()
            self.log('current commit hash at HEAD:' + git_commit_hash)
            if 'git_commit_hash' in self.params and self.params['git_commit_hash']:
                # use the stripped value everywhere so that the commit we check out
                # is exactly the one that gets logged and recorded
                requested_hash = self.params['git_commit_hash'].strip()
                self.log('git checkout ' + requested_hash)
                subprocess.check_call ( ['git', 'checkout', '--quiet', requested_hash ], cwd=basedir )
                git_commit_hash = requested_hash

            # check if this was a git_commit_hash that was already released- if so, we abort for now (we could just update the dev tag in the future)
            for r in self.module_details['release_version_list']:
                if r['git_commit_hash'] == git_commit_hash:
                    raise ValueError('The specified commit is already released.  You cannot reregister that commit version or image.')

            # do the same for beta versions for now
            current_versions = self.module_details['current_versions']
            if 'beta' in current_versions and current_versions['beta'] is not None:
                if current_versions['beta']['git_commit_hash'] == git_commit_hash:
                    raise ValueError('The specified commit is already registered and in beta.  You cannot reregister that commit version or image.')

            ##############################
            # 2 - sanity check (things parse, files exist, module_name matches, etc)
            self.set_build_step('reading files and performing basic checks')
            self.sanity_checks_and_parse(basedir, git_commit_hash)

            ##############################
            # 3 docker build - in progress
            # perhaps make this a self attr?
            module_name_lc = self.get_required_field_as_string(self.kb_yaml,'module-name').strip().lower()
            self.image_name = self.docker_registry_host + '/kbase:' + module_name_lc + '.' + str(git_commit_hash)
            ref_data_folder = None
            ref_data_ver = None
            compilation_report = None
            if not Registrar._TEST_WITHOUT_DOCKER:
                # timeout set to 30 min because we often get timeouts if multiple people try to push at the same time
                dockerclient = None
                docker_timeout = 1800
                if len(str(self.docker_base_url)) > 0:
                    dockerclient = DockerClient(base_url = str(self.docker_base_url),timeout=docker_timeout)
                else:
                    # docker base URL is not set in config, let's use Docker-related env-vars in this case.
                    # .get() is required here: indexing os.environ raises KeyError for an unset
                    # variable, which would bypass the checks below.
                    docker_host = os.environ.get('DOCKER_HOST')
                    if not docker_host:
                        raise ValueError('Docker host should be defined either in configuration '
                                         '(docker-base-url property) or in DOCKER_HOST environment variable')
                    docker_tls_verify = os.environ.get('DOCKER_TLS_VERIFY')
                    if docker_host.startswith('tcp://'):
                        docker_protocol = "http"
                        if docker_tls_verify == '1':
                            docker_protocol = "https"
                        docker_host = docker_host.replace('tcp://', docker_protocol + '://')
                    docker_cert_path = os.environ.get('DOCKER_CERT_PATH')
                    docker_tls = False
                    if docker_cert_path:
                        docker_tls = DockerTLSConfig(verify=False, 
                                                     client_cert=(docker_cert_path + '/cert.pem', 
                                                                  docker_cert_path + '/key.pem'))
                    self.log("Docker settings from environment variables are used: docker-host = " + docker_host + 
                             ", docker_cert_path = " + str(docker_cert_path))
                    dockerclient = DockerClient(base_url = docker_host,timeout=docker_timeout,
                            version='auto', tls=docker_tls)

                self.set_build_step('building the docker image')
                # the returned image id is not used here
                self.build_docker_image(dockerclient,self.image_name,basedir)

                # check if reference data version is defined in kbase.yml
                if 'data-version' in self.kb_yaml:
                    ref_data_ver = str(self.kb_yaml['data-version']).strip()
                    if ref_data_ver:
                        ref_data_folder = module_name_lc
                        target_ref_data_dir = os.path.join(self.ref_data_base, ref_data_folder, ref_data_ver)
                        if os.path.exists(target_ref_data_dir):
                            self.log("Reference data for " + ref_data_folder + "/" + ref_data_ver + " was " +
                                     "already prepared, initialization step is skipped")
                        else:
                            self.set_build_step('preparing reference data (running init entry-point), ' +
                                                'ref-data version: ' + ref_data_ver)
                            self.prepare_ref_data(dockerclient, self.image_name, self.ref_data_base, ref_data_folder, 
                                                  ref_data_ver, basedir, self.temp_dir, self.registration_id,
                                                  self.token, self.kbase_endpoint)

                self.set_build_step('preparing compilation report')
                self.log('Preparing compilation report.')

                # Trying to extract compilation report with line numbers of funcdefs from docker image.
                # There is "report" entry-point command responsible for that.
                compilation_report = self.prepare_compilation_report(dockerclient, self.image_name, basedir, 
                                                                     self.temp_dir, self.registration_id, 
                                                                     self.token, self.kbase_endpoint)

                if compilation_report is None:
                    raise ValueError('Unable to generate a compilation report, which is now required, so your registration cannot continue.  ' +
                                        'If you have been successfully registering this module already, this means that you may need to update ' +
                                        'to the latest version of the KBase SDK and rebuild your makefile.')

                self.local_function_reader.finish_validation(compilation_report)

                self.log('Report complete')

                self.set_build_step('pushing docker image to registry')
                self.push_docker_image(dockerclient,self.image_name)

            else:
                self.log('IN TEST MODE!! SKIPPING DOCKER BUILD AND DOCKER REGISTRY UPDATE!!')

            # 4 - Update the DB
            self.set_build_step('updating the catalog')
            self.update_the_catalog(basedir, ref_data_folder, ref_data_ver, compilation_report)

            self.build_is_complete()

        except Exception as e:
            # set the build state to error and log it
            self.set_build_error(str(e))
            self.log(traceback.format_exc(), is_error=True)
            self.log('BUILD_ERROR: '+str(e), is_error=True)
            if self.prev_dev_version:
                self.log('Reverting dev version to git_commit_hash=' + self.prev_dev_version['git_commit_hash'] +
                         ', version=' + self.prev_dev_version['version'] + ', git_commit_message=' +
                         self.prev_dev_version['git_commit_message'])
                self.db.update_dev_version(self.prev_dev_version, git_url=self.git_url)
        finally:
            self.flush_log_to_db()
            self.logfile.close()
            self.cleanup()



    def sanity_checks_and_parse(self, basedir, git_commit_hash):
        """Parse kbase.yaml from the cloned repo and run the pre-build checks.

        Checks that the required fields are present, the version is a semantic
        version, ``service-config`` is well formed, the module name matches the
        registered one (or is valid for a first registration), the registering
        user is an owner or an admin, and that the narrative method specs and
        local function specs validate.

        Returns:
            The parsed kbase.yaml content, which is also stored on ``self.kb_yaml``.

        Raises:
            ValueError: if any of the checks fails.
        """
        # kbase.yaml is preferred; kbase.yml is accepted as a fallback
        yaml_filename = 'kbase.yaml'
        if not os.path.isfile(os.path.join(basedir, yaml_filename)):
            yaml_filename = 'kbase.yml'
            if not os.path.isfile(os.path.join(basedir, yaml_filename)):
                raise ValueError('kbase.yaml file does not exist in repo, but is required!')

        # parse some stuff, and check for things
        with codecs.open(os.path.join(basedir,yaml_filename), 'r', "utf-8", errors='ignore') as kb_yaml_file:
            kb_yaml_string = kb_yaml_file.read()
        # NOTE(security): this file comes from the repository being registered.
        # Depending on the PyYAML version, yaml.load without an explicit safe
        # loader may construct arbitrary Python objects; consider yaml.safe_load
        # after confirming no registered module relies on Python-specific tags.
        self.kb_yaml = yaml.load(kb_yaml_string)
        self.log('=====kbase.yaml parse:')
        self.log(pprint.pformat(self.kb_yaml))
        self.log('=====end kbase.yaml')

        # an empty file parses to None and a bare scalar/list is not a set of
        # fields; fail with a clear message instead of a TypeError further down
        if not isinstance(self.kb_yaml, dict):
            raise ValueError('kbase.yaml file is empty or is not a set of "name: value" fields')

        module_name = self.get_required_field_as_string(self.kb_yaml,'module-name').strip()
        # called only to enforce that the field is present; the value is not needed here
        self.get_required_field_as_string(self.kb_yaml,'module-description')
        version = self.get_required_field_as_string(self.kb_yaml,'module-version').strip()

        # must be a semantic version
        if not semantic_version.validate(version):
            raise ValueError('Invalid version string in kbase.yaml - must be in semantic version format.  See http://semver.org')

        # called only to enforce that the field is present; the value is not needed here
        self.get_required_field_as_string(self.kb_yaml,'service-language')
        owners = self.get_required_field_as_list(self.kb_yaml,'owners')

        service_config = self.get_optional_field_as_dict(self.kb_yaml, 'service-config')
        if service_config:
            # validate service_config parameters
            if 'dynamic-service' in service_config:
                if not isinstance(service_config['dynamic-service'], bool):
                    raise ValueError('Invalid service-config in kbase.yaml - "dynamic-service" property must be a boolean "true" or "false".') 

        # module_name must match what exists (unless it is not yet defined)
        if 'module_name' in self.module_details:
            if self.module_details['module_name'] != module_name:
                raise ValueError('kbase.yaml file module_name field has changed since last version! ' +
                                    'Module names are permanent- if this is a problem, contact a kbase admin.')
        else:
            # This must be the first registration, so the module must not exist yet
            self.check_that_module_name_is_valid(module_name)

        # associate the module_name with the log file for easier searching (if we fail sooner, then the module name
        # cannot be used to lookup this log)
        self.db.set_build_log_module_name(self.registration_id, module_name)

        # you can't remove yourself from the owners list, or register something that you are not an owner of.
        # `not self.is_admin` (rather than `is False`) so that any falsy value, such
        # as 0 or None, is treated as "not an admin" instead of skipping the check.
        if self.username not in owners and not self.is_admin:
            raise ValueError('Your kbase username ('+self.username+') must be in the owners list in the kbase.yaml file.')

        # OPTIONAL TODO: check if all the users are on the owners list?  not necessarily required, because we
        # do a check during registration of the person who started the registration...

        # TODO: check for directory structure, method spec format, documentation, version
        self.validate_method_specs(basedir)

        # initial validation of the local function specifications
        lf_report = self.local_function_reader.parse_and_basic_validation(basedir, self.module_details, module_name, version, git_commit_hash)
        self.log(self.local_function_reader.report_to_string_for_log(lf_report))

        if len(lf_report['functions_errored']) > 0:
            raise ValueError('Errors exist in local function specifications.')

        # return the parse so we can figure things out later
        return self.kb_yaml


    def check_that_module_name_is_valid(self, module_name):
        """Check that ``module_name`` can be used for a first registration.

        Raises:
            ValueError: if the name is already registered, its lowercase form
                already exists, or it contains characters other than letters,
                digits and underscores.
        """
        catalog_db = self.db
        already_registered = catalog_db.is_registered(module_name=module_name)
        if already_registered:
            raise ValueError('Module name (in kbase.yaml) is already registered.  Please specify a different name and try again.')

        lowered_name = module_name.lower()
        if catalog_db.module_name_lc_exists(module_name_lc=lowered_name):
            raise ValueError('The case-insensitive module name (in kbase.yaml) is not unique.  Please specify a different name.')

        # only allow alphanumeric and underscore
        if re.match(r'^[A-Za-z0-9_]+$', module_name) is None:
            raise ValueError('Module names must be alphanumeric characters (including underscores) only, with no spaces.')


    def update_the_catalog(self, basedir, ref_data_folder, ref_data_ver, compilation_report):
        """Save the outcome of this registration to the catalog DB and the NMS.

        In order: sets the module name (first registration only), the module
        info, the owners, any local function specs and the new dev version
        record, then registers the repo at the current commit with the NMS.
        Each DB call is a separate update, so a failure part-way through
        leaves the earlier updates in place.

        Raises:
            ValueError: if any of the DB calls returns an error.
        """
        # get the basic info that we need
        commit_hash = str( subprocess.check_output ( ['git','log', '--pretty=%H', '-n', '1' ], cwd=basedir ) ).rstrip()
        commit_message = str( subprocess.check_output ( ['git','log', '--pretty=%B', '-n', '1' ], cwd=basedir ) ).rstrip()

        module_name = self.get_required_field_as_string(self.kb_yaml,'module-name')
        module_description = self.get_required_field_as_string(self.kb_yaml,'module-description')
        version = self.get_required_field_as_string(self.kb_yaml,'module-version')
        service_language = self.get_required_field_as_string(self.kb_yaml,'service-language')
        owners = self.get_required_field_as_list(self.kb_yaml,'owners')
        service_config = self.get_optional_field_as_dict(self.kb_yaml, 'service-config')

        # 'dynamic-service' is optional inside service-config, so use .get()
        # rather than indexing (which raises KeyError when the key is absent)
        if service_config and service_config.get('dynamic-service'):
            dynamic_service = 1
        else:
            dynamic_service = 0

        # first update the module name, which is now permanent, if we haven't already
        if ('module_name' not in self.module_details) or ('module_name_lc' not in self.module_details):
            error = self.db.set_module_name(self.git_url, module_name)
            if error is not None:
                raise ValueError('Unable to set module_name - there was an internal database error.' +str(error))

        # TODO: Could optimize by combining all these things into one mongo call, but for now this is easier.
        # Combining it into one call would just mean that this update happens as a single transaction, but a partial
        # update for now that fails midstream is probably not a huge issue- we can always reregister.

        # next update the basic information
        local_functions = self.local_function_reader.extract_lf_records()
        info = {
            'description': module_description,
            'language' : service_language,
            'dynamic_service': dynamic_service,
            'local_functions': 1 if len(local_functions)>0 else 0
        }

        self.log('new info: '+pprint.pformat(info))
        error = self.db.set_module_info(info, git_url=self.git_url)
        if error is not None:
            raise ValueError('Unable to set module info - there was an internal database error: '+str(error))

        # next update the owners
        # TODO: add some validation that the username is a valid kbase user
        ownersListForUpdate = [{'kb_username':o} for o in owners]
        self.log('new owners list: '+pprint.pformat(ownersListForUpdate))
        error = self.db.set_module_owners(ownersListForUpdate, git_url=self.git_url)
        if error is not None:
            raise ValueError('Unable to set module owners - there was an internal database error: '+str(error))

        # finally update the actual dev version info
        methods_dir = os.path.join(basedir,'ui','narrative','methods')
        narrative_methods = []
        if os.path.isdir(methods_dir):
            narrative_methods = [m for m in os.listdir(methods_dir)
                                 if os.path.isdir(os.path.join(methods_dir, m))]

        if len(local_functions) > 0:
            self.log('Saving local function specs:')
            self.log(pprint.pformat(local_functions))
            error = self.db.save_local_function_specs(local_functions)
            if error is not None:
                raise ValueError('There was an error saving local function specs, DB says: '+str(error))

        new_version = {
            'module_name': module_name.strip(),
            'module_name_lc': module_name.strip().lower(),
            'module_description': module_description,
            'released':0,
            'released_timestamp':None,
            'notes': '',
            'timestamp':self.timestamp,
            'registration_id':self.registration_id,
            'version' : version,
            'git_commit_hash': commit_hash,
            'git_commit_message': commit_message,
            'narrative_methods': narrative_methods,
            'local_functions' : self.local_function_reader.extract_lf_names(),
            'docker_img_name': self.image_name,
            'compilation_report': compilation_report
        }
        if ref_data_ver:
            new_version['data_folder'] = ref_data_folder
            new_version['data_version'] = ref_data_ver
        new_version['dynamic_service'] = dynamic_service

        self.log('new dev version object: '+pprint.pformat(new_version))
        error = self.db.update_dev_version(new_version)
        if error is not None:
            raise ValueError('Unable to update dev version - there was an internal database error: '+str(error))

        #push to NMS
        self.log('registering specs with NMS')
        self.nms.register_repo({'git_url':self.git_url, 'git_commit_hash':commit_hash})

        self.log('\ndone')


    def validate_method_specs(self, basedir):
        """Validate every narrative method spec under ``<basedir>/ui/narrative/methods``.

        Each sub-directory is treated as one method.  Its ``spec.json`` and
        ``display.yaml`` (both required), plus any ``*.html`` files sitting next
        to them, are read as UTF-8 (undecodable bytes are ignored) and passed to
        ``self.nms.validate_method``.  Warnings and errors from the response are
        written to the log.

        Raises:
            ValueError: if a required file is missing, or the validation
                response does not mark the method as valid.
        """
        self.log('validating narrative method specifications')
        methods_dir = os.path.join(basedir, 'ui', 'narrative', 'methods')
        if not os.path.isdir(methods_dir):
            self.log('    - no ui/narrative/methods directory found, so no narrative methods will be deployed')
            return

        for m in os.listdir(methods_dir):
            method_path = os.path.join(methods_dir, m)
            if not os.path.isdir(method_path):
                continue
            self.log('    - validating method: '+m)

            # first grab the spec and display files, which are required
            spec_path = os.path.join(method_path, 'spec.json')
            display_path = os.path.join(method_path, 'display.yaml')
            if not os.path.isfile(spec_path):
                raise ValueError('Invalid narrative method specification ('+m+'): No spec.json file defined.')
            if not os.path.isfile(display_path):
                raise ValueError('Invalid narrative method specification ('+m+'): No display.yaml file defined.')

            with codecs.open(spec_path, 'r', "utf-8", errors='ignore') as spec_json_file:
                spec_json = spec_json_file.read()
            with codecs.open(display_path, 'r', "utf-8", errors='ignore') as display_yaml_file:
                display_yaml = display_yaml_file.read()

            # gather any extra html files; anything else in the directory is
            # skipped (not a reason to stop looking at the remaining entries)
            extraFiles = {}
            for extra_file_name in os.listdir(method_path):
                extra_file_path = os.path.join(method_path, extra_file_name)
                if not os.path.isfile(extra_file_path):
                    continue
                if not extra_file_name.endswith('.html'):
                    continue
                with codecs.open(extra_file_path, 'r', "utf-8", errors='ignore') as extra_file:
                    extraFiles[extra_file_name] = extra_file.read()

            # validate against the NMS target endpoint
            result = self.nms.validate_method({'id':m, 'spec_json':spec_json, 'display_yaml':display_yaml, 'extra_files':extraFiles})

            # inspect results
            if result['is_valid'] > 0:
                self.log('        - valid!')
                if 'warnings' in result and result['warnings']:
                    for w in result['warnings']:
                        self.log('        - warning: '+w)
                continue

            self.log('        - not valid!', is_error=True)
            if 'errors' in result:
                if result['errors']:
                    for e in result['errors']:
                        self.log('        - error: '+e, is_error=True)
            else:
                self.log('        - error is undefined!', is_error=True)

            raise ValueError('Invalid narrative method specification ('+m+')')






    def get_required_field_as_string(self, kb_yaml, field_name):
        """Return the whitespace-stripped string stored under a required kbase.yaml key.

        :param kb_yaml: mapping holding the kbase.yaml content
        :param field_name: key that must be present with a non-blank string value
        :returns: the value with leading/trailing whitespace removed
        :raises ValueError: if the key is absent, the value is None or blank,
            or the value is not a string
        """
        if field_name not in kb_yaml:
            raise ValueError('kbase.yaml file missing "'+field_name+'" required field')
        value = kb_yaml[field_name]
        # A None value is reported the same way as a blank string instead of
        # failing with AttributeError on None.strip().
        if value is None:
            raise ValueError('kbase.yaml file missing value for "'+field_name+'" required field')
        try:
            value = value.strip()
        except AttributeError:
            # numbers, lists, dicts, ... have no strip(); keep the error type consistent
            raise ValueError('kbase.yaml file "'+field_name+'" required field must be a string')
        if not value:
            raise ValueError('kbase.yaml file missing value for "'+field_name+'" required field')
        return value

    def get_required_field_as_list(self, kb_yaml, field_name):
        """Return the list stored under a required kbase.yaml key.

        :param kb_yaml: mapping holding the kbase.yaml content
        :param field_name: key that must be present and hold a list
        :returns: the list object itself (not a copy)
        :raises ValueError: if the key is absent or its value is not a list
        """
        if field_name not in kb_yaml:
            raise ValueError('kbase.yaml file missing "'+field_name+'" required field')
        value = kb_yaml[field_name]
        # isinstance (rather than an exact type comparison) also accepts list subclasses
        if not isinstance(value, list):
            raise ValueError('kbase.yaml file "'+field_name+'" required field must be a list')
        return value

    def get_optional_field_as_dict(self, kb_yaml, field_name):
        """Return the dict stored under an optional kbase.yaml key.

        :param kb_yaml: mapping holding the kbase.yaml content
        :param field_name: key to look up
        :returns: None when the key is absent, otherwise the dict object itself
        :raises ValueError: if the key is present but its value is not a dict
        """
        if field_name not in kb_yaml:
            return None
        value = kb_yaml[field_name]
        # isinstance also accepts dict subclasses such as collections.OrderedDict
        if not isinstance(value, dict):
            raise ValueError('kbase.yaml file "'+field_name+'" optional field must be a dict')
        return value



    def log(self, message, no_end_line=False, is_error=False):
        """Write a message to the log file and queue it, line by line, for the database.

        :param message: text to log; may span several lines
        :param no_end_line: when true, the text is written to the file without an
            added trailing newline (buffered entries always end with a newline)
        :param is_error: stored as the 'error' flag of every buffered line
        """
        content = message if no_end_line else message + '\n'
        self.logfile.write(content)
        self.logfile.flush()

        for entry in content.splitlines():
            # cap each buffered line at 1000 characters, noting its original length
            if len(entry) > 1000:
                entry = entry[0:1000] + ' ... truncated to 1k characters of ' + str(len(entry))
            self.log_buffer.append({'content': entry + '\n', 'error': is_error})

        # push the buffer out once the log interval has elapsed, or when it exceeds 1000 lines
        interval_elapsed = (time.time() - self.last_log_time) > self.log_interval
        if interval_elapsed or len(self.log_buffer) > 1000:
            self.flush_log_to_db()

    def flush_log_to_db(self):
        """Append the buffered log lines to this registration's build log, then reset the buffer and timer."""
        # todo: if we lose log lines, that's ok.  Make sure we handle case if log is larger than mongo doc size
        pending = self.log_buffer
        self.db.append_to_build_log(self.registration_id, pending)
        # start a fresh buffer and restart the flush timer
        self.log_buffer, self.last_log_time = [], time.time()


    def set_build_step(self, step):
        """Record 'building: <step>' as the state of both the module registration and the build log."""
        state = 'building: ' + step
        db = self.db
        db.set_module_registration_state(git_url=self.git_url, new_state=state)
        db.set_build_log_state(self.registration_id, state)

    def set_build_error(self, error_message):
        """Mark both the module registration and the build log as 'error' with the given message."""
        db = self.db
        db.set_module_registration_state(
            git_url=self.git_url,
            new_state='error',
            error_message=error_message)
        db.set_build_log_state(
            self.registration_id,
            'error',
            error_message=error_message)

    def build_is_complete(self):
        """Mark both the module registration and the build log as 'complete'."""
        final_state = 'complete'
        db = self.db
        db.set_module_registration_state(git_url=self.git_url, new_state=final_state)
        db.set_build_log_state(self.registration_id, final_state)

    def cleanup(self):
        """Remove this registration's directory under temp_dir, if it exists."""
        build_dir = os.path.join(self.temp_dir, self.registration_id)
        if not os.path.isdir(build_dir):
            return
        shutil.rmtree(build_dir)

    def build_docker_image(self, docker_client, image_name, basedir):
        """Build a docker image from basedir, tagged image_name, and return its image Id.

        Each item yielded by docker_client.build() is parsed as a JSON object; its
        'stream' text is forwarded to the log.

        :param docker_client: client exposing build() and inspect_image()
        :param image_name: tag to give the built image
        :param basedir: build context directory
        :returns: the 'Id' reported by docker_client.inspect_image(image_name)
        :raises ValueError: if the build output contains an 'errorDetail' entry, or
            never reports 'Successfully built '
        """
        self.log('\nBuilding the docker image for ' + image_name);

        # examine stream to determine success/failure of build
        built = False
        for line in docker_client.build(path=basedir,rm=True,tag=image_name, pull=False):
            line_parse = json.loads(line)
            if 'stream' in line_parse:
                self.log(line_parse['stream'],no_end_line=True)
                # the success marker is not necessarily the last line of the output,
                # so remember it whenever it shows up
                if line_parse['stream'].startswith('Successfully built '):
                    built = True
            if 'errorDetail' in line_parse:
                self.log(str(line_parse),no_end_line=True)
                raise ValueError('Docker build failed: '+str(line_parse['errorDetail']))

        # without the marker there is no image to inspect; fail explicitly instead of
        # reporting success and then crashing on a None image Id
        if not built:
            raise ValueError('Docker build failed: build output did not report "Successfully built" for '+image_name)

        imageId = docker_client.inspect_image(image_name)['Id']

        self.log('Docker build successful.')
        self.log('    Image Id:   ' + imageId)
        self.log('    Image Name: ' + image_name+'\n\n')
        return imageId

    def push_docker_image(self, docker_client, image_name):
        """Push image_name to its registry, logging every progress line of the push.

        :param docker_client: client exposing push()
        :param image_name: repository name, optionally prefixed with "host:port/" and
            optionally suffixed with ":tag"
        :raises ValueError: if any line of the push output contains an 'error' entry
        """
        self.log('\nPushing docker image to registry for ' + image_name);

        # Only a colon located after the last '/' separates the tag; an earlier colon
        # belongs to a "host:port/" private-registry prefix.  A name without a tag is
        # pushed with tag=None rather than being split at a bogus position.
        colon_pos = image_name.rfind(':')
        if colon_pos > image_name.rfind('/'):
            image = image_name[:colon_pos]
            tag = image_name[colon_pos+1:]
        else:
            image = image_name
            tag = None

        # to do: examine stream to determine success/failure of build
        if self.docker_push_allow_insecure:
            print("Docker push: insecure_registry: "+ str(self.docker_push_allow_insecure))
        else:
            print("Docker push: insecure_registry: None")
        for line in docker_client.push(image, tag=tag, stream=True, insecure_registry = self.docker_push_allow_insecure):
            # example line:
            #'{"status":"Pushing","progressDetail":{"current":32,"total":32},"progress":"[==================================================\\u003e]     32 B/32 B","id":"da200da4256c"}'
            line_parse = json.loads(line)
            log_line = ''
            if 'id' in line_parse:
                log_line += line_parse['id']+' - ';
            if 'status' in line_parse:
                log_line += line_parse['status']
            if 'progress' in line_parse:
                log_line += ' - ' + line_parse['progress']

            # catch anything unexpected, we should probably throw an error here
            for key in line_parse:
                if key not in ['id','status','progress','progressDetail']:
                    log_line += '['+key+'='+str(line_parse[key])+'] '

            self.log(log_line)

            if 'error' in line_parse:
                self.log(str(line_parse),no_end_line=True)
                raise ValueError('Docker push failed: '+str(line_parse['error']))

        self.log('done pushing docker image to registry for ' + image_name+'\n');


    def run_docker_container(self, dockerclient, image_name, token, 
                             kbase_endpoint, binds, work_dir, command):
        """Run `command` in a new container of image_name and copy its output to the log.

        Before starting the container, writes a "token" file and a "config.properties"
        file into work_dir, and binds work_dir to /kb/module/work (read-write).  The
        container is force-removed afterwards whether or not the run succeeded.

        :param dockerclient: client exposing create_host_config(), create_container(),
            start(), logs() and remove_container()
        :param image_name: image to instantiate
        :param token: text written verbatim to <work_dir>/token
        :param kbase_endpoint: base URL used to derive the service URLs in config.properties
        :param binds: extra volume binds (dict) or None; the dict is not modified
        :param work_dir: existing host directory used as the container work folder
        :param command: entry-point command passed to the container
        """
        cnt_id = None
        try:
            token_file = os.path.join(work_dir, "token")
            with open(token_file, "w") as token_out:
                token_out.write(token)
            config_file = os.path.join(work_dir, "config.properties")
            with open(config_file, "w") as config_out:
                config_out.write("[global]\n" + 
                                 "job_service_url = " + kbase_endpoint + "/userandjobstate\n" +
                                 "workspace_url = " + kbase_endpoint + "/ws\n" +
                                 "shock_url = " + kbase_endpoint + "/shock-api\n" +
                                 "kbase_endpoint = " + kbase_endpoint + "\n")
            # work on a copy so the caller's dict is left untouched
            binds = dict(binds) if binds else {}
            binds[work_dir] = {"bind": "/kb/module/work", "mode": "rw"}
            container = dockerclient.create_container(image=image_name, command=command, tty=True,
                    host_config=dockerclient.create_host_config(binds=binds))
            cnt_id = container.get('Id')
            self.log('Running "' + command + '" entry-point command, container Id=' + cnt_id)
            dockerclient.start(container=cnt_id)
            stream = dockerclient.logs(container=cnt_id, stdout=True, stderr=True, stream=True)
            # The stream may deliver single characters or larger chunks; buffer the
            # text and emit one log entry per complete line either way.
            pending = ""
            for chunk in stream:
                if isinstance(chunk, bytes) and not isinstance(chunk, str):
                    # Python 3 byte chunks need decoding (on Python 2 bytes is str)
                    chunk = chunk.decode('utf-8', 'replace')
                pending += chunk.replace('\r', '')
                while '\n' in pending:
                    line, pending = pending.split('\n', 1)
                    self.log(line)
            if len(pending) > 0:
                self.log(pending)
        finally:
            # cleaning up the container (best effort: a failure here is logged, not raised)
            if cnt_id:
                try:
                    dockerclient.remove_container(container=cnt_id, v=True, force=True)
                    self.log("Docker container (Id=" + cnt_id + ") was cleaned up")
                except Exception as e:
                    self.log("Unable to clean up docker container (Id=" + cnt_id + "): " + str(e))


    def prepare_ref_data(self, dockerclient, image_name, ref_data_base, ref_data_folder, 
                         ref_data_ver, basedir, temp_dir, registration_id, token, kbase_endpoint):
        """Run the image's 'init' command to generate reference data and publish it.

        The container writes into a temporary folder
        <ref_data_base>/<ref_data_folder>/temp_<registration_id> (bound to /data).  If a
        "__READY__" file is present afterwards, that folder is renamed to
        <ref_data_base>/<ref_data_folder>/<ref_data_ver>; otherwise the data is discarded.

        :raises ValueError: if ref_data_base does not exist, or the container did not
            leave a __READY__ file behind
        """
        self.log('\nReference data: creating docker container for initialization')
        if not os.path.exists(ref_data_base):
            raise ValueError("Reference data network folder doesn't exist: " + ref_data_base)
        upper_target_dir = os.path.join(ref_data_base, ref_data_folder)
        if not os.path.exists(upper_target_dir):
            os.mkdir(upper_target_dir)
        temp_ref_data_dir = os.path.join(upper_target_dir, "temp_" + registration_id)
        try:
            repo_data_dir = os.path.join(basedir, "data")
            os.mkdir(temp_ref_data_dir)
            binds = {temp_ref_data_dir: {"bind": "/data", "mode": "rw"},
                     repo_data_dir: {"bind": "/kb/module/data", "mode": "rw"}}
            temp_work_dir = os.path.join(temp_dir,registration_id,'ref_data_workdir')
            os.mkdir(temp_work_dir)
            self.run_docker_container(dockerclient, image_name, token, kbase_endpoint, 
                                      binds, temp_work_dir, 'init')
            ready_file = os.path.join(temp_ref_data_dir, "__READY__")
            if os.path.exists(ready_file):
                target_dir = os.path.join(upper_target_dir, ref_data_ver)
                os.rename(temp_ref_data_dir, target_dir)
                self.log("Reference data was successfully deployed into " + target_dir)
            else:
                raise ValueError("__READY__ file is not detected in reference data folder, produced data will be discarded")
        finally:
            # cleaning up temporary ref-data (if not renamed into permanent after success);
            # still best effort, but a failure is now logged instead of silently ignored
            try:
                if os.path.exists(temp_ref_data_dir):
                    shutil.rmtree(temp_ref_data_dir)
            except Exception as e:
                self.log("Unable to remove temporary reference data folder " + temp_ref_data_dir + ": " + str(e))


    def prepare_compilation_report(self, dockerclient, image_name, basedir, temp_dir, 
                                   registration_id, token, kbase_endpoint):
        """Run the image's 'report' command and return the parsed compile_report.json.

        The container is run with <temp_dir>/<registration_id>/report_workdir as its
        work folder; compile_report.json is then read from that folder.

        :returns: the decoded JSON content of compile_report.json, or None when the
            file was not produced or any error occurred (errors are logged, not raised)
        """
        self.log('\nCompilation report: creating docker container')
        try:
            temp_work_dir = os.path.join(temp_dir,registration_id,'report_workdir')
            os.mkdir(temp_work_dir)
            self.run_docker_container(dockerclient, image_name, token, kbase_endpoint, 
                                      None, temp_work_dir, 'report')
            report_file = os.path.join(temp_work_dir, 'compile_report.json')
            if not os.path.exists(report_file):
                self.log("Report file doesn't exist: " + report_file)
                return None
            with open(report_file) as f:
                return json.load(f)
        except Exception as e:
            # "as" form is valid on Python 2.6+ and Python 3 (the comma form is Python 2 only)
            self.log("Error preparing compilation log: " + str(e))
        return None
Ejemplo n.º 13
0
# GetAppStats
#
import requests
import os
import datetime, time
import mysql.connector as mysql
from biokbase.catalog.Client import Catalog
from biokbase.narrative_method_store.client import NarrativeMethodStore

# Module-level setup: everything below runs at import time and reads its
# settings from environment variables (a missing variable raises KeyError).

# Turn off urllib3 warnings for this process.
requests.packages.urllib3.disable_warnings()

# Service clients: the Catalog client is created with the metrics user's token,
# the Narrative Method Store client without one.
catalog = Catalog(url=os.environ['CATALOG_URL'],
                  token=os.environ['METRICS_USER_TOKEN'])
nms = NarrativeMethodStore(url=os.environ['NARRATIVE_METHOD_STORE'])
# Read here for later use; this section does not show how they are consumed.
sql_host = os.environ['SQL_HOST']
query_on = os.environ['QUERY_ON']

# The calendar day before datetime.date.today(); used below as the default
# start/end day of get_user_app_stats so that the window covers the last full day.
yesterday = (datetime.date.today() - datetime.timedelta(days=1))


def get_user_app_stats(
        start_date=datetime.datetime.combine(yesterday,
                                             datetime.datetime.min.time()),
        end_date=datetime.datetime.combine(yesterday,
                                           datetime.datetime.max.time())):
    """ 
    Gets a data dump from the app cataloge for a certain date window.   
    If no statt and end date are entered it will default to the last 15 calendar days (UTC TIME).
    It is 15 days because it uses an underlying method that 
    filters by creation_time and not finish_time
Ejemplo n.º 14
0
    def update_the_catalog(self, repo, basedir):
        """Store the freshly built module's metadata in the catalog DB and register it with NMS.

        Steps, in order: set the module name (only if not stored yet), set the basic
        info, set the owners, update the dev version, and finally register the repo
        with the Narrative Method Store (skipped when Registrar._TEST_WITHOUT_NMS is set).

        :param repo: repository object; repo.head.commit supplies hexsha and message
        :param basedir: checkout directory, scanned for ui/narrative/methods/<method> folders
        :raises ValueError: if a required kbase.yaml field is invalid or any database
            update reports an error
        """
        # get the basic info that we need
        head_commit = repo.head.commit
        commit_hash = head_commit.hexsha
        commit_message = head_commit.message

        read_string = self.get_required_field_as_string
        module_name = read_string(self.kb_yaml, 'module-name')
        module_description = read_string(self.kb_yaml, 'module-description')
        version = read_string(self.kb_yaml, 'module-version')
        service_language = read_string(self.kb_yaml, 'service-language')
        owners = self.get_required_field_as_list(self.kb_yaml, 'owners')

        # first update the module name, which is now permanent, if we haven't already
        details = self.module_details
        name_already_stored = ('module_name' in details) and ('module_name_lc' in details)
        if not name_already_stored:
            error = self.db.set_module_name(self.git_url, module_name)
            if error is not None:
                raise ValueError('Unable to set module_name - there was an internal database error.' +error)

        # TODO: Could optimize by combining all these things into one mongo call, but for now this is easier.
        # Combining it into one call would just mean that this update happens as a single transaction, but a partial
        # update for now that fails midstream is probably not a huge issue- we can always reregister.

        # next update the basic information
        info = {'description': module_description, 'language': service_language}
        self.log('new info: '+pprint.pformat(info))
        error = self.db.set_module_info(info, git_url=self.git_url)
        if error is not None:
            raise ValueError('Unable to set module info - there was an internal database error: '+error)

        # next update the owners
        # TODO: add some validation that the username is a valid kbase user
        ownersListForUpdate = [{'kb_username': owner} for owner in owners]
        self.log('new owners list: '+pprint.pformat(ownersListForUpdate))
        error = self.db.set_module_owners(ownersListForUpdate, git_url=self.git_url)
        if error is not None:
            raise ValueError('Unable to set module owners - there was an internal database error: '+error)

        # finally update the actual dev version info; every sub-directory of
        # ui/narrative/methods counts as one narrative method
        methods_dir = os.path.join(basedir, 'ui', 'narrative', 'methods')
        narrative_methods = []
        if os.path.isdir(methods_dir):
            narrative_methods = [entry for entry in os.listdir(methods_dir)
                                 if os.path.isdir(os.path.join(methods_dir, entry))]

        new_version = {
            'timestamp': self.timestamp,
            'version': version,
            'git_commit_hash': commit_hash,
            'git_commit_message': commit_message,
            'narrative_methods': narrative_methods,
            'docker_img_name': self.image_name,
        }
        self.log('new dev version object: '+pprint.pformat(new_version))
        error = self.db.update_dev_version(new_version, git_url=self.git_url)
        if error is not None:
            raise ValueError('Unable to update dev version - there was an internal database error: '+error)

        #push to NMS
        if Registrar._TEST_WITHOUT_NMS:
            self.log('IN TEST MODE!! SKIPPING NMS UPDATE!!')
            return
        nms = NarrativeMethodStore(self.nms_url, user_id=self.nms_admin_user, password=self.nms_admin_psswd)
        nms.register_repo({'git_url': self.git_url, 'git_commit_hash': commit_hash})
Ejemplo n.º 15
0
    def __init__(self, config):
        """Set up the controller from a configuration mapping.

        Reads the admin list, MongoDB settings, temp directory, docker settings and
        NMS settings from ``config`` and creates the MongoCatalogDBI and
        NarrativeMethodStore clients.

        Note: when 'mongodb-user' is absent, both 'mongodb-user' and 'mongodb-pwd'
        are written into ``config`` as empty strings; when only 'mongodb-pwd' is
        absent it is written as an empty string.

        :param config: mapping of option name to value
        :raises ValueError: if a required option is missing, or the temp directory
            does not exist or is not writable
        """
        # first grab the admin list (comma separated, blank entries ignored)
        self.adminList = []
        if 'admin-users' in config:
            tokens = config['admin-users'].split(',')
            for t in tokens:
                if t.strip():
                    self.adminList.append(t.strip())
        if not self.adminList:
            warnings.warn('no "admin-users" are set in config of CatalogController.')

        # make sure the minimal mongo settings are in place
        if 'mongodb-host' not in config:
            raise ValueError('"mongodb-host" config variable must be defined to start a CatalogController!')
        if 'mongodb-database' not in config:
            raise ValueError('"mongodb-database" config variable must be defined to start a CatalogController!')

        # give warnings if no mongo user information is set
        if 'mongodb-user' not in config:
            warnings.warn('"mongodb-user" is not set in config of CatalogController.')
            config['mongodb-user']=''
            config['mongodb-pwd']=''
        if 'mongodb-pwd' not in config:
            warnings.warn('"mongodb-pwd" is not set in config of CatalogController.')
            config['mongodb-pwd']=''

        # instantiate the mongo client
        self.db = MongoCatalogDBI(
                    config['mongodb-host'],
                    config['mongodb-database'],
                    config['mongodb-user'],
                    config['mongodb-pwd'])

        # check for the temp directory and make sure it exists and is writable
        # (the existence check used to be duplicated; the second copy was unreachable)
        if 'temp-dir' not in config:
            raise ValueError('"temp-dir" config variable must be defined to start a CatalogController!')
        self.temp_dir = config['temp-dir']
        if not os.path.exists(self.temp_dir):
            raise ValueError('"temp-dir" does not exist! It is required for registration to work!')
        if not os.access(self.temp_dir, os.W_OK):
            raise ValueError('"temp-dir" not writable! Writable space is required for registration to work!')

        if 'docker-base-url' not in config:
            raise ValueError('"docker-base-url" config variable must be defined to start a CatalogController!')
        self.docker_base_url = config['docker-base-url']
        print(self.docker_base_url)

        if 'docker-registry-host' not in config:
            raise ValueError('"docker-registry-host" config variable must be defined to start a CatalogController!')
        self.docker_registry_host = config['docker-registry-host']
        print(self.docker_registry_host)

        if 'nms-url' not in config:
            raise ValueError('"nms-url" config variable must be defined to start a CatalogController!')
        self.nms_url = config['nms-url']
        if 'nms-admin-user' not in config:
            raise ValueError('"nms-admin-user" config variable must be defined to start a CatalogController!')
        self.nms_admin_user = config['nms-admin-user']
        if 'nms-admin-psswd' not in config:
            raise ValueError('"nms-admin-psswd" config variable must be defined to start a CatalogController!')
        self.nms_admin_psswd = config['nms-admin-psswd']

        self.nms = NarrativeMethodStore(self.nms_url,user_id=self.nms_admin_user,password=self.nms_admin_psswd)
Ejemplo n.º 16
0
class CatalogController:


    def __init__(self, config):

        # first grab the admin list
        self.adminList = []
        if 'admin-users' in config:
            tokens = config['admin-users'].split(',')
            for t in tokens:
                if t.strip():
                    self.adminList.append(t.strip())
        if not self.adminList:
            warnings.warn('no "admin-users" are set in config of CatalogController.')

        # make sure the minimal mongo settings are in place
        if 'mongodb-host' not in config:
            raise ValueError('"mongodb-host" config variable must be defined to start a CatalogController!')
        if 'mongodb-database' not in config:
            raise ValueError('"mongodb-database" config variable must be defined to start a CatalogController!')

        # give warnings if no mongo user information is set
        if 'mongodb-user' not in config:
            warnings.warn('"mongodb-user" is not set in config of CatalogController.')
            config['mongodb-user']=''
            config['mongodb-pwd']=''
        if 'mongodb-pwd' not in config:
            warnings.warn('"mongodb-pwd" is not set in config of CatalogController.')
            config['mongodb-pwd']=''

        # instantiate the mongo client
        self.db = MongoCatalogDBI(
                    config['mongodb-host'],
                    config['mongodb-database'],
                    config['mongodb-user'],
                    config['mongodb-pwd'])

        # check for the temp directory and make sure it exists
        if 'temp-dir' not in config:
            raise ValueError('"temp-dir" config variable must be defined to start a CatalogController!')
        self.temp_dir = config['temp-dir']
        if not os.path.exists(self.temp_dir):
            raise ValueError('"temp-dir" does not exist! It is required for registration to work!')
        if not os.path.exists(self.temp_dir):
            raise ValueError('"temp-dir" does not exist! Space is required for registration to work!')
        if not os.access(self.temp_dir, os.W_OK):
            raise ValueError('"temp-dir" not writable! Writable space is required for registration to work!')

        if 'docker-base-url' not in config:
            raise ValueError('"docker-base-url" config variable must be defined to start a CatalogController!')
        self.docker_base_url = config['docker-base-url']
        print(self.docker_base_url)

        if 'docker-registry-host' not in config:
            raise ValueError('"docker-registry-host" config variable must be defined to start a CatalogController!')
        self.docker_registry_host = config['docker-registry-host']
        print(self.docker_registry_host)

        if 'nms-url' not in config:
            raise ValueError('"nms-url" config variable must be defined to start a CatalogController!')
        self.nms_url = config['nms-url']
        if 'nms-admin-user' not in config:
            raise ValueError('"nms-admin-user" config variable must be defined to start a CatalogController!')
        self.nms_admin_user = config['nms-admin-user']
        if 'nms-admin-psswd' not in config:
            raise ValueError('"nms-admin-psswd" config variable must be defined to start a CatalogController!')
        self.nms_admin_psswd = config['nms-admin-psswd']

        self.nms = NarrativeMethodStore(self.nms_url,user_id=self.nms_admin_user,password=self.nms_admin_psswd)


    def register_repo(self, params, username, token):
        """Validate a registration request and start the build in a background thread.

        Reserves a build directory <temp_dir>/<registration_id>, checks that the user
        may register the repo, records the 'started' state, then launches
        _start_registration in a new thread.

        :param params: request parameters; 'git_url' is required
        :param username: user making the request
        :param token: passed through to the registration thread
        :returns: the registration id, "<timestamp in ms>_<4 random digits>"
        :raises ValueError: if git_url is missing or not a URL, no build directory
            could be reserved, the user lacks permission, or a registration is
            already in progress
        """
        if 'git_url' not in params:
            raise ValueError('git_url not defined, but is required for registering a repository')
        git_url = params['git_url']
        if not bool(urlparse(git_url).netloc):
            raise ValueError('The git url provided is not a valid URL.')

        # generate a unique registration ID based on a timestamp in ms + 4 random digits,
        # and reserve it by creating its directory; retry with a new ID on failure
        tries = 20
        for attempt in range(tries):
            timestamp = int((datetime.utcnow() - datetime.utcfromtimestamp(0)).total_seconds()*1000)
            registration_id = str(timestamp)+'_'+str(random.randint(1000,9999))
            try:
                os.mkdir(os.path.join(self.temp_dir,registration_id))
                break
            except OSError:
                # if we fail, wait a bit and try again
                time.sleep(0.002)
        else:
            # if we couldn't reserve a spot for this registration, then quit
            raise ValueError('Unable to allocate a directory for building.  Try again, and if the problem persists contact us.')

        # 0) Make sure the submitter is on the list
        if not self.is_approved_developer([username])[0]:
            raise ValueError('You are not an approved developer.  Contact us to request approval.')

        # 1) If the repo does not yet exist, then create it.  No additional permission checks needed
        if not self.db.is_registered(git_url=git_url) : 
            self.db.register_new_module(git_url, username, timestamp)
            module_details = self.db.get_module_details(git_url=git_url)
        
        # 2) If it has already been registered, make sure the user has permissions to update, and
        # that the module is in a state where it can be registered 
        else:
            module_details = self.db.get_module_details(git_url=git_url)

            # 2a) Make sure the user has permission to register this URL
            if self.has_permission(username,module_details['owners']):
                # 2b) Make sure the current registration state is either 'complete' or 'error'
                state = module_details['state']
                registration_state = state['registration']
                if registration_state == 'complete' or registration_state == 'error':
                    error = self.db.set_module_registration_state(git_url=git_url, new_state='started', last_state=registration_state)
                    if error is not None:
                        # we can fail if the registration state changed when we were first checking to now.  This is important
                        # to ensure we only ever kick off one registration thread at a time
                        raise ValueError('Registration failed for git repo ('+git_url+') - registration state was modified before build could begin: '+error)
                    # we know we are the only operation working, so we can clear the dev version and upate the timestamp
                    self.db.update_dev_version({'timestamp':timestamp}, git_url=git_url)
                else:
                    raise ValueError('Registration already in progress for this git repo ('+git_url+')')
            else :
                raise ValueError('You ('+username+') are an approved developer, but do not have permission to register this repo ('+git_url+')')

        # 3) Ok, kick off the registration thread
        #   - This will check out the repo, attempt to build the image, run some tests, store the image
        #   - If all went well, and during the process, it will update the registration state of the
        #     module and finally update the dev version
        #   - If things failed, it will set the error state, and set an error message.
        worker = threading.Thread(target=_start_registration, args=(params,registration_id,timestamp,username,token,self.db, self.temp_dir, self.docker_base_url, 
            self.docker_registry_host, self.nms_url, self.nms_admin_user, self.nms_admin_psswd, module_details))
        worker.start()

        # 4) provide the registration id
        return registration_id



    def set_registration_state(self, params, username):
        """Let an admin overwrite the registration state of a module/repo.

        :param params: module/repo selection plus 'registration_state'; when the new
            state is 'error', a non-empty 'error_message' is required as well
        :param username: user making the request; must pass self.is_admin
        :raises ValueError: if the user is not an admin, no state is given, an
            'error' state comes without a message, or the database reports an error
        """
        # first some error handling
        if not self.is_admin(username):
            raise ValueError('You do not have permission to modify the registration state of this module/repo.')
        params = self.filter_module_or_repo_selection(params)
        if 'registration_state' not in params:
            raise ValueError('Update failed - no registration state indicated.')
        #TODO: possibly check for empty states or that the state is a valid state here
        error_message = ''
        if params['registration_state'] == 'error':
            # an 'error' state must be accompanied by a non-empty explanation
            if 'error_message' not in params or not params['error_message']:
                raise ValueError('Update failed - if state is "error", you must also set an "error_message".')
            error_message = params['error_message']

        # then we update the state (for every requested state, 'error' included)
        error = self.db.set_module_registration_state(
                    git_url=params['git_url'],
                    module_name=params['module_name'],
                    new_state=params['registration_state'],
                    error_message=error_message)
        if error is not None:
            raise ValueError('Registration failed for git repo ('+str(params['git_url'])+')- some unknown database error: ' + error)


    def push_dev_to_beta(self, params, username):
        """Promote the module's dev version to beta, in NMS first and then in the catalog DB.

        :param params: module/repo selection (normalized by filter_module_or_repo_selection)
        :param username: user making the request
        :raises ValueError: if the user is not an approved developer or lacks
            permission, the module is inactive, its last registration is not
            'complete', a release request is under review, or the DB update fails
        """
        # first make sure everything exists and we have permissions
        params = self.filter_module_or_repo_selection(params)
        module_name = params['module_name']
        git_url = params['git_url']
        module_details = self.db.get_module_details(module_name=module_name, git_url=git_url)

        # Make sure the submitter is still an approved developer
        approved = self.is_approved_developer([username])[0]
        if not approved:
            raise ValueError('You are not an approved developer.  Contact us to request approval.')

        allowed = self.has_permission(username, module_details['owners'])
        if not allowed:
            raise ValueError('You do not have permission to modify this module/repo.')

        # next make sure the state of the module is ok (it must be active, no pending registrations or release requests)
        if not module_details['state']['active']:
            raise ValueError('Cannot push dev to beta- module/repo is no longer active.')
        if module_details['state']['registration'] != 'complete':
            raise ValueError('Cannot push dev to beta- last registration is in progress or has an error.')
        if module_details['state']['release_approval'] == 'under_review':
            raise ValueError('Cannot push dev to beta- last release request of beta is still pending.')

        # ok, do it.
        nms_request = {'module_name': module_details['module_name'], 'tag': 'beta'}
        self.nms.push_repo_to_tag(nms_request)
        error = self.db.push_dev_to_beta(module_name=module_name, git_url=git_url)
        if error is not None:
            raise ValueError('Update operation failed - some unknown database error: '+error)

    def request_release(self, params, username):
        """Put the module's beta version 'under_review' for release.

        :param params: module/repo selection (normalized by filter_module_or_repo_selection)
        :param username: user making the request
        :raises ValueError: if the user is not an approved developer or lacks
            permission, the module is inactive, a request is already under review,
            there is no beta version, the beta and release versions share the same
            timestamp, or the DB update fails
        """
        # first make sure everything exists and we have permissions
        params = self.filter_module_or_repo_selection(params)
        module_name = params['module_name']
        git_url = params['git_url']
        module_details = self.db.get_module_details(module_name=module_name, git_url=git_url)

        # Make sure the submitter is still an approved developer
        approved = self.is_approved_developer([username])[0]
        if not approved:
            raise ValueError('You are not an approved developer.  Contact us to request approval.')
        allowed = self.has_permission(username, module_details['owners'])
        if not allowed:
            raise ValueError('You do not have permission to modify this module/repo.')

        # next make sure the state of the module is ok (it must be active, no pending release requests)
        if not module_details['state']['active']:
            raise ValueError('Cannot request release - module/repo is no longer active.')
        if module_details['state']['release_approval'] == 'under_review':
            raise ValueError('Cannot request release - last release request of beta is still pending.')

        # beta version must exist
        beta = module_details['current_versions']['beta']
        if not beta:
            raise ValueError('Cannot request release - no beta version has been created yet.')

        # beta version must be different than release version (if release version exists)
        release = module_details['current_versions']['release']
        if release and beta['timestamp'] == release['timestamp']:
            raise ValueError('Cannot request release - beta version is identical to released version.')

        # ok, do it.
        error = self.db.set_module_release_state(
            module_name=module_name,
            git_url=git_url,
            new_state='under_review',
            last_state=module_details['state']['release_approval'])
        if error is not None:
            raise ValueError('Release request failed - some unknown database error.'+error)

    def list_requested_releases(self):
        """Summarize every module whose release request is under review.

        Returns a list of dicts, one per module, holding the module name, git
        url, owner usernames, and the timestamp / commit hash / commit message
        of that module's beta version.
        """
        pending = self.db.find_current_versions_and_owners(
            {'state.release_approval': 'under_review'})
        summaries = []
        for record in pending:
            owner_names = [owner['kb_username'] for owner in record['owners']]
            beta_version = record['current_versions']['beta']
            summary = {
                'module_name': record['module_name'],
                'git_url': record['git_url'],
                'timestamp': beta_version['timestamp'],
                'git_commit_hash': beta_version['git_commit_hash'],
                'git_commit_message': beta_version['git_commit_message'],
                'owners': owner_names,
            }
            summaries.append(summary)
        return summaries


    def review_release_request(self, review, username):
        """Approve or deny a module's pending release request (admins only).

        review selects the module by 'module_name' and/or 'git_url' and must
        carry a 'decision' of "approved" or "denied".  A non-blank
        'review_message' is required when the decision is "denied"; otherwise
        a missing message defaults to ''.  On approval the module is pushed
        to the 'release' tag in the narrative method store and the beta
        version is pushed to release in the database before the release
        state is updated.

        Raises ValueError when the user is not an admin, the module is not
        registered, not under review or inactive, the decision / message is
        missing or invalid, or a database update reports an error.
        """
        if not self.is_admin(username):
            raise ValueError('You do not have permission to review a release request.')
        review = self.filter_module_or_repo_selection(review)

        module_details = self.db.get_module_details(module_name=review['module_name'], git_url=review['git_url'])
        if module_details['state']['release_approval'] != 'under_review':
            raise ValueError('Cannot review request - module/repo is not under review!')
        if not module_details['state']['active']:
            raise ValueError('Cannot review request - module/repo is no longer active.')

        if 'decision' not in review or not review['decision']:
            raise ValueError('Cannot set review - no "decision" was provided!')
        if review['decision'] == 'denied':
            # a denial must explain itself; None or blank messages are rejected
            if ('review_message' not in review
                    or not review['review_message']
                    or not review['review_message'].strip()):
                raise ValueError('Cannot set review - if denied, you must set a "review_message"!')
        if 'review_message' not in review:
            review['review_message'] = ''
        if review['decision'] not in ['approved', 'denied']:
            raise ValueError('Cannot set review - decision must be "approved" or "denied"')

        # ok, do it.

        # if the state is approved, then we need to save the beta version over the release version and stash
        # a new entry.  The DBI will handle that for us. (note that concurrency issues don't really matter
        # here because if this is done twice (for instance, before the release_state is set to approved in
        # the document in the next call) there won't be any problems.)
        if review['decision'] == 'approved':
            self.nms.push_repo_to_tag({'module_name': module_details['module_name'], 'tag': 'release'})
            error = self.db.push_beta_to_release(module_name=review['module_name'], git_url=review['git_url'])
            # Do not mark the request approved if the promotion itself failed;
            # the module stays under review so the approval can be retried.
            if error is not None:
                raise ValueError('Update operation failed - some unknown database error: ' + error)

        # Now we can update the release state...
        error = self.db.set_module_release_state(
                        module_name=review['module_name'], git_url=review['git_url'],
                        new_state=review['decision'],
                        last_state=module_details['state']['release_approval'],
                        review_message=review['review_message']
                    )
        if error is not None:
            raise ValueError('Release review update failed - some unknown database error. ' + error)


    def get_module_state(self, params):
        """Return what the database reports as the state of the selected module.

        params selects the module by 'module_name' and/or 'git_url'; a
        ValueError is raised if the module is not registered.
        """
        selection = self.filter_module_or_repo_selection(params)
        name = selection['module_name']
        url = selection['git_url']
        return self.db.get_module_state(module_name=name, git_url=url)


    def get_module_info(self, params):
        """Return a flat summary of the selected module.

        The result holds the module name, git url, description, language,
        the list of owner usernames, and the current release / beta / dev
        version records.  Raises ValueError if the module is not registered.
        """
        selection = self.filter_module_or_repo_selection(params)
        details = self.db.get_module_details(
            module_name=selection['module_name'],
            git_url=selection['git_url'])

        owner_names = [owner['kb_username'] for owner in details['owners']]
        module_info = details['info']
        versions = details['current_versions']

        return {
            'module_name': details['module_name'],
            'git_url': details['git_url'],
            'description': module_info['description'],
            'language': module_info['language'],
            'owners': owner_names,
            'release': versions['release'],
            'beta': versions['beta'],
            'dev': versions['dev'],
        }

    def get_version_info(self, params):
        """Look up a single version record of a module.

        params selects the module by 'module_name' and/or 'git_url' and may
        narrow the version with any combination of:
          'version'         - "dev", "beta" or "release"; only that current
                              version is considered
          'timestamp'       - timestamp the version must have
          'git_commit_hash' - commit hash the version must have
        Without 'version', the current dev / beta / release versions are
        checked first and then the module's stored release versions.

        Returns the matching version record, or None when nothing matches,
        when the module has no current versions, or when no selector is given.
        A dev / beta / release slot that holds no version never matches.

        Raises ValueError if the module is not registered or 'version' is not
        one of the three allowed names.
        """
        params = self.filter_module_or_repo_selection(params)
        current_version = self.db.get_module_current_versions(module_name=params['module_name'], git_url=params['git_url'])

        if not current_version:
            return None

        # TODO: can make this more efficient and flexible by putting in some indices and doing the query on mongo side
        # right now, we require a module name / git url, and request specific version based on selectors.  in the future
        # we could, for instance, get all versions that match a particular git commit hash, or timestamp...

        tags = ('dev', 'beta', 'release')

        def commit_matches(v):
            # a commit hash selector, when given, must agree with the record
            return 'git_commit_hash' not in params or v['git_commit_hash'] == params['git_commit_hash']

        # If version is in params, it should be one of dev, beta, release
        if 'version' in params:
            if params['version'] not in tags:
                raise ValueError('invalid version selection, valid versions are: "dev" | "beta" | "release"')
            v = current_version[params['version']]
            if not v:
                # nothing has been registered under this tag yet
                return None
            # if timestamp or git_commit_hash is given, those need to match as well
            if 'timestamp' in params and v['timestamp'] != params['timestamp']:
                return None
            if not commit_matches(v):
                return None
            return v

        if 'timestamp' in params:
            # first check in current versions (skipping slots with no version)
            for tag in tags:
                v = current_version[tag]
                if v and v['timestamp'] == params['timestamp']:
                    return v if commit_matches(v) else None
            # if we get here, we have to look in full history
            details = self.db.get_module_full_details(module_name=params['module_name'], git_url=params['git_url'])
            all_versions = details['release_versions']
            # release versions are keyed by the timestamp rendered as a string
            key = str(params['timestamp'])
            if key in all_versions:
                v = all_versions[key]
                return v if commit_matches(v) else None
            return None

        # if we get here, version and timestamp are not defined, so just look for the commit hash
        if 'git_commit_hash' in params:
            # check current versions (skipping slots with no version)
            for tag in tags:
                v = current_version[tag]
                if v and v['git_commit_hash'] == params['git_commit_hash']:
                    return v
            # if we get here, we have to look in full history
            details = self.db.get_module_full_details(module_name=params['module_name'], git_url=params['git_url'])
            # values() works on both Python 2 and 3; the keys are not needed here
            for v in details['release_versions'].values():
                if v['git_commit_hash'] == params['git_commit_hash']:
                    return v
            return None

        # no selector was given, so there is nothing to return
        return None

    def list_released_versions(self, params):
        """Return the selected module's release version records, sorted by
        ascending 'timestamp'.

        Raises ValueError if the module is not registered.
        """
        selection = self.filter_module_or_repo_selection(params)
        details = self.db.get_module_full_details(
            module_name=selection['module_name'],
            git_url=selection['git_url'])
        versions = list(details['release_versions'].values())
        versions.sort(key=lambda record: record['timestamp'])
        return versions


    def is_registered(self, params):
        """Report whether the database knows the given module.

        A missing 'git_url' or 'module_name' is filled in on params as an
        empty string before the lookup.  Returns True or False.
        """
        for key in ('git_url', 'module_name'):
            if key not in params:
                params[key] = ''
        found = self.db.is_registered(module_name=params['module_name'], git_url=params['git_url'])
        return bool(found)

    # note: maybe a little too mongo centric, but ok for now...
    def list_basic_module_info(self, params):
        """Build a query from the listing options and return the matching
        basic module info from the database.

        Recognized keys of params:
          'include_disabled'   - when > 0, the 'state.active' filter is dropped
          'include_released'   - defaults to 1 (written back into params)
          'include_unreleased' - defaults to 0 (written back into params)
          'owners'             - when non-empty, only modules with an owner
                                 username in this list are matched
        Returns an empty list without querying when neither released nor
        unreleased modules are requested.
        """
        query = {'state.active': True, 'state.released': True}

        if 'include_disabled' in params and params['include_disabled'] > 0:
            query.pop('state.active', None)

        for option, default in (('include_released', 1), ('include_unreleased', 0)):
            if option not in params:
                params[option] = default

        # Decide how the 'state.released' filter should look; when only
        # released modules are wanted the initial filter is left untouched.
        if params['include_released'] <= 0:
            if params['include_unreleased'] <= 0:
                return []  # don't include anything...
            # unreleased only: released flag is False or missing altogether
            query.pop('state.released', None)
            query['$or'] = [{'state.released': False}, {'state.released': {'$exists': False}}]
        elif params['include_unreleased'] > 0:
            query.pop('state.released', None)  # include everything

        # might want to filter out empty strings in the future
        if 'owners' in params and params['owners']:
            query['owners.kb_username'] = {'$in': params['owners']}

        return self.db.find_basic_module_info(query)


    def set_module_active_state(self, active, params, username):
        """Set the active flag of the selected module (admins only).

        Raises ValueError if the module is not registered, the user is not an
        admin, or the database reports an error for the update.
        """
        selection = self.filter_module_or_repo_selection(params)
        if not self.is_admin(username):
            raise ValueError('Only Admin users can set a module to be active/inactive.')
        failure = self.db.set_module_active_state(
            active,
            module_name=selection['module_name'],
            git_url=selection['git_url'])
        if failure is None:
            return
        raise ValueError('Update operation failed - some unknown database error: ' + failure)


    def approve_developer(self, developer, username):
        """Record developer as an approved developer (admins only).

        Raises ValueError if developer is empty or blank, or if username is
        not an admin.
        """
        name_missing = not developer or not developer.strip()
        if name_missing:
            raise ValueError('No username provided')
        if self.is_admin(username):
            self.db.approve_developer(developer)
            return
        raise ValueError('Only Admin users can approve or revoke developers.')

    def revoke_developer(self, developer, username):
        """Remove developer from the approved developers (admins only).

        Raises ValueError if developer is empty or blank, or if username is
        not an admin.
        """
        name_missing = not developer or not developer.strip()
        if name_missing:
            raise ValueError('No username provided')
        if self.is_admin(username):
            self.db.revoke_developer(developer)
            return
        raise ValueError('Only Admin users can approve or revoke developers.')

    def is_approved_developer(self, usernames):
        """Ask the database which of the given usernames are approved developers.

        An empty or missing list short-circuits to an empty list without
        touching the database; otherwise the database's answer is returned
        unchanged.
        """
        if usernames:
            return self.db.is_approved_developer(usernames)
        return []

    def list_approved_developers(self):
        """Return the sorted usernames of all approved developers."""
        records = self.db.list_approved_developers()
        return sorted(record['kb_username'] for record in records)


    def get_build_log(self, registration_id):
        """Return the text of the registration log for registration_id.

        The log is read from '<temp_dir>/registration.log.<registration_id>'.
        If that file cannot be opened or read, a placeholder message is
        returned instead of raising.
        """
        log_path = self.temp_dir + '/registration.log.' + str(registration_id)
        try:
            with open(log_path) as log_file:
                log = log_file.read()
        except (IOError, OSError):
            # Only I/O failures (missing or unreadable file) mean "no log";
            # anything else is a real error and should not be hidden.
            log = '[log not found - registration_id is invalid or the log has been deleted]'
        return log


    def delete_module(self, params, username):
        """Delete the selected module from the database (admins only).

        params must contain 'module_name' and/or 'git_url'.

        Raises ValueError if the user is not an admin, neither selector is
        given, the module is not registered, or the database reports an error
        for the delete.
        """
        if not self.is_admin(username):
            # message previously talked about migrating git urls (copy/paste slip)
            raise ValueError('Only Admin users can delete modules.')
        if 'module_name' not in params and 'git_url' not in params:
            raise ValueError('You must specify the "module_name" or "git_url" of the module to delete.')
        params = self.filter_module_or_repo_selection(params)
        error = self.db.delete_module(module_name=params['module_name'], git_url=params['git_url'])
        if error is not None:
            raise ValueError('Delete operation failed - some unknown database error: ' + error)


    def migrate_module_to_new_git_url(self, params, username):
        """Point a module at a new git url (admins only).

        params must contain 'module_name', 'current_git_url' and
        'new_git_url'; the new url must parse with a network location.

        Raises ValueError if the user is not an admin, a required key is
        missing, the new url is not valid, or the database reports an error.
        """
        if not self.is_admin(username):
            raise ValueError('Only Admin users can migrate module git urls.')

        required = (
            ('module_name', 'You must specify the "module_name" of the module to modify.'),
            ('current_git_url', 'You must specify the "current_git_url" of the module to modify.'),
            ('new_git_url', 'You must specify the "new_git_url" of the module to modify.'),
        )
        for key, complaint in required:
            if key not in params:
                raise ValueError(complaint)

        parsed = urlparse(params['new_git_url'])
        if not parsed.netloc:
            raise ValueError('The new git url is not a valid URL.')

        failure = self.db.migrate_module_to_new_git_url(
            params['module_name'], params['current_git_url'], params['new_git_url'])
        if failure is not None:
            raise ValueError('Update operation failed - some unknown database error: ' + failure)


    # Some utility methods

    def filter_module_or_repo_selection(self, params):
        """Normalize a module selection and make sure the module is registered.

        A missing 'git_url' or 'module_name' is filled in on params as an
        empty string.  Returns the same params object; raises ValueError if
        the database does not report the module as registered.
        """
        for key in ('git_url', 'module_name'):
            if key not in params:
                params[key] = ''
        registered = self.db.is_registered(module_name=params['module_name'], git_url=params['git_url'])
        if registered:
            return params
        raise ValueError('Operation failed - module/repo is not registered.')


    # always true if the user is in the admin list
    def has_permission(self, username, owners):
        """Return True if username is an admin or appears as the
        'kb_username' of one of the owners, False otherwise."""
        if self.is_admin(username):
            return True
        return any(username == owner['kb_username'] for owner in owners)


    def is_admin(self, username):
        """Return True if username is contained in self.adminList, else False."""
        return username in self.adminList


    def version(self):
        """Return the CATALOG_VERSION constant from biokbase.catalog.version."""
        catalog_version = biokbase.catalog.version.CATALOG_VERSION
        return catalog_version