Example no. 1
    def confidentialAuditParam(self):
        import yaml

        original_method = self.param.getVal('model')
        if self.param.getVal('quantitative'):
            if original_method != 'PLSR':
                self.param.setVal('model', 'PLSR')
                LOG.info(f'CONFIDENTIALITY AUDIT: the model was set to PLSR, '
                         f'the original method {original_method} was not suitable to build confidential models')
        else:
            if original_method != 'PLSDA':
                self.param.setVal('model', 'PLSDA')
                LOG.info(f'CONFIDENTIALITY AUDIT: the model was set to PLSDA, '
                         f'the original method {original_method} was not suitable to build confidential models')

        # TODO: conformal support
        if self.param.getVal('conformal'):
            self.param.setVal('conformal', False)
            LOG.info('CONFIDENTIALITY AUDIT: conformal was set to False. '
                     'Conformal models are not supported for now in confidential models')

        parameters_file_path = utils.model_path(self.model, 0)
        parameters_file_name = os.path.join(parameters_file_path,
                                            'parameters.yaml')
        with open(parameters_file_name, 'w') as pfile:
            yaml.dump(self.param.p, pfile)
Example no. 2
def action_documentation(model, version=None, doc_file=None, oformat='YAML'):
    ''' Returns an object with whole results info for a given model and version '''

    if model is None:
        return False, 'Empty model label'

    from flame.documentation import Documentation

    # get the model repo path
    rdir = utils.model_path(model, version)
    if not os.path.isfile(os.path.join(rdir, 'model-results.pkl')):
        return False, 'Info file not found'

    doc = Documentation(model, version)

    # INPUT, the doc_file will be applied to doc as a delta
    if doc_file is not None:
        success, message = doc.delta(model, version, doc_file, iformat=oformat)
        return success, message

    # CONSOLE PRINTING
    if oformat == 'YAML':
        yaml_doc = doc.dumpYAML()
        for line in yaml_doc:
            # strip non-ASCII characters before printing
            line = line.encode("ascii", "ignore").decode("ascii", "ignore")
            print(line)
        return True, 'parameters listed'

    # OUTPUT, the doc will be returned as a documentation object
    return True, doc
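
A short usage sketch for the function above; the model name and version are placeholders, and 'object' stands for any oformat other than 'YAML', which makes the function return the Documentation object instead of printing:

success, message = action_documentation('MyModel', version=1, oformat='YAML')   # prints to the console
success, doc = action_documentation('MyModel', version=1, oformat='object')     # returns a Documentation object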
Example no. 3
def verify(endpoint, version=None):

    result = {}
    success, result['documentation'] = verify_documentation(endpoint, version)
    #success, result['model'] = verify_model(endpoint, version)

    if not success:
        return False, result

    success, result['data'] = verify_data(endpoint, version)

    if not success:
        return False, result

    success, result['prediction'] = verify_prediction(endpoint, version)

    if not success:
        return False, result

    meta_path = utils.model_path(endpoint, version)
    verification_file = os.path.join(meta_path, 'verification.pkl')

    # save verification.pkl in the model folder
    with open(verification_file, "wb") as handle:
        pickle.dump(result, handle)
    LOG.info('Saved verification.pkl file')

    show_result(result)

    return True, result
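
A hedged usage sketch for verify: it chains the documentation, data and prediction checks, stops at the first failure, and pickles the aggregated result in the model folder. The endpoint name is a placeholder:

success, report = verify('MyEndpoint', version=1)
if success:
    # each section is a dict with 'status', 'comments' and 'Information' keys
    print(report['documentation']['status'], report['prediction']['status'])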
Example no. 4
def action_model_template(model, version=None, doc_file=None):
    '''
    Returns a TSV model reporting template
    '''
    from flame.documentation import Documentation

    if not model:
        return False, 'Empty model label'

    documentation = Documentation(model, version, context='model')

    # get the model repo path
    rdir = utils.model_path(model, version)
    if not os.path.isfile(os.path.join(rdir, 'results.pkl')):
        # compatibility method: fall back to info.pkl
        if not os.path.isfile(os.path.join(rdir, 'info.pkl')):
            return False, 'Info file not found'

    if doc_file is not None:
        # use the param_file to update existing parameters at the model
        # directory and save changes to make them persistent
        success, message = documentation.delta(model,
                                               0,
                                               doc_file,
                                               iformat='YAML')
        print(success, message)

    documentation.get_upf_template2()

    return True, 'Model documentation template created'
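
Usage sketch, with placeholder names; passing a doc_file first applies it as a delta to the stored documentation before the TSV template is generated:

success, message = action_model_template('MyModel', version=0, doc_file='delta.yaml')
print(message)    # 'Model documentation template created'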
Example no. 5
    def delta(self, model, version, param_file, iformat='YAML'):
        ''' load a set of parameters from the configuration file present 
            at the model directory

            also, inserts the keys present in the param_file provided, 
            assuming that it contains a YAML-compatible format, like the one
            generated by manage

            adds some parameters identifying the model and the 
            hash of the configuration file 
        '''
        # loadYaml returns a (success, message) tuple; a tuple is always
        # truthy, so it must be unpacked before testing
        success, message = self.loadYaml(model, version)
        if not success:
            return False, message

        # parse the parameter file, assuming it is in
        # a YAML-compatible format

        try:
            with open(param_file, 'r') as pfile:
                if iformat == 'YAML':
                    newp = yaml.safe_load(pfile)
                elif iformat == 'JSON':
                    newp = json.load(pfile)
                else:
                    return False, f'unrecognized input format {iformat}'
        except Exception as e:
            return False, e

        # update the internal dict with the keys in the input file (delta)
        black_list = [
            'param_format', 'version', 'model_path', 'endpoint', 'md5'
        ]
        for key in newp:
            if key not in black_list:

                val = newp[key]

                # YAML stores null values as the string 'None', which
                # must be converted back to a real None
                if val == 'None':
                    val = None

                if isinstance(val, dict):
                    for inner_key in val:
                        inner_val = val[inner_key]
                        self.setInnerVal(key, inner_key, inner_val)
                        #print ('@delta: adding',key, inner_key, inner_val)
                else:
                    self.setVal(key, val)
                    #print ('@delta: adding',key,val,type(val))

        # dump internal dict to the parameters file
        parameters_file_path = utils.model_path(model, version)
        parameters_file_name = os.path.join(parameters_file_path,
                                            'parameters.yaml')
        try:
            with open(parameters_file_name, 'w') as pfile:
                yaml.dump(self.p, pfile)
        except Exception as e:
            return False, 'unable to write parameters'

        return True, 'OK'
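
For illustration, a delta file for the method above could look like the sketch below; 'model' and 'conformal' are parameter keys that appear elsewhere in these examples, and param stands for a hypothetical instance of the class defining delta:

# contents of delta.yaml -- only the keys to be changed need to be present:
#     model: PLSR
#     conformal: False
success, message = param.delta('MyModel', 0, 'delta.yaml', iformat='YAML')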
Example no. 6
def verify_prediction(endpoint, version=None):

    meta_path = utils.model_path(endpoint, version)
    training_file = os.path.join(meta_path, 'training_series')

    # the verification fails when no training series is stored with the model
    if not os.path.isfile(training_file):
        return True, {'status': 'Failed', 'comments': '', 'Information': []}

    return True, {'status': 'Passed', 'comments': '', 'Information': []}
Example no. 7
    def run(self, input_source):
        ''' Executes a default prediction workflow '''

        # path to endpoint
        endpoint = utils.model_path(self.model, self.version)
        if not os.path.isdir(endpoint):
            self.conveyor.setError(f'Unable to find model {self.model}, version {self.version}')

        if not self.conveyor.getError():
            # uses the child classes within the 'model' folder,
            # to allow customization of
            # the processing applied to each model
            modpath = utils.module_path(self.model, self.version)

            idata_child = importlib.import_module(modpath+".idata_child")
            apply_child = importlib.import_module(modpath+".apply_child")
            odata_child = importlib.import_module(modpath+".odata_child")

            # run idata object, in charge of generating model data from the input
            try:
                idata = idata_child.IdataChild(self.param, self.conveyor, input_source)
            except Exception:
                LOG.warning('Idata child architecture mismatch, defaulting to Idata parent')
                idata = Idata(self.param, self.conveyor, input_source)

            idata.run()
            LOG.debug(f'idata child {type(idata).__name__} completed `run()`')

        if not self.conveyor.getError():
            # make sure there is X data
            if not self.conveyor.isKey('xmatrix'):
                LOG.debug('Failed to compute MDs')
                self.conveyor.setError('Failed to compute MDs')

        if not self.conveyor.getError():
            # run apply object, in charge of generating a prediction from idata
            try:
                apply = apply_child.ApplyChild(self.param, self.conveyor)
            except Exception:
                LOG.warning('Apply child architecture mismatch, defaulting to Apply parent')
                apply = Apply(self.param, self.conveyor)

            apply.run()
            LOG.debug(f'apply child {type(apply).__name__} completed `run()`')

        # run odata object, in charge of formatting the prediction results
        # note that if any of the above steps failed, an error has been inserted
        # in the conveyor and odata will take care of showing an error message
        try:
            odata = odata_child.OdataChild(self.param, self.conveyor)
        except Exception:
            LOG.warning('Odata child architecture mismatch, defaulting to Odata parent')
            odata = Odata(self.param, self.conveyor)

        return odata.run()
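
The try/except fallbacks above expect per-model child modules inside the endpoint folder. A minimal pass-through sketch of idata_child.py, assuming the parent class lives in flame.idata (only the flame.documentation and flame.conveyor import paths are confirmed by these examples):

# <endpoint>/<version>/idata_child.py
from flame.idata import Idata    # assumed module path for the parent class

class IdataChild(Idata):
    # override run() or any other hook to customize the processing of this
    # model; the constructor must accept (param, conveyor, input_source),
    # matching the instantiation in run() above
    pass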
Example no. 8
    def saveJSON(self, model, version, input_JSON):
        # parse the JSON input and persist it as parameters.yaml
        p = json.load(input_JSON)
        parameters_file_path = utils.model_path(model, version)
        parameters_file_name = os.path.join(parameters_file_path,
                                            'parameters.yaml')
        try:
            with open(parameters_file_name, 'w') as pfile:
                yaml.dump(p, pfile)
        except Exception:
            return False

        return True
Example no. 9
    def loadYaml(self, model, version, isSpace=False):
        ''' load a set of parameters from the configuration file present 
            at the model directory

            adds some parameters identifying the model and the 
            hash of the configuration file 
        '''

        # obtain the path and the default name of the model parameters
        if isSpace:
            parameters_file_path = utils.space_path(model, version)
        else:
            parameters_file_path = utils.model_path(model, version)

        if not os.path.isdir(parameters_file_path):
            return False, f'Model "{model}", version "{version}" not found'

        parameters_file_name = os.path.join(parameters_file_path,
                                            'parameters.yaml')

        # load the main class dictionary (p) from this yaml file
        if not os.path.isfile(parameters_file_name):
            return False, 'Parameters file not found'

        try:
            with open(parameters_file_name, 'r') as pfile:
                self.p = yaml.safe_load(pfile)
        except Exception as e:
            return False, e

        # check the version of the parameter file
        # no 'param_format' key means a version < 2.0 file
        if 'param_format' in self.p:
            self.extended = True
        else:
            self.extended = False
            self.param_format = 1.0

        # # correct CV to kfold for conformal models
        # if self.getVal('conformal') is True:
        #     self.setVal('ModelValidationCV','kfold')
        if self.getVal('model') == 'majority':
            self.setVal('conformal', False)

        # add keys for the model and a MD5 hash
        self.setVal('endpoint', model)
        self.setVal('version', version)
        self.setVal('model_path', parameters_file_path)
        # self.setVal('md5',utils.md5sum(parameters_file_name))
        self.setVal('md5', self.idataHash())

        return True, 'OK'
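
A usage sketch, assuming Parameters can be instantiated without arguments as in Example no. 22, and that getVal mirrors the setVal calls above:

p = Parameters()
success, message = p.loadYaml('MyModel', 0)
if success:
    print(p.getVal('model'), p.getVal('endpoint'))
else:
    print(message)    # e.g. 'Parameters file not found'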
Example no. 10
    def run(self, input_source):
        ''' Executes a default building workflow '''

        results = {}

        # path to endpoint
        epd = utils.model_path(self.model, 0)
        if not os.path.isdir(epd):
            LOG.error(f'Unable to find model {self.model}')
            results['error'] = 'unable to find model: '+self.model

        if 'error' not in results:
            # uses the child classes within the 'model' folder,
            # to allow customization of  the processing applied to each model
            modpath = utils.module_path(self.model, 0)

            idata_child = importlib.import_module(modpath+".idata_child")
            learn_child = importlib.import_module(modpath+".learn_child")
            odata_child = importlib.import_module(modpath+".odata_child")

            LOG.debug('child modules imported: '
                      f' {idata_child.__name__},'
                      f' {learn_child.__name__},'
                      f' {odata_child.__name__}')

            # run idata object, in charge of generating the model data
            idata = idata_child.IdataChild(self.parameters, input_source)
            results = idata.run()
            LOG.debug(f'idata child {idata_child.__name__} completed `run()`')

        if 'error' not in results:
            if 'xmatrix' not in results:
                LOG.error('Failed to compute MDs')
                results['error'] = 'Failed to compute MDs'

            if 'ymatrix' not in results:
                LOG.error('No activity data (Y) found in training series')
                results['error'] = 'No activity data (Y) found in training series'

        if 'error' not in results:
            # run learn object, in charge of building the model from idata
            learn = learn_child.LearnChild(self.parameters, results)
            results = learn.run()
            LOG.debug(f'learn child {learn_child.__name__} completed `run()`')

        # run odata object, in charge of formatting the results
        # note that if any of the above steps failed, an error has been inserted in the
        # results and odata will take care of showing an error message
        odata = odata_child.OdataChild(self.parameters, results)
        LOG.info('Building completed')
        return odata.run()
Example no. 11
def verify_documentation(endpoint, version=None):
    '''
      Check that the required fields are completed
    '''

    blacklist = [
        'Species', 'Limits_applicability', 'Experimental_protocol', 'location',
        'description', 'endpoint_positive', 'endpoint_negative',
        'raw_data_url', 'test_set_size', 'training_set_url', 'test_set_url',
        'bootstrap', 'ccp_alpha', 'criterion', 'max_depth', 'max_features',
        'max_leaf_nodes', 'max_samples', 'min_impurity_decrease',
        'min_impurity_split', 'min_samples_leaf', 'min_samples_split',
        'min_weight_fraction_leaf', 'n_estimators', 'n_jobs', 'oob_score',
        'random_state', 'verbose', 'warm_start', 'confidence', 'ACP_sampler',
        'KNN_NN', 'aggregated', 'aggregation_function', 'conformal_predictors',
        'normalizing_model', 'Conformal_mean_interval', 'Conformal_accuracy',
        'Q2', 'SDEP', 'Comments', 'Other_related_models', 'Date_of_QMRF',
        'Date_of_QMRF_updates', 'QMRF_updates', 'References',
        'QMRF_same_models', 'Mechanistic_basis', 'Mechanistic_references',
        'Supporting_information', 'Comment_on_the_endpoint',
        'Endpoint_data_quality_and_variability', 'Descriptor_selection',
        'Internal_validation_2', 'External_validation'
    ]

    if endpoint is None:
        return False, 'Empty model label'

    # get the model repo path
    rdir = utils.model_path(endpoint, version)
    if not os.path.isfile(os.path.join(rdir, 'model-results.pkl')):
        return False, 'Info file not found'

    doc = Documentation(endpoint, version)

    fields = [field for field in doc.empty_fields() if field not in blacklist]

    if fields:
        result = {
            'status': 'Failed',
            'comments': 'fields not completed',
            'Information': fields
        }
    else:
        result = {
            'status': 'Passed',
            'comments': 'All fields required are completed',
            'Information': []
        }

    return True, result
Example no. 12
def build_cmd(model, output_format=None):
    '''
    Instantiates a Build object to build a model using the given
    input file and model. 

    This method must be self-contained and suitable for being called in
    cascade by models which use the output of other models as input.
    '''
    # safety check if model exists
    repo_path = pathlib.Path(utils.model_repository_path())
    model_list = os.listdir(repo_path)

    if model['endpoint'] not in model_list:
        LOG.error('endpoint name not found in model repository.')
        raise ValueError('Wrong endpoint name. '
                         f"{model['endpoint']} does not exist")

    build = Build(model['endpoint'], output_format)

    ext_input, model_set = build.get_model_set()

    if ext_input:

        success, model_res = get_external_input(build, model_set,
                                                model['infile'])

        if not success:
            return False, model_res

        # now run the model using the data from the external sources
        success, results = build.run(model_res)

    else:

        ifile = model['infile']

        if not os.path.isfile(ifile):
            return False, 'wrong training series file'

        epd = utils.model_path(model['endpoint'], 0)
        lfile = os.path.join(epd, os.path.basename(ifile))
        try:
            shutil.copy(ifile, lfile)
        except shutil.SameFileError:
            LOG.warning('Building model with the input SDF'
                        f' present in model folder {lfile}')
        # run the model with the input file
        success, results = build.run(lfile)

    return success, results
Example no. 13
    def run(self, input_source):
        ''' Executes a default prediction workflow '''

        results = {}

        # path to endpoint
        endpoint = utils.model_path(self.model, self.version)
        if not os.path.isdir(endpoint):
            LOG.debug('Unable to find model'
                      ' {} version {}'.format(self.model, self.version))
            results['error'] = 'unable to find model: ' + \
                self.model+' version: '+str(self.version)

        if 'error' not in results:
            # uses the child classes within the 'model' folder,
            # to allow customization of
            # the processing applied to each model
            modpath = utils.module_path(self.model, self.version)

            idata_child = importlib.import_module(modpath + ".idata_child")
            apply_child = importlib.import_module(modpath + ".apply_child")
            odata_child = importlib.import_module(modpath + ".odata_child")

            LOG.debug('child modules imported: '
                      f' {idata_child.__name__},'
                      f' {apply_child.__name__},'
                      f' {odata_child.__name__}')

            # run idata object, in charge of generating model data from the input
            idata = idata_child.IdataChild(self.parameters, input_source)
            results = idata.run()
            LOG.debug(f'idata child {idata_child.__name__} completed `run()`')

        if 'error' not in results:
            if 'xmatrix' not in results:
                LOG.debug('Failed to compute MDs')
                results['error'] = 'Failed to compute MDs'

        if 'error' not in results:
            # run apply object, in charge of generating a prediction from idata
            apply = apply_child.ApplyChild(self.parameters, results)
            results = apply.run()
            LOG.debug(f'apply child {apply_child.__name__} completed `run()`')

        # run odata object, in charge of formatting the prediction results or any error
        odata = odata_child.OdataChild(self.parameters, results)
        LOG.info('Prediction completed')
        return odata.run()
Example no. 14
def action_label(model, version=None, labels=None, oformat='text'):
    ''' Returns / sets the model labels '''

    if model is None:
        return False, 'Empty model label'

    # get the model repo path
    rdir = utils.model_path(model, version)

    if not os.path.isdir(rdir):
        return False, f'Model {model} not found'

    if labels is not None:
        if oformat == 'JSONS':
            try:
                p = json.loads(labels)
            except Exception as e:
                return False, str(e)
        else:
            try:
                with open(labels, 'r') as fi:
                    p = yaml.safe_load(fi)
            except Exception as e:
                return False, e

        for ikey in p:
            if len(p[ikey]) > 15:
                return False, f'labels must be at most 15 chars long. Label "{ikey} : {p[ikey]}" is {len(p[ikey])} chars long'
            elif len(p[ikey]) < 2:
                return False, f'labels must be at least 2 chars long. Label "{ikey} : {p[ikey]}" is {len(p[ikey])} chars long'
        try:
            with open(os.path.join(rdir, 'model-labels.pkl'), 'wb') as fo:
                pickle.dump(p, fo)
        except Exception as e:
            return False, e

    # open labels
    try:
        with open(os.path.join(rdir, 'model-labels.pkl'), 'rb') as fi:
            p = pickle.load(fi)
    except Exception as e:
        return False, e

    if oformat == 'text':
        for ikey in p:
            print(f'{ikey}\t{p[ikey]}')
        return True, 'success'

    return True, p
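
Usage sketch with hypothetical label keys; the JSONS branch takes an inline JSON string, while the default branch reads the labels from a YAML file:

success, labels = action_label('MyModel', version=0,
                               labels='{"quality": "good", "species": "rat"}',
                               oformat='JSONS')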
Example no. 15
def get_verification(endpoint, version):
    '''
    Retrieves the model verification if it exists
    '''
    meta_path = utils.model_path(endpoint, version)
    verification_file = os.path.join(meta_path, 'verification.pkl')

    if os.path.isfile(verification_file):
        with open(verification_file, 'rb') as handle:
            verification = pickle.load(handle)
        return True, verification

    # return a (success, result) tuple on failure too, so callers can
    # always unpack the result
    return False, None
Example no. 16
def action_series(model, version):
    '''
    Returns the training series used for building a given model/version
    '''
    if model is None:
        return False, 'Empty model label'

    meta_path = utils.model_path(model, version)
    training_file = os.path.join(meta_path, 'training_series')
    if not os.path.isfile(training_file):
        return False, 'training series file not found'

    shutil.copy(training_file, './training_series.sdf')
    
    LOG.info(f'Training series for model {model}, version {version}, saved as "training_series.sdf"')
    return True, 'OK'
Example no. 17
def action_results(model, version=None, ouput_variables=False):
    ''' Returns a JSON with whole results info for a given model and version '''

    if model is None:
        return False, 'Empty model label'

    rdir = utils.model_path(model, version)
    if not os.path.isfile(os.path.join(rdir, 'results.pkl')):
        return False, 'results not found'

    from flame.conveyor import Conveyor

    conveyor = Conveyor()
    with open(os.path.join(rdir, 'results.pkl'), 'rb') as handle:
        conveyor.load(handle)

    return True, conveyor.getJSON()
Example no. 18
    def update_file(self, model, version=0):
        '''Saves the current parameter values as modified
        at the object level (e.g. from an interactive Python shell)
        '''
        p = self.p
        if not p:
            return False, 'No loaded parameters'

        parameters_file_path = utils.model_path(model, version)
        parameters_file_name = os.path.join(parameters_file_path,
                                            'parameters.yaml')
        try:
            with open(parameters_file_name, 'w') as pfile:
                yaml.dump(p, pfile)
        except Exception as e:
            return False, e
        # return a (success, message) tuple, consistent with the error paths
        return True, 'OK'
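
Usage sketch from an interactive shell, with a hypothetical Parameters-like instance p that already has parameters loaded:

p.setVal('conformal', False)                    # modify a value in the in-memory dict
success, message = p.update_file('MyModel', 0)  # persist it to parameters.yaml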
Example no. 19
def action_results(model, version=None, ouput_variables=False):
    ''' Returns an object with whole results info for a given model and version '''

    if model is None:
        return False, {'code': 1, 'message': 'Empty model label'}

    results_path = utils.model_path(model, version)
    results_file = os.path.join(results_path, 'model-results.pkl')

    if not os.path.isfile(results_file):
        return False, {'code': 0, 'message': 'Results file not found'}

    conveyor = Conveyor()

    with open(results_file, 'rb') as handle:
        conveyor.load(handle)

    return True, conveyor
Example no. 20
    def load_results(self):
        '''
            Load results pickle with model information
        '''
        # obtain the path and the default name of the results file
        results_file_path = utils.model_path(self.model, self.version)
        results_file_name = os.path.join(results_file_path, 'results.pkl')
        self.conveyor = Conveyor()
        # load the results pickle into the conveyor
        if not os.path.isfile(results_file_name):
            raise Exception('Results file not found')

        try:
            with open(results_file_name, "rb") as input_file:
                self.conveyor.load(input_file)
        except Exception as e:
            # no valid results pickle found at results_file_name
            raise e
Example no. 21
def action_remove(model, version):
    '''
    Remove the version indicated as argument from the model tree indicated
    as argument
    '''

    if not model:
        return False, 'Empty model label'

    if version == 0:
        return False, 'Development version cannot be removed, provide a version number'

    rdir = utils.model_path(model, version)
    if not os.path.isdir(rdir):
        return False, f'Version {version} not found'

    shutil.rmtree(rdir, ignore_errors=True)
    LOG.info(f'Version {version} of model {model} has been removed')
    return True, f'Version {version} of model {model} has been removed'
Example no. 22
    def __init__(self, model, version=0, context='model'):
        ''' Load the fields from the documentation file'''

        self.model = model
        self.version = version
        self.fields = None
        self.parameters = Parameters()
        self.conveyor = None

        # obtain the path and the default name of the model documents
        documentation_file_path = utils.model_path(self.model, self.version)
        documentation_file_name = os.path.join(documentation_file_path,
                                               'documentation.yaml')

        # load the main class dictionary (p) from this yaml file
        if not os.path.isfile(documentation_file_name):
            raise Exception('Documentation file not found')

        try:
            with open(documentation_file_name, 'r') as documentation_file:
                self.fields = yaml.safe_load(documentation_file)
        except Exception as e:
            # LOG.error(f'Error loading documentation file with exception: {e}')
            raise e

        success, message = self.parameters.loadYaml(model, version)

        if not success:
            print(
                f'Parameters could not be loaded. {message}. Please make sure endpoint and version are correct'
            )
            return

        # Remove this after acc
        #self.load_parameters()
        if context == 'model':
            self.load_results()
            self.assign_parameters()
            self.assign_results()
            self.autocomplete_documentation()
            self.setVal('md5', self.idataHash())
Example no. 23
def action_remove(model, version):
    '''
    Remove the version indicated as argument from the model tree indicated
    as argument
    '''

    if not model:
        LOG.error('empty model label')
        return False, 'empty model label'

    if version == 0:
        LOG.error('development version cannot be removed')
        return False, 'development version cannot be removed'

    rdir = utils.model_path(model, version)
    if not os.path.isdir(rdir):
        LOG.error(f'version {version} not found')
        return False, 'version not found'

    shutil.rmtree(rdir, ignore_errors=True)
    LOG.info(f'version {version} of model {model} has been removed')
    return True, 'version '+str(version)+' of model '+model+' removed'
Example no. 24
    def loadYaml(self, model, version):
        ''' load a set of parameters from the configuration file present 
            at the model directory

            adds some parameters identifying the model and the 
            hash of the configuration file 
        '''

        # obtain the path and the default name of the model parameters
        parameters_file_path = utils.model_path(model, version)
        parameters_file_name = os.path.join(parameters_file_path,
                                            'parameters.yaml')

        # load the main class dictionary (p) from this yaml file
        if not os.path.isfile(parameters_file_name):
            return False, 'file not found'

        try:
            with open(parameters_file_name, 'r') as pfile:
                self.p = yaml.safe_load(pfile)
        except Exception as e:
            return False, e

        # check the version of the parameter file
        # no 'param_format' key means a version < 2.0 file
        if 'param_format' in self.p:
            self.extended = True
        else:
            self.extended = False
            self.param_format = 1.0

        # add keys for the model and a MD5 hash
        self.setVal('endpoint', model)
        self.setVal('version', version)
        self.setVal('model_path', parameters_file_path)
        self.setVal('md5', utils.md5sum(parameters_file_name))

        return True, 'OK'
Example no. 25
    def __init__(self, model, version):

        parameters_file_path = utils.model_path(model, version)
        parameters_file_name = os.path.join(parameters_file_path,
                                            'parameters.yaml')

        if not os.path.isfile(parameters_file_name):
            LOG.critical('Unable to load model parameters. Aborting...')
            sys.exit()

        try:
            with open(parameters_file_name, 'r') as pfile:
                self.p = yaml.safe_load(pfile)
        except Exception as e:
            LOG.critical(f'Unable to load model parameters ({e}). Aborting...')
            sys.exit()

        self.setVal('endpoint', model)
        self.setVal('version', version)
        self.setVal('model_path', parameters_file_path)
        self.setVal('md5', utils.md5sum(parameters_file_name))

        return
Example no. 26
def build_cmd(arguments, output_format=None):
    '''
    Instantiates a Build object to build a model using the given
    input file and model. 

    This method must be self-contained and suitable for being called in
    cascade by models which use the output of other models as input.
    '''
    
    from flame.build import Build

    # safety check if model exists
    endpoint_dir = utils.model_path(arguments['endpoint'], 0)
    if not os.path.isdir(endpoint_dir):
        return False, 'Endpoint name not found in model repository.'

    # remove pre-existing results file
    results_file = os.path.join(endpoint_dir, 'model-results.pkl')
    if os.path.isfile(results_file):
        os.remove(results_file)

    meta_file = os.path.join(endpoint_dir, 'model-meta.pkl')
    if os.path.isfile(meta_file):
        os.remove(meta_file)

    # input file provided in the command
    ifile = arguments['infile']
    if ifile is not None and not os.path.isfile (ifile):
        return False, f'Wrong training series file {ifile}'

    # lfile is the "training_series" copied internally to the endpoint folder
    endpoint_path = utils.model_path(arguments['endpoint'], 0)
    lfile = os.path.join(endpoint_path, 'training_series')

    if 'param_file' in arguments:
        build = Build(arguments['endpoint'], param_file=arguments['param_file'], output_format=output_format)
    elif 'param_string' in arguments:
        build = Build(arguments['endpoint'], param_string=arguments['param_string'], output_format=output_format)
    else:
        build = Build(arguments['endpoint'], output_format=output_format)

    if utils.isSingleThread():
        build.set_single_CPU()

    ensemble = build.get_ensemble()

    # ensemble[0]     Boolean, True for ensemble models and False otherwise
    # ensemble[1]     List of ensemble model names
    # ensemble[2]     List of ensemble model versions

    if ensemble[0]:

        emodels = ensemble[1]
        evers   = ensemble[2]

        if ifile is None:
            if not os.path.isfile(lfile):
                return False, 'no training series detected'
        else:
            try:
                safe_copy(ifile, lfile)
            except Exception:
                return False, 'Unable to copy input file to model directory'
        
        success, model_res = get_ensemble_input(build, emodels, evers, lfile)

        if not success:
            return False, model_res

        for i in range(len (emodels)):
            success, iID = utils.getModelID(emodels[i], evers[i], 'model')
            if success:
                build.extend_modelID(iID)

        LOG.debug(f'New modelID is: {build.conveyor.getMeta("modelID")}')

        # now run the model using the data from the external sources
        success, results = build.run(model_res)

    else:

        # when a new training series is provided in the command line
        # try to copy it to the model directory
        if ifile is not None:

            # in case of incremental training, append the input file to the
            # existing training series
            if arguments['incremental']:
                if os.path.isfile(lfile):
                    LOG.info(f'Merging file {ifile} with existing training series')
                    new_training = os.path.join(endpoint_path, 'temp_training')

                    with open(new_training, 'w') as outfile:

                        # handling the extra newline of SDFiles is problematic. We delay the
                        # output of the newline by stripping newlines and adding a universal
                        # newline at the start of the next line for the first block
                        first = True
                        with codecs.open(lfile, 'r', encoding='utf-8', errors='ignore') as infile:
                            for line in infile:
                                if first:
                                    outfile.write(f'{line.rstrip()}')
                                    first = False
                                else:
                                    outfile.write(f'\n{line.rstrip()}')

                        # for the second block we add the preceding newline to all lines
                        with codecs.open(ifile, 'r', encoding='utf-8', errors='ignore') as infile:
                            for line in infile:
                                outfile.write(f'\n{line.rstrip()}')

                    shutil.move(new_training, lfile)
            else:
                try:
                    safe_copy(ifile, lfile)
                except Exception:
                    return False, 'Unable to copy input file to model directory'

        # check that the local copy of the input file exists
        if not os.path.isfile(lfile):
            return False, 'No training series found'

        # run the model with the input file
        success, results = build.run(lfile)

    return success, results
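
A hedged invocation sketch; the keys of the arguments dict mirror the ones the function reads (endpoint, infile and incremental, plus the optional param_file or param_string), and the values are placeholders:

arguments = {
    'endpoint': 'MyModel',       # must already exist in the model repository
    'infile': 'training.sdf',    # new training series, or None to reuse the stored one
    'incremental': False,        # True appends infile to the existing series
}
success, results = build_cmd(arguments)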
Example no. 27
def predict_cmd(arguments, output_format=None):
    '''
    Instantiates a Predict object to run a prediction using the given input
    file and model.

    This method must be self-contained and suitable for being called in
    cascade by models which use the output of other models as input.
    '''
    from flame.predict import Predict

    # safety check if model exists
    endpoint_dir = utils.model_path(arguments['endpoint'], 0)
    if not os.path.isdir(endpoint_dir):
        return False, 'Endpoint name not found in model repository.'

    # ** DEPRECATE **
    # this is a back-compatibility trick for older versions of APIs 
    # not supporting the label argument
    if 'label' not in arguments:
        arguments['label'] = 'temp'

    if 'output_format' in arguments:
        output_format = arguments['output_format']

    predict = Predict(arguments['endpoint'], version=arguments['version'],  output_format=output_format, label=arguments['label'])

    if utils.isSingleThread():
        predict.set_single_CPU()

    ensemble = predict.get_ensemble()

    # ensemble[0]     Boolean, True for ensemble models and False otherwise
    # ensemble[1]     List of ensemble model names
    # ensemble[2]     List of ensemble model versions

    if ensemble[0]:

        if arguments['infile'] is None:
            return False, 'ensemble models always require an input file'

        emodels = ensemble[1]
        evers   = ensemble[2]

        success, model_res = get_ensemble_input(predict, emodels, evers, arguments['infile'])

        if not success:
            predict.conveyor.setError(model_res)
            LOG.error(model_res)
            # return False, model_res  # TO-DO: kept commented so the prediction runs and odata can generate the error info

        # check the presence of changes in the inner models
        modelID = predict.conveyor.getMeta('modelID')
        for i in range(len (emodels)):
            success, iID = utils.getModelID(emodels[i], evers[i], 'model')
            if success:
                if iID not in modelID:
                    predict.conveyor.setWarning (f'Inner model {emodels[i]}.{evers[i]} has been updated. Rebuilding of ensemble model is recommended')
                    LOG.warning (f'Inner model {emodels[i]}.{evers[i]} has been updated. Rebuilding of ensemble model is recommended')

        # now run the model using the data from the external sources
        success, results = predict.run(model_res)

    else:

        # run the model with the input file
        success, results = predict.run(arguments['infile'])

    LOG.info('Prediction completed...')

    return success, results
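
A matching invocation sketch for predictions; the endpoint, version, infile and label values are placeholders, and label may be omitted thanks to the back-compatibility default above:

arguments = {
    'endpoint': 'MyModel',
    'version': 1,
    'infile': 'query.sdf',    # structures to predict
    'label': 'run1',
}
success, results = predict_cmd(arguments)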
Example no. 28
def action_results(model, version=None, ouput_variables=False):
    ''' Returns a JSON with whole results info for a given model and version '''

    if model is None:
        return False, 'empty model label'

    if version is None:
        return False, 'no version provided'

    rdir = utils.model_path(model, version)
    if not os.path.isfile(os.path.join(rdir, 'results.pkl')):
        return False, 'results not found'

    # retrieve a pickle file containing the keys 'model_build' 
    # and 'model_validate' of results
    with open(os.path.join(rdir, 'results.pkl'), 'rb') as handle:
        results = pickle.load(handle)

    # this code collects the serializable results in a dict and then
    # converts it to JSON

    json_results = {}

    # creates a list with the keys which should NOT be included
    black_list = []
    for k in results['manifest']:

        ###
        # By default do not include 'var' arrays, only 'obj' arrays
        # to avoid including the X matrix and save space
        # 
        # this black list can be easily tuned to include everything
        # or to remove other keys
        ###
        if not ouput_variables:
            if k['dimension'] == 'vars':
                black_list.append(k['key'])

    # iterate keys and for those not in the black list
    # format the information to JSON
    for key in results:

        if key in black_list:
            continue

        value = results[key]

        # np.arrays cannot be serialized to JSON and must be transformed
        if isinstance(value, np.ndarray):

            # do not process bi-dimensional arrays
            if len (np.shape(value)) > 1 :
                continue

            # booleans must be transformed to 'True' or 'False' strings
            if 'bool_' in str(type(value[0])):
                json_results[key] = [
                    'True' if x else 'False' for x in value]

            # we assume that np.array must contain np.floats
            else:
                # this removes NaNs and creates
                # a plain list from ndarrays
                json_results[key] = [x if not np.isnan(
                    x) else None for x in value]

        else:
            json_results[key] = value

    # serialize once, after all keys have been processed
    try:
        output = json.dumps(json_results)
    except Exception:
        return False, 'unable to serialize the results to JSON'

    return True, output
Example no. 29
def action_info(model, version=None, output='JSON'):
    '''
    Returns a text or JSON with results info for a given model and version
    '''

    if model is None:
        return False, 'empty model label'

    if version is None:
        return False, 'no version provided'

    rdir = utils.model_path(model, version)
    if not os.path.isfile(os.path.join(rdir, 'results.pkl')):

        # compatibility method: use info.pkl
        if not os.path.isfile(os.path.join(rdir, 'info.pkl')):
            return False, 'info not found'

        with open(os.path.join(rdir, 'info.pkl'), 'rb') as handle:
            # retrieve the 'model_build' and 'model_validate' lists, which
            # are pickled sequentially and need two load calls
            info = pickle.load(handle)
            info += pickle.load(handle)
        # end of compatibility method

    else:
        # new method, use results.pkl
        with open(os.path.join(rdir, 'results.pkl'), 'rb') as handle:
            results = pickle.load(handle)
        
        info = None
        if 'model_build_info' in results:
            info = results['model_build_info']

        if info is None:
            return False, 'info not found'

        if 'model_valid_info' in results:
            info += results['model_valid_info']

        if info is None:
            return False, 'info not found'

    # when this function is called from the console, output is 'text'
    # write and exit
    if output == 'text':
        for val in info:
            if len(val) < 3:
                print(val)
            else:
                print(val[0], ' (', val[1], ') : ', val[2])
        return True, 'model informed OK'

    # this is only reached when this function is called from a web service
    # asking for a JSON
    # 
    # this code serializes the results in a list and then converts it 
    # to a JSON  
    json_results = []
    for i in info:
        # results must be checked to avoid numpy elements not JSON serializable
        if 'numpy.int64' in str(type(i[2])):
            try:
                v = int(i[2])
            except Exception as e:
                LOG.error(e)
                v = None
            json_results.append((i[0], i[1], v))

        elif 'numpy.float64' in str(type(i[2])):
            try:
                v = float(i[2])
            except Exception as e:
                LOG.error(e)
                v = None
            json_results.append((i[0], i[1], v))

        elif isinstance(i[2], np.ndarray):
            if 'bool_' in str(type(i[2][0])):
                temp_results = [
                    'True' if x else 'False' for x in i[2]]
            else:
                # this removes NaNs and creates
                # a plain list of formatted floats from ndarrays
                temp_results = [float("{0:.4f}".format(x)) if not np.isnan(x) else None for x in i[2]]

            json_results.append((i[0], i[1], temp_results ))

        else:
            json_results.append(i)

    return True, json.dumps(json_results)
Example no. 30
    def delta(self, model, version, param, iformat='YAML', isSpace=False):
        ''' load a set of parameters from the configuration file present 
            at the model directory

            also, inserts the keys present in the param_file provided, 
            assuming that it contains a YAML-compatible format, like the one
            generated by manage

            adds some parameters identifying the model and the 
            hash of the configuration file 
        '''

        # loadYaml returns a (success, message) tuple; a tuple is always
        # truthy, so it must be unpacked before testing
        success, message = self.loadYaml(model, version, isSpace)
        if not success:
            return False, message

        # parse the parameter file, assuming it is in
        # a YAML-compatible format
        if iformat == 'JSONS':
            try:
                newp = json.loads(param)
            except Exception as e:
                return False, e
        else:
            try:
                with open(param, 'r') as pfile:
                    if iformat == 'YAML':
                        newp = yaml.safe_load(pfile)
                    elif iformat == 'JSON':
                        newp = json.load(pfile)
            except Exception as e:
                return False, e

        self.applyDelta(newp)

        # the per-key update of the internal dict is implemented in applyDelta, called above
        # dump internal dict to the parameters file
        if isSpace:
            parameters_file_path = utils.space_path(model, version)
        else:
            parameters_file_path = utils.model_path(model, version)

        parameters_file_name = os.path.join(parameters_file_path,
                                            'parameters.yaml')
        try:
            with open(parameters_file_name, 'w') as pfile:
                yaml.dump(self.p, pfile)
        except Exception as e:
            return False, 'unable to write parameters'

        # # correct CV to kfold for conformal models
        # if self.getVal('conformal') is True:
        #     self.setVal('ModelValidationCV','kfold')
        if self.getVal('model') == 'majority':
            self.setVal('conformal', False)

        # self.setVal('md5',utils.md5sum(parameters_file_name))
        self.setVal('md5', self.idataHash())

        return True, 'OK'