class Search:
    ''' Similarity search workflow over a previously built chemical space.

    The constructor loads the space modelID and the space parameters.
    `run` chains the idata -> sapply -> odata objects; the conveyor carries
    the data and the error state from one step to the next.
    '''

    def __init__(self, space, version, output_format=None, label=None):
        ''' Prepares a search on the given space and version.

        Arguments:
            space: space name, resolved to a folder with utils.space_path
            version: space version
            output_format: optional extra output format, appended to the
                'output_format' parameter when not already present
            label: label stored in the conveyor to identify the results

        Raises SystemExit (status 1) when the modelID or the space
        parameters cannot be loaded.
        '''
        LOG.debug('Starting search...')
        self.space = space
        self.version = version
        self.label = label
        self.param = Parameters()
        self.conveyor = Conveyor()

        # identify the workflow type
        self.conveyor.setOrigin('sapply')

        # load modelID
        path = utils.space_path(space, version)
        meta = os.path.join(path, 'space-meta.pkl')
        try:
            with open(meta, 'rb') as handle:
                modelID = pickle.load(handle)
        except Exception:
            # `except Exception` (not a bare except) so that Ctrl-C and
            # SystemExit are never reported as a modelID loading problem
            LOG.critical(f'Unable to load modelID from {meta}. Aborting...')
            sys.exit(1)

        self.conveyor.addMeta('modelID', modelID)
        LOG.debug(f'Loaded space with modelID: {modelID}')

        # assign prediction (search) label
        self.conveyor.addVal(label, 'prediction_label', 'prediction label',
                             'method', 'single',
                             'Label used to identify the prediction')

        success, results = self.param.loadYaml(space, version, isSpace=True)
        if not success:
            LOG.critical(f'Unable to load space parameters. {results}. Aborting...')
            sys.exit(1)

        # add additional output formats included in the constructor
        # this is required to add JSON format as output when the object is
        # instantiated from a web service call, requiring this output
        if output_format is not None:
            if output_format not in self.param.getVal('output_format'):
                self.param.appVal('output_format', output_format)

    def set_single_CPU(self) -> None:
        ''' Forces the use of a single CPU '''
        LOG.debug('parameter "numCPUs" forced to be 1')
        self.param.setVal('numCPUs', 1)

    def getVal(self, idict, ikey):
        ''' Returns idict[ikey], or None when ikey is not present in idict '''
        if ikey not in idict:
            return None
        return idict[ikey]

    def run(self, param_dict):
        ''' Executes a default search workflow.

        Arguments:
            param_dict: dictionary with the search settings. The input is
                taken from 'smarts' (which also sets the 'input_type'
                parameter to 'smarts') or else from 'infile'. The similarity
                settings are read from the YAML file named by
                'runtime_param' or, when that key is absent, from the
                'metric', 'numsel' and 'cutoff' keys.

        Returns whatever the odata object `run()` returns. Problems found
        along the way are stored in the conveyor and reported by odata.
        '''
        metric = None
        numsel = None
        cutoff = None

        # defined upfront so the error paths below never leave them unbound
        input_source = None
        sapply_child = None
        odata_child = None

        # path to endpoint
        epd = utils.space_path(self.space, self.version)
        if not os.path.isdir(epd):
            LOG.error(f'Unable to find space {self.space}')
            self.conveyor.setError(f'Unable to find space {self.space}, version {self.version}')

        # select the input source
        if self.getVal(param_dict, 'smarts') is not None:
            input_source = param_dict['smarts']
            self.param.setVal('input_type', 'smarts')
        elif self.getVal(param_dict, 'infile') is not None:
            input_source = param_dict['infile']
        else:
            LOG.error(f'Unable to find input_file')
            self.conveyor.setError('Unable to find input_file')

        # similarity settings
        # NOTE(review): the `else` below is read as belonging to the
        # "'runtime_param' in param_dict" test, so a key which is present
        # but set to None leaves metric/numsel/cutoff as None - confirm
        if 'runtime_param' in param_dict:
            runtime_param = self.getVal(param_dict, 'runtime_param')
            if runtime_param is not None:
                LOG.info(f'runtime parameters: {str(runtime_param)}')
                try:
                    with open(runtime_param, 'r') as pfile:
                        rtparam = yaml.safe_load(pfile)
                        try:
                            metric = rtparam['similarity_metric']
                            numsel = rtparam['similarity_cutoff_num']
                            cutoff = rtparam['similarity_cutoff_distance']
                        except Exception:
                            LOG.error('wrong format in the runtime similarity parameters')
                            self.conveyor.setError('wrong format in the runtime similarity parameters')
                except Exception:
                    LOG.error('runtime similarity parameter file not found')
                    self.conveyor.setError('runtime similarity parameter file not found')
        else:
            try:
                metric = param_dict['metric']
                numsel = param_dict['numsel']
                cutoff = param_dict['cutoff']
            except Exception:
                LOG.error('wrong format in the runtime similarity parameters')
                self.conveyor.setError('wrong format in the runtime similarity parameters')

        md = self.param.getVal('computeMD_method')
        if utils.isFingerprint(md) and len(md) > 1:
            LOG.warning(f'When using fingerprints, only a single type of MD can be used to build spaces. Selecting {md[0]}')
            self.conveyor.setWarning(f'When using fingerprints, only a single type of MD can be used to build spaces. Selecting {md[0]}')
            self.param.setVal('computeMD_method', [md[0]])

        if not self.conveyor.getError():
            # uses the child classes within the 'space' folder,
            # to allow customization of
            # the processing applied to each space
            modpath = utils.smodule_path(self.space, self.version)

            idata_child = importlib.import_module(modpath + ".idata_child")
            sapply_child = importlib.import_module(modpath + ".sapply_child")
            odata_child = importlib.import_module(modpath + ".odata_child")

            # run idata object, in charge of generate space data from input
            try:
                idata = idata_child.IdataChild(self.param, self.conveyor, input_source)
            except Exception:
                LOG.warning('Idata child architecture mismatch, defaulting to Idata parent')
                idata = Idata(self.param, self.conveyor, input_source)

            idata.run()
            LOG.debug(f'idata child {type(idata).__name__} completed `run()`')

        if not self.conveyor.getError():
            # make sure there is X data (SMARTS searches carry no xmatrix)
            if not self.conveyor.isKey('xmatrix'):
                if not self.conveyor.isKey('SMARTS'):
                    LOG.debug(f'Failed to compute MDs')
                    self.conveyor.setError(f'Failed to compute MDs')

        if not self.conveyor.getError():
            # run apply object, in charge of generate a prediction from idata
            try:
                sapply = sapply_child.SapplyChild(self.param, self.conveyor)
            except Exception:
                LOG.warning('Sapply child architecture mismatch, defaulting to Sapply parent')
                sapply = Sapply(self.param, self.conveyor)

            sapply.run(cutoff, numsel, metric)
            LOG.debug(f'sapply child {type(sapply).__name__} completed `run()`')

        # run odata object, in charge of formatting the prediction results
        # note that if any of the above steps failed, an error has been inserted in the
        # conveyor and odata will take care of showing an error message
        if odata_child is None:
            # the children were never imported (an error was raised before),
            # so the parent class is used to report it
            odata = Odata(self.param, self.conveyor)
        else:
            try:
                odata = odata_child.OdataChild(self.param, self.conveyor)
            except Exception:
                LOG.warning('Odata child architecture mismatch, defaulting to Odata parent')
                odata = Odata(self.param, self.conveyor)

        return odata.run()
class Predict:
    ''' Prediction workflow for a previously built model.

    The constructor loads the modelID and the model parameters. `run`
    chains the idata -> (optional sapply) -> apply -> odata objects; the
    conveyor carries the data and the error state between the steps.
    '''

    def __init__(self, model, version=0, output_format=None, label=None):
        ''' Prepares a prediction with the given model and version.

        Arguments:
            model: model name
            version: model version
            output_format: optional extra output format, appended to the
                'output_format' parameter when not already present. When it
                contains 'ghost' the 'output_similar' parameter is set to False
            label: label stored in the conveyor to identify the prediction

        Raises SystemExit (status 1) when the modelID or the model
        parameters cannot be loaded.
        '''
        LOG.debug('Starting predict...')
        self.model = model
        self.version = version
        self.param = Parameters()
        self.conveyor = Conveyor()

        # identify the workflow type
        self.conveyor.setOrigin('apply')

        # load modelID
        success, result = utils.getModelID(model, version, 'model')
        if not success:
            LOG.critical(f'{result}. Aborting...')
            sys.exit(1)

        self.conveyor.addMeta('modelID', result)
        LOG.debug(f'Loaded model with modelID: {result}')

        # assign prediction label
        self.conveyor.addVal(label, 'prediction_label', 'prediction label',
                             'method', 'single',
                             'Label used to identify the prediction')

        success, results = self.param.loadYaml(model, version)
        if not success:
            LOG.critical(f'Unable to load model parameters. {results}. Aborting...')
            sys.exit(1)

        # add additional output formats included in the constructor
        # this is required to add JSON format as output when the object is
        # instantiated from a web service call, requiring this output
        if output_format is not None:
            if output_format not in self.param.getVal('output_format'):
                self.param.appVal('output_format', output_format)

            # NOTE(review): read as a sibling of the test above (not nested
            # in it), so 'ghost' always disables the similarity search - confirm
            if 'ghost' in output_format:
                self.param.setVal('output_similar', False)

    def get_ensemble(self):
        ''' Returns a Boolean indicating if the model uses external input
            sources and a list with these sources '''
        return self.param.getEnsemble()

    def set_single_CPU(self) -> None:
        ''' Forces the use of a single CPU '''
        LOG.debug('parameter "numCPUs" forced to be 1')
        self.param.setVal('numCPUs', 1)

    def run(self, input_source):
        ''' Executes a default prediction workflow.

        Arguments:
            input_source: input handed over to the idata object

        Returns whatever the odata object `run()` returns. Problems found
        along the way are stored in the conveyor and reported by odata.
        '''
        # path to endpoint (the folder existence check is currently disabled)
        endpoint = utils.model_path(self.model, self.version)

        # uses the child classes within the 'model' folder,
        # to allow customization of
        # the processing applied to each model
        modpath = utils.module_path(self.model, self.version)

        idata_child = importlib.import_module(modpath + ".idata_child")
        apply_child = importlib.import_module(modpath + ".apply_child")
        odata_child = importlib.import_module(modpath + ".odata_child")

        # run idata object, in charge of generate model data from input
        try:
            idata = idata_child.IdataChild(self.param, self.conveyor, input_source)
        except Exception:
            LOG.warning('Idata child architecture mismatch, defaulting to Idata parent')
            idata = Idata(self.param, self.conveyor, input_source)

        idata.run()
        LOG.debug(f'idata child {type(idata).__name__} completed `run()`')

        if not self.conveyor.getError():
            success, results = idata.preprocess_apply()
            if not success:
                self.conveyor.setError(results)

        if not self.conveyor.getError():
            # make sure there is X data
            if not self.conveyor.isKey('xmatrix'):
                LOG.debug(f'Failed to compute MDs')
                self.conveyor.setError(f'Failed to compute MDs')

        # for secret models avoid searching similar compounds
        space_pkl = os.path.join(endpoint, 'space.pkl')
        if not os.path.isfile(space_pkl):
            self.param.setVal('output_similar', False)

        if not self.conveyor.getError():
            if self.param.getVal('output_similar') is True:

                from flame.sapply import Sapply

                metric = self.param.getVal('similarity_metric')
                numsel = self.param.getVal('similarity_cutoff_num')
                cutoff = self.param.getVal('similarity_cutoff_distance')

                sapply_child = importlib.import_module(modpath + ".sapply_child")

                # run sapply object, in charge of finding similar compounds
                try:
                    sapply = sapply_child.SapplyChild(self.param, self.conveyor)
                except Exception:
                    LOG.warning('Sapply child architecture mismatch, defaulting to Sapply parent')
                    sapply = Sapply(self.param, self.conveyor)

                sapply.run(cutoff, numsel, metric)
                LOG.debug(f'sapply child {type(sapply).__name__} completed `run()`')

        if not self.conveyor.getError():
            # run apply object, in charge of generate a prediction from idata
            try:
                apply = apply_child.ApplyChild(self.param, self.conveyor)
            except Exception:
                LOG.warning('Apply child architecture mismatch, defaulting to Apply parent')
                apply = Apply(self.param, self.conveyor)

            apply.run()
            LOG.debug(f'apply child {type(apply).__name__} completed `run()`')

        # run odata object, in charge of formatting the prediction results
        # note that if any of the above steps failed, an error has been inserted in the
        # conveyor and odata will take care of showing an error message
        try:
            odata = odata_child.OdataChild(self.param, self.conveyor)
        except Exception:
            LOG.warning('Odata child architecture mismatch, defaulting to Odata parent')
            odata = Odata(self.param, self.conveyor)

        return odata.run()
def _tar_member_escapes(base_path, member_name):
    ''' Returns True when member_name, joined to base_path, resolves outside base_path '''
    root = os.path.realpath(base_path)
    target = os.path.realpath(os.path.join(root, member_name))
    try:
        return os.path.commonpath([root, target]) != root
    except ValueError:
        # raised e.g. for paths on different drives: certainly not inside
        return True


def action_import(model):
    ''' Creates a new model tree from a tarball file with the name "model.tgz"

    Arguments:
        model: name or path of the package; the '.tgz' extension is added
            when missing. A name ending in '_vNNNNNN' is handled as a
            single-version export of version NNNNNN.

    Returns a tuple (success, message).

    For single-version packages the version folder is cloned into 'dev'.
    When that folder contains 'confidential_model.yaml', the local children
    modules, 'model-results.pkl' and 'model-meta.pkl' are generated from it.
    '''
    import re

    if not model:
        return False, 'Empty model label'

    # convert model to endpoint string
    base_model = os.path.basename(model)
    endpoint, ext = os.path.splitext(base_model)

    # find version in case of single version exports
    version = None
    if re.match(r'_v[0-9]{6}', endpoint[-8:]):
        version = int(endpoint[-6:])
        endpoint = endpoint[:-8]

    base_path = utils.model_tree_path(endpoint)

    # safety checks
    if os.path.isdir(base_path):
        return False, f'Endpoint {endpoint} already exists'

    if ext != '.tgz':
        importfile = os.path.abspath(model + '.tgz')
    else:
        importfile = model

    LOG.info(f'Importing {importfile} ...')

    if not os.path.isfile(importfile):
        LOG.info(f'Importing package {importfile} not found')
        return False, f'Importing package {importfile} not found'

    confidential = False

    # create directory
    try:
        os.mkdir(base_path)
    except Exception as e:
        return False, f'Error creating directory {base_path}: {e}'

    # unpack tar.gz. This is done for any kind of export file
    # The package comes from outside, so member paths are validated first.
    # This only checks names; it does not audit links stored in the archive.
    try:
        with tarfile.open(importfile, 'r:gz') as tar:
            for member in tar.getmembers():
                if _tar_member_escapes(base_path, member.name):
                    raise tarfile.TarError(f'unsafe path "{member.name}"')
            tar.extractall(base_path)
    except (tarfile.TarError, OSError, EOFError) as e:
        # remove the folder just created; otherwise any new import attempt
        # would be rejected with "already exists"
        shutil.rmtree(base_path, ignore_errors=True)
        return False, f'Error unpacking {importfile}: {e}'

    # when importing a single version we need to clone the last folder to 'dev'
    inner_dirs = os.listdir(base_path)
    if 'dev' not in inner_dirs and version is not None:

        # assign single version using file name
        version_dir = f'ver{version:06d}'

        # as a fallback assign the last internal folder
        # (the folder must be looked up inside base_path, not in the
        # current working directory)
        if not os.path.isdir(os.path.join(base_path, version_dir)):
            if not inner_dirs:
                shutil.rmtree(base_path, ignore_errors=True)
                return False, f'Importing package {importfile} is empty'
            version_dir = inner_dirs[-1]

        version_path = os.path.join(base_path, version_dir)
        confidential_model = os.path.join(version_path, 'confidential_model.yaml')

        # check if it is a confidential model
        if os.path.isfile(confidential_model):
            confidential = True

            # confidential packages are completed with the local children
            flame_source = os.path.dirname(os.path.abspath(__file__))
            children_source = os.path.join(flame_source, 'children')
            children_names = ['apply', 'idata', 'odata', 'learn', 'slearn', 'sapply']
            for cname in children_names:
                cpath = os.path.join(children_source, cname + '_child.py')
                shutil.copy(cpath, version_path)
                LOG.info(f'Adding local children: {cpath} ...')

            # open confidential_model.yaml
            with open(confidential_model, 'r') as fc:
                cmodel = yaml.safe_load(fc)

            # create model-results.pkl
            model_building_info = [
                (key, '', cmodel[key]) for key in ('nobj', 'nvarx', 'model')
            ]

            model_type_info = [
                (key, '', cmodel[key])
                for key in ('quantitative', 'conformal', 'conformal_confidence')
            ]
            model_type_info += [
                ('ensemble', '', False),
                ('ensemble_names', '', []),
                ('ensemble_versions', '', []),
                ('confidential', '', True),
                ('secret', '', True),
            ]

            if cmodel['quantitative']:
                validation_keys = ('R2', 'Q2', 'SDEC', 'SDEP',
                                   'scoringP', 'scoringR')
            else:
                validation_keys = ('MCC_f', 'MCC',
                                   'Sensitivity_f', 'Sensitivity',
                                   'Specificity_f', 'Specificity',
                                   'FP_f', 'FP', 'FN_f', 'FN',
                                   'TP_f', 'TP', 'TN_f', 'TN')
            model_validation_info = [
                (key, '', cmodel[key]) for key in validation_keys
            ]

            conveyor = Conveyor()
            conveyor.addMeta('modelID', cmodel['modelID'])
            conveyor.addMeta('endpoint', endpoint)
            conveyor.addMeta('version', version)
            # NOTE(review): stored as True even when cmodel['quantitative']
            # is False - confirm this is intended
            conveyor.addMeta('quantitative', True)
            conveyor.addMeta('secret', True)

            conveyor.addVal(model_building_info, 'model_build_info',
                            'model building information', 'method', 'single',
                            'Information about the model building')
            conveyor.addVal(model_validation_info, 'model_valid_info',
                            'model validation information', 'method', 'single',
                            'Information about the model validation')
            conveyor.addVal(model_type_info, 'model_type_info',
                            'model type information', 'method', 'single',
                            'Information about the model type')

            results_file_name = os.path.join(version_path, 'model-results.pkl')
            with open(results_file_name, 'wb') as handle:
                conveyor.save(handle)

            # model-meta.pkl is a sequence of six pickled objects
            meta_file_name = os.path.join(version_path, 'model-meta.pkl')
            with open(meta_file_name, 'wb') as handle:
                pickle.dump(cmodel['modelID'], handle)
                pickle.dump(None, handle)
                pickle.dump(None, handle)
                pickle.dump(model_building_info, handle)
                pickle.dump(model_validation_info, handle)
                pickle.dump(model_type_info, handle)

        # clone the version in dev
        shutil.copytree(version_path, os.path.join(base_path, 'dev'))
        LOG.info(f'Cloning version {version} to version 0 ...')

    if confidential:
        LOG.info(f'Import of CONFIDENTIAL model {model} version {version} was successfull')
        return True, 'OK'

    # get libraries
    message = f'Endpoint {endpoint} imported OK'
    for x in os.listdir(base_path):
        model_path = os.path.join(base_path, x)
        model_pkl = os.path.join(model_path, 'estimator.pkl')
        dict_estimator = {}
        if os.path.isfile(model_pkl):
            # SECURITY: unpickling can execute arbitrary code; only import
            # packages obtained from a trusted source
            with open(model_pkl, "rb") as input_file:
                try:
                    dict_estimator = pickle.load(input_file)
                except Exception as e:
                    return False, f'Incompatible libraries found!. Import aborted with message "{str(e)}"'

        # check if the libraries used to build this model are similar to current libraries
        if 'libraries' in dict_estimator:
            success, results = utils.compatible_modules(dict_estimator['libraries'])
            if not success:
                message = f"WARNING: Incompatible libraries detected, {results}. Use at your own risk"
                return False, message

    LOG.info('Libraries used to generate the imported model are compatible with local libraries')

    LOG.info(message)
    return True, message
class Sbuild:
    ''' Chemical space building workflow.

    The constructor generates a new modelID and loads (or updates) the
    space parameters. `run` chains the idata -> slearn -> odata objects;
    the conveyor carries the data and the error state between the steps.
    Version 0 of the space is always used.
    '''

    def __init__(self, space, param_file=None, param_string=None, output_format=None):
        ''' Prepares the building of a space.

        Arguments:
            space: space name
            param_file: optional YAML file applied as a delta on the
                parameters of the space (takes precedence over param_string)
            param_string: optional parameters string applied as a delta
                with the 'JSONS' format
            output_format: optional extra output format, appended to the
                'output_format' parameter when not already present

        Raises SystemExit (status 1) when the parameters cannot be loaded.
        '''
        LOG.debug('Starting sbuild...')
        self.space = space
        self.param = Parameters()
        self.conveyor = Conveyor()

        # identify the workflow type
        self.conveyor.setOrigin('slearn')

        # generate a unique modelID
        self.conveyor.addMeta('modelID', utils.id_generator())
        LOG.debug(f'Generated new space with modelID: {self.conveyor.getMeta("modelID")}')

        # load parameters
        if param_file is not None:
            # use the param_file to update existing parameters at the space
            # directory and save changes to make them persistent
            success, message = self.param.delta(space, 0, param_file,
                                                iformat='YAML', isSpace=True)
        elif param_string is not None:
            success, message = self.param.delta(space, 0, param_string,
                                                iformat='JSONS', isSpace=True)
        else:
            # load parameter file at the space directory
            success, message = self.param.loadYaml(space, 0, isSpace=True)

        # being unable to load parameters is a critical error
        if not success:
            LOG.critical(f'Unable to load space parameters. {message}. Aborting...')
            sys.exit(1)

        md = self.param.getVal('computeMD_method')
        if utils.isFingerprint(md) and len(md) > 1:
            LOG.warning(f'When using fingerprints, only a single type of MD can be used to build spaces. Selecting {md[0]}')
            self.conveyor.setWarning(f'When using fingerprints, only a single type of MD can be used to build spaces. Selecting {md[0]}')
            self.param.setVal('computeMD_method', [md[0]])

        # add additional output formats included in the constructor
        # this is required to add JSON format as output when the object is
        # instantiated from a web service call, requiring this output
        if output_format is not None:
            if output_format not in self.param.getVal('output_format'):
                self.param.appVal('output_format', output_format)

    def set_single_CPU(self) -> None:
        ''' Forces the use of a single CPU '''
        LOG.debug('parameter "numCPUs" forced to be 1')
        self.param.setVal('numCPUs', 1)

    def run(self, input_source):
        ''' Executes a default chemical space building workflow.

        Arguments:
            input_source: input handed over to the idata object

        Returns whatever the odata object `run()` returns. Problems found
        along the way are stored in the conveyor and reported by odata.
        '''
        # defined upfront so the error path never leaves them unbound
        slearn_child = None
        odata_child = None

        # path to endpoint
        epd = utils.space_path(self.space, 0)
        if not os.path.isdir(epd):
            self.conveyor.setError(f'Unable to find space {self.space}')

        # import ichild classes
        if not self.conveyor.getError():
            # uses the child classes within the 'space' folder,
            # to allow customization of the processing applied to each space
            modpath = utils.smodule_path(self.space, 0)

            idata_child = importlib.import_module(modpath + ".idata_child")
            slearn_child = importlib.import_module(modpath + ".slearn_child")
            odata_child = importlib.import_module(modpath + ".odata_child")

            # run idata object, in charge of generate space data from input
            try:
                idata = idata_child.IdataChild(self.param, self.conveyor, input_source)
            except Exception:
                LOG.warning('Idata child architecture mismatch, defaulting to Idata parent')
                idata = Idata(self.param, self.conveyor, input_source)

            idata.run()
            LOG.debug(f'idata child {type(idata).__name__} completed `run()`')

        if not self.conveyor.getError():
            success, results = idata.preprocess_create()
            if not success:
                self.conveyor.setError(results)

        if not self.conveyor.getError():
            # check there is a suitable X
            if not self.conveyor.isKey('xmatrix'):
                self.conveyor.setError(f'Failed to compute MDs')

        if not self.conveyor.getError():
            # instantiate slearn (build a space from idata) and run it
            try:
                slearn = slearn_child.SlearnChild(self.param, self.conveyor)
            except Exception:
                LOG.warning('Slearn child architecture mismatch, defaulting to Slearn parent')
                slearn = Slearn(self.param, self.conveyor)

            slearn.run()
            LOG.debug(f'slearn child {type(slearn).__name__} completed `run()`')

        # run odata object, in charge of formatting the prediction results
        # note that if any of the above steps failed, an error has been inserted in the
        # conveyor and odata will take care of showing an error message
        if odata_child is None:
            # the children were never imported (the space was not found),
            # so the parent class is used to report the error
            odata = Odata(self.param, self.conveyor)
        else:
            try:
                odata = odata_child.OdataChild(self.param, self.conveyor)
            except Exception:
                LOG.warning('Odata child architecture mismatch, defaulting to Odata parent')
                odata = Odata(self.param, self.conveyor)

        return odata.run()
class Build:
    ''' Model building workflow.

    The constructor generates a new modelID and loads (or updates) the
    model parameters. `run` chains the idata -> (optional slearn) -> learn
    -> odata objects; the conveyor carries the data and the error state
    between the steps. Version 0 of the model is always used.
    '''

    def __init__(self, model, param_file=None, param_string=None, output_format=None):
        ''' Prepares the building of a model.

        Arguments:
            model: model name
            param_file: optional YAML file applied as a delta on the
                parameters of the model (takes precedence over param_string)
            param_string: optional parameters string applied as a delta
                with the 'JSONS' format
            output_format: optional extra output format, appended to the
                'output_format' parameter when not already present

        Raises SystemExit (status 1) when the parameters cannot be loaded.
        '''
        LOG.debug('Starting build...')
        self.model = model
        self.param = Parameters()
        self.conveyor = Conveyor()

        # identify the workflow type
        self.conveyor.setOrigin('learn')

        # generate a unique modelID
        self.conveyor.addMeta('modelID', utils.id_generator())
        LOG.debug(f'Generated new model with modelID: {self.conveyor.getMeta("modelID")}')

        # load parameters
        if param_file is not None:
            # use the param_file to update existing parameters at the model
            # directory and save changes to make them persistent
            success, message = self.param.delta(model, 0, param_file, iformat='YAML')
        elif param_string is not None:
            success, message = self.param.delta(model, 0, param_string, iformat='JSONS')
        else:
            # load parameter file at the model directory
            success, message = self.param.loadYaml(model, 0)

        # being unable to load parameters is a critical error
        if not success:
            LOG.critical(f'Unable to load model parameters. {message}. Aborting...')
            sys.exit(1)

        # add additional output formats included in the constructor
        # this is required to add JSON format as output when the object is
        # instantiated from a web service call, requiring this output
        if output_format is not None:
            if output_format not in self.param.getVal('output_format'):
                self.param.appVal('output_format', output_format)

        if self.param.getVal('confidential'):
            self.confidentialAuditParam()

    def confidentialAuditParam(self):
        ''' Adapts the parameters to the constraints of confidential models.

        The 'model' parameter is forced to PLSR (quantitative) or PLSDA
        (otherwise), 'conformal' is switched off and the resulting
        parameters are written to 'parameters.yaml' in the model folder.
        '''
        import yaml

        original_method = self.param.getVal('model')
        required_method = 'PLSR' if self.param.getVal('quantitative') else 'PLSDA'
        if original_method != required_method:
            self.param.setVal('model', required_method)
            LOG.info(f'CONFIDENTIALITY AUDIT: the model was set to {required_method}, '
                     f'the original method {original_method} was not suitable to build confidential models')

        # TODO: conformal support
        if self.param.getVal('conformal'):
            self.param.setVal('conformal', False)
            LOG.info('CONFIDENTIALITY AUDIT: conformal was set to False. '
                     'Conformal models are not supported for now in confidential models')

        # make the audited parameters persistent
        parameters_file_path = utils.model_path(self.model, 0)
        parameters_file_name = os.path.join(parameters_file_path, 'parameters.yaml')
        with open(parameters_file_name, 'w') as pfile:
            yaml.dump(self.param.p, pfile)

    def get_ensemble(self):
        ''' Returns a Boolean indicating if the model uses external input
            sources and a list with these sources '''
        return self.param.getEnsemble()

    def extend_modelID(self, ensembleID):
        ''' Appends "-<ensembleID>" to the modelID stored in the conveyor '''
        modelID = self.conveyor.getMeta('modelID')
        modelID = f'{modelID}-{ensembleID}'
        self.conveyor.addMeta('modelID', modelID)
        # the ID is stored as meta, hence it must be read back with getMeta
        LOG.debug(f'modelID re-defined as {self.conveyor.getMeta("modelID")}')

    def set_single_CPU(self) -> None:
        ''' Forces the use of a single CPU '''
        LOG.debug('parameter "numCPUs" forced to be 1')
        self.param.setVal('numCPUs', 1)

    def run(self, input_source):
        ''' Executes a default model building workflow.

        Arguments:
            input_source: input handed over to the idata object

        Returns whatever the odata object `run()` returns. Problems found
        along the way are stored in the conveyor and reported by odata.
        '''
        # path to endpoint (the folder existence check is currently disabled)
        epd = utils.model_path(self.model, 0)

        # uses the child classes within the 'model' folder,
        # to allow customization of the processing applied to each model
        modpath = utils.module_path(self.model, 0)

        idata_child = importlib.import_module(modpath + ".idata_child")
        learn_child = importlib.import_module(modpath + ".learn_child")
        odata_child = importlib.import_module(modpath + ".odata_child")

        # run idata object, in charge of generate model data from input
        try:
            idata = idata_child.IdataChild(self.param, self.conveyor, input_source)
        except Exception:
            LOG.warning('Idata child architecture mismatch, defaulting to Idata parent')
            idata = Idata(self.param, self.conveyor, input_source)

        idata.run()
        LOG.debug(f'idata child {type(idata).__name__} completed `run()`')

        if not self.conveyor.getError():
            success, results = idata.preprocess_create()
            if not success:
                self.conveyor.setError(results)

        if not self.conveyor.getError():
            # check there is a suitable X and Y
            if not self.conveyor.isKey('xmatrix'):
                self.conveyor.setError(f'Failed to compute MDs')

            if not self.conveyor.isKey('ymatrix'):
                self.conveyor.setError(f'No activity data (Y) found in training series')

        # run optional chemical space building for supporting "closest" training series object
        if self.param.getVal('output_similar') is True:

            from flame.slearn import Slearn

            slearn_child = importlib.import_module(modpath + ".slearn_child")

            if not self.conveyor.getError():
                # instantiate slearn (build a space from idata) and run it
                try:
                    slearn = slearn_child.SlearnChild(self.param, self.conveyor)
                except Exception:
                    LOG.warning('Slearn child architecture mismatch, defaulting to Slearn parent')
                    slearn = Slearn(self.param, self.conveyor)

                slearn.run()
                LOG.debug(f'slearn child {type(slearn).__name__} completed `run()`')

        if not self.conveyor.getError():
            # instantiate learn (build a model from idata) and run it
            try:
                learn = learn_child.LearnChild(self.param, self.conveyor)
            except Exception:
                LOG.warning('Learn child architecture mismatch, defaulting to Learn parent')
                learn = Learn(self.param, self.conveyor)

            learn.run()
            LOG.debug(f'learn child {type(learn).__name__} completed `run()`')

        # run odata object, in charge of formatting the prediction results
        # note that if any of the above steps failed, an error has been inserted in the
        # conveyor and odata will take care of showing an error message
        try:
            odata = odata_child.OdataChild(self.param, self.conveyor)
        except Exception:
            LOG.warning('Odata child architecture mismatch, defaulting to Odata parent')
            odata = Odata(self.param, self.conveyor)

        return odata.run()