class Search:
    ''' Similarity search workflow.

    Loads the modelID and the parameters of an existing space and runs the
    idata -> sapply -> odata pipeline on the input described by a
    dictionary of runtime settings. The child classes stored within the
    space folder are tried first, falling back to the parent classes when
    they cannot be instantiated.
    '''

    def __init__(self, space, version, output_format=None, label=None):
        ''' Prepares the parameters and the conveyor used by run()

        Arguments:
            space: name of the space, as understood by utils.space_path
            version: version of the space
            output_format: optional extra output format, appended to the
                'output_format' parameter when not already present
            label: label stored in the conveyor as 'prediction_label'

        The process is terminated with sys.exit() when either the
        modelID or the space parameters cannot be loaded.
        '''
        LOG.debug('Starting search...')
        self.space = space
        self.version = version
        self.label = label
        self.param = Parameters()
        self.conveyor = Conveyor()

        # identify the workflow type
        self.conveyor.setOrigin('sapply')

        # load modelID
        path = utils.space_path(space, version)
        meta = os.path.join(path, 'space-meta.pkl')
        try:
            with open(meta, 'rb') as handle:
                modelID = pickle.load(handle)
        except Exception:
            # Exception (not a bare except) so Ctrl-C is not reported as
            # a corrupted meta file
            LOG.critical(f'Unable to load modelID from {meta}. Aborting...')
            sys.exit()

        self.conveyor.addMeta('modelID', modelID)
        LOG.debug(f'Loaded space with modelID: {modelID}')

        # assign prediction (search) label
        self.conveyor.addVal(label, 'prediction_label', 'prediction label',
                             'method', 'single',
                             'Label used to identify the prediction')

        success, results = self.param.loadYaml(space, version, isSpace=True)
        if not success:
            LOG.critical(f'Unable to load space parameters. {results}. Aborting...')
            sys.exit()

        # add additional output formats included in the constructor
        # this is required to add JSON format as output when the object is
        # instantiated from a web service call, requiring this output
        if output_format is not None:
            if output_format not in self.param.getVal('output_format'):
                self.param.appVal('output_format', output_format)

    def set_single_CPU(self) -> None:
        ''' Forces the use of a single CPU '''
        LOG.debug('parameter "numCPUs" forced to be 1')
        self.param.setVal('numCPUs', 1)

    def getVal(self, idict, ikey):
        ''' Returns idict[ikey], or None when ikey is not in idict '''
        if ikey not in idict:
            return None
        return idict[ikey]

    def run(self, param_dict):
        ''' Executes a default search workflow

        Arguments:
            param_dict: dictionary with the runtime settings. The input
                is read from 'smarts' or, when absent, from 'infile'.
                The similarity settings are read either from the YAML
                file named by 'runtime_param' (keys 'similarity_metric',
                'similarity_cutoff_num', 'similarity_cutoff_distance')
                or, when 'runtime_param' is not a key of the dictionary,
                from the keys 'metric', 'numsel' and 'cutoff'.

        Returns the value returned by the odata object run() method.
        Any problem found on the way is stored in the conveyor as an
        error, which skips the remaining steps up to odata.
        '''
        metric = None
        numsel = None
        cutoff = None

        # the child modules are only imported when no error was found
        # before; None tells the final step to go straight to Odata
        odata_child = None

        # path to endpoint
        epd = utils.space_path(self.space, self.version)
        if not os.path.isdir(epd):
            LOG.error(f'Unable to find space {self.space}')
            self.conveyor.setError(f'Unable to find space {self.space}, version {self.version}')

        if self.getVal(param_dict, 'smarts') is not None:
            input_source = param_dict['smarts']
            self.param.setVal('input_type', 'smarts')
        elif self.getVal(param_dict, 'infile') is not None:
            input_source = param_dict['infile']
        else:
            input_source = None
            LOG.error('Unable to find input_file')
            self.conveyor.setError('Unable to find input_file')

        if 'runtime_param' in param_dict:
            runtime_param = self.getVal(param_dict, 'runtime_param')
            if runtime_param is not None:
                LOG.info(f'runtime parameters: {str(runtime_param)}')
                try:
                    with open(runtime_param, 'r') as pfile:
                        rtparam = yaml.safe_load(pfile)
                except OSError:
                    LOG.error('runtime similarity parameter file not found')
                    self.conveyor.setError('runtime similarity parameter file not found')
                except Exception:
                    # the file exists but could not be parsed
                    LOG.error('wrong format in the runtime similarity parameters')
                    self.conveyor.setError('wrong format in the runtime similarity parameters')
                else:
                    try:
                        metric = rtparam['similarity_metric']
                        numsel = rtparam['similarity_cutoff_num']
                        cutoff = rtparam['similarity_cutoff_distance']
                    except (KeyError, TypeError):
                        # TypeError covers files not containing a mapping
                        LOG.error('wrong format in the runtime similarity parameters')
                        self.conveyor.setError('wrong format in the runtime similarity parameters')
        else:
            try:
                metric = param_dict['metric']
                numsel = param_dict['numsel']
                cutoff = param_dict['cutoff']
            except (KeyError, TypeError):
                LOG.error('wrong format in the runtime similarity parameters')
                self.conveyor.setError('wrong format in the runtime similarity parameters')

        md = self.param.getVal('computeMD_method')
        if utils.isFingerprint(md) and len(md) > 1:
            md_warning = f'When using fingerprints, only a single type of MD can be used to build spaces. Selecting {md[0]}'
            LOG.warning(md_warning)
            self.conveyor.setWarning(md_warning)
            self.param.setVal('computeMD_method', [md[0]])

        if not self.conveyor.getError():
            # uses the child classes within the 'space' folder,
            # to allow customization of
            # the processing applied to each space
            modpath = utils.smodule_path(self.space, self.version)

            idata_child = importlib.import_module(modpath + ".idata_child")
            sapply_child = importlib.import_module(modpath + ".sapply_child")
            odata_child = importlib.import_module(modpath + ".odata_child")

            # run idata object, in charge of generate space data from input
            try:
                idata = idata_child.IdataChild(self.param, self.conveyor, input_source)
            except Exception:
                LOG.warning('Idata child architecture mismatch, defaulting to Idata parent')
                idata = Idata(self.param, self.conveyor, input_source)

            idata.run()
            LOG.debug(f'idata child {type(idata).__name__} completed `run()`')

        if not self.conveyor.getError():
            # make sure there is X data (or a SMARTS pattern)
            if not self.conveyor.isKey('xmatrix'):
                if not self.conveyor.isKey('SMARTS'):
                    LOG.debug('Failed to compute MDs')
                    self.conveyor.setError('Failed to compute MDs')

        if not self.conveyor.getError():
            # run apply object, in charge of generate a prediction from idata
            try:
                sapply = sapply_child.SapplyChild(self.param, self.conveyor)
            except Exception:
                LOG.warning('Sapply child architecture mismatch, defaulting to Sapply parent')
                sapply = Sapply(self.param, self.conveyor)

            sapply.run(cutoff, numsel, metric)
            LOG.debug(f'sapply child {type(sapply).__name__} completed `run()`')

        # run odata object, in charge of formatting the prediction results
        # note that if any of the above steps failed, an error has been inserted in the
        # conveyor and odata will take care of showing an error message
        odata = None
        if odata_child is not None:
            try:
                odata = odata_child.OdataChild(self.param, self.conveyor)
            except Exception:
                LOG.warning('Odata child architecture mismatch, defaulting to Odata parent')
        if odata is None:
            odata = Odata(self.param, self.conveyor)

        return odata.run()
class Predict:
    ''' Prediction workflow.

    Loads the modelID and the parameters of an existing model and runs
    the idata -> (sapply) -> apply -> odata pipeline on an input source.
    The child classes stored within the model folder are tried first,
    falling back to the parent classes when they cannot be instantiated.
    '''

    def __init__(self, model, version=0, output_format=None, label=None):
        ''' Prepares the parameters and the conveyor used by run()

        Arguments:
            model: name of the model
            version: version of the model (0 by default)
            output_format: optional extra output format, appended to the
                'output_format' parameter when not already present. When
                it contains 'ghost' the parameter 'output_similar' is
                set to False
            label: label stored in the conveyor as 'prediction_label'

        The process is terminated with sys.exit() when either the
        modelID or the model parameters cannot be loaded.
        '''
        LOG.debug('Starting predict...')
        self.model = model
        self.version = version
        self.param = Parameters()
        self.conveyor = Conveyor()

        # identify the workflow type
        self.conveyor.setOrigin('apply')

        # load modelID
        success, result = utils.getModelID(model, version, 'model')
        if not success:
            LOG.critical(f'{result}. Aborting...')
            sys.exit()

        self.conveyor.addMeta('modelID', result)
        LOG.debug(f'Loaded model with modelID: {result}')

        # assign prediction label
        self.conveyor.addVal(label, 'prediction_label', 'prediction label',
                             'method', 'single',
                             'Label used to identify the prediction')

        success, results = self.param.loadYaml(model, version)
        if not success:
            LOG.critical(f'Unable to load model parameters. {results}. Aborting...')
            sys.exit()

        # add additional output formats included in the constructor
        # this is required to add JSON format as output when the object is
        # instantiated from a web service call, requiring this output
        if output_format is not None:
            if output_format not in self.param.getVal('output_format'):
                self.param.appVal('output_format', output_format)

            if 'ghost' in output_format:
                self.param.setVal('output_similar', False)

    def get_ensemble(self):
        ''' Returns a Boolean indicating if the model uses external input
            sources and a list with these sources '''
        return self.param.getEnsemble()

    def set_single_CPU(self) -> None:
        ''' Forces the use of a single CPU '''
        LOG.debug('parameter "numCPUs" forced to be 1')
        self.param.setVal('numCPUs', 1)

    def run(self, input_source):
        ''' Executes a default prediction workflow

        Arguments:
            input_source: input handed over to the idata object

        Returns the value returned by the odata object run() method.
        Any problem found on the way is stored in the conveyor as an
        error, which skips the remaining steps up to odata.
        '''
        # path to endpoint
        endpoint = utils.model_path(self.model, self.version)

        # uses the child classes within the 'model' folder,
        # to allow customization of
        # the processing applied to each model
        modpath = utils.module_path(self.model, self.version)

        idata_child = importlib.import_module(modpath + ".idata_child")
        apply_child = importlib.import_module(modpath + ".apply_child")
        odata_child = importlib.import_module(modpath + ".odata_child")

        # run idata object, in charge of generate model data from input
        try:
            idata = idata_child.IdataChild(self.param, self.conveyor, input_source)
        except Exception:
            # Exception (not a bare except) so Ctrl-C and sys.exit() are
            # not mistaken for an incompatible child class
            LOG.warning('Idata child architecture mismatch, defaulting to Idata parent')
            idata = Idata(self.param, self.conveyor, input_source)

        idata.run()
        LOG.debug(f'idata child {type(idata).__name__} completed `run()`')

        if not self.conveyor.getError():
            success, results = idata.preprocess_apply()
            if not success:
                self.conveyor.setError(results)

        if not self.conveyor.getError():
            # make sure there is X data
            if not self.conveyor.isKey('xmatrix'):
                LOG.debug('Failed to compute MDs')
                self.conveyor.setError('Failed to compute MDs')

        # for secret models avoid searching similar compounds
        space_pkl = os.path.join(endpoint, 'space.pkl')
        if not os.path.isfile(space_pkl):
            self.param.setVal('output_similar', False)

        if not self.conveyor.getError():
            if self.param.getVal('output_similar') is True:

                from flame.sapply import Sapply

                metric = self.param.getVal('similarity_metric')
                numsel = self.param.getVal('similarity_cutoff_num')
                cutoff = self.param.getVal('similarity_cutoff_distance')

                sapply_child = importlib.import_module(modpath + ".sapply_child")

                # run sapply object, in charge of searching similar compounds
                try:
                    sapply = sapply_child.SapplyChild(self.param, self.conveyor)
                except Exception:
                    LOG.warning('Sapply child architecture mismatch, defaulting to Sapply parent')
                    sapply = Sapply(self.param, self.conveyor)

                sapply.run(cutoff, numsel, metric)
                LOG.debug(f'sapply child {type(sapply).__name__} completed `run()`')

        if not self.conveyor.getError():
            # run apply object, in charge of generate a prediction from idata
            try:
                apply = apply_child.ApplyChild(self.param, self.conveyor)
            except Exception:
                LOG.warning('Apply child architecture mismatch, defaulting to Apply parent')
                apply = Apply(self.param, self.conveyor)

            apply.run()
            LOG.debug(f'apply child {type(apply).__name__} completed `run()`')

        # run odata object, in charge of formatting the prediction results
        # note that if any of the above steps failed, an error has been inserted in the
        # conveyor and odata will take care of showing an error message
        try:
            odata = odata_child.OdataChild(self.param, self.conveyor)
        except Exception:
            LOG.warning('Odata child architecture mismatch, defaulting to Odata parent')
            odata = Odata(self.param, self.conveyor)

        return odata.run()
def action_import(model):
    '''
    Creates a new model tree from a tarball file with the name "model.tgz"

    Arguments:
        model: path or name of the package to import. The extension
            '.tgz' is appended when missing. A name ending in '_v'
            followed by six digits is handled as a single version
            export of that version.

    Returns a tuple (success, message).

    For single version exports (no 'dev' folder in the package) the
    imported version is cloned to 'dev'. When that version contains a
    file 'confidential_model.yaml', the local children modules and the
    files 'model-results.pkl' and 'model-meta.pkl' are generated from
    its contents and no library compatibility check is carried out.
    '''
    import re

    if not model:
        return False, 'Empty model label'

    # convert model to endpoint string
    base_model = os.path.basename(model)
    endpoint, ext = os.path.splitext(base_model)

    # find version in case of single version exports
    version = None
    if re.match(r'_v[0-9]{6}', endpoint[-8:]):
        version = int(endpoint[-6:])
        endpoint = endpoint[:-8]

    base_path = utils.model_tree_path(endpoint)

    # safety checks
    if os.path.isdir(base_path):
        return False, f'Endpoint {endpoint} already exists'

    if ext != '.tgz':
        importfile = os.path.abspath(model + '.tgz')
    else:
        importfile = model

    LOG.info(f'Importing {importfile} ...')

    if not os.path.isfile(importfile):
        LOG.info(f'Importing package {importfile} not found')
        return False, f'Importing package {importfile} not found'

    confidential = False

    # create directory
    try:
        os.mkdir(base_path)
    except Exception as e:
        return False, f'Error creating directory {base_path}: {e}'

    # unpack tar.gz. This is done for any kind of export file
    with tarfile.open(importfile, 'r:gz') as tar:
        # the package comes from outside: when the running Python
        # supports extraction filters, refuse members escaping base_path
        if hasattr(tarfile, 'data_filter'):
            tar.extractall(base_path, filter='data')
        else:
            tar.extractall(base_path)

    # when importing a single version we need to clone the last folder to 'dev'
    inner_dirs = os.listdir(base_path)
    if 'dev' not in inner_dirs and version is not None:

        # assign single version using file name. The folder must be
        # looked for inside the imported tree, not in the working directory
        version_dir = f'ver{version:06d}'

        # as a fallback assign the last internal folder
        if not os.path.isdir(os.path.join(base_path, version_dir)):
            version_dir = inner_dirs[-1]

        version_path = os.path.join(base_path, version_dir)

        confidential_model = os.path.join(version_path, 'confidential_model.yaml')

        # check if it is a confidential model
        if os.path.isfile(confidential_model):

            confidential = True
            flame_source = os.path.dirname(os.path.abspath(__file__))
            children_source = os.path.join(flame_source, 'children')

            for cname in ('apply', 'idata', 'odata', 'learn', 'slearn', 'sapply'):
                cpath = os.path.join(children_source, cname + '_child.py')
                shutil.copy(cpath, version_path)
                LOG.info(f'Adding local children: {cpath} ...')

            # open confidential_model.yaml
            with open(confidential_model, 'r') as fc:
                cmodel = yaml.safe_load(fc)

            (model_building_info,
             model_validation_info,
             model_type_info) = _confidential_model_info(cmodel)

            # create model-results.pkl
            conveyor = Conveyor()
            conveyor.addMeta('modelID', cmodel['modelID'])
            conveyor.addMeta('endpoint', endpoint)
            conveyor.addMeta('version', version)
            # NOTE(review): stored as True even when cmodel['quantitative']
            # is False - confirm this is intended
            conveyor.addMeta('quantitative', True)
            conveyor.addMeta('secret', True)

            conveyor.addVal(model_building_info, 'model_build_info',
                            'model building information', 'method', 'single',
                            'Information about the model building')
            conveyor.addVal(model_validation_info, 'model_valid_info',
                            'model validation information', 'method', 'single',
                            'Information about the model validation')
            conveyor.addVal(model_type_info, 'model_type_info',
                            'model type information', 'method', 'single',
                            'Information about the model type')

            results_file_name = os.path.join(version_path, 'model-results.pkl')
            with open(results_file_name, 'wb') as handle:
                conveyor.save(handle)

            # create model-meta.pkl
            meta_file_name = os.path.join(version_path, 'model-meta.pkl')
            with open(meta_file_name, 'wb') as handle:
                pickle.dump(cmodel['modelID'], handle)
                pickle.dump(None, handle)
                pickle.dump(None, handle)
                pickle.dump(model_building_info, handle)
                pickle.dump(model_validation_info, handle)
                pickle.dump(model_type_info, handle)

        # clone the version in dev
        shutil.copytree(version_path, os.path.join(base_path, 'dev'))
        LOG.info(f'Cloning version {version} to version 0 ...')

    if confidential:
        LOG.info(f'Import of CONFIDENTIAL model {model} version {version} was successfull')
        return True, 'OK'

    # get libraries
    message = f'Endpoint {endpoint} imported OK'
    for x in os.listdir(base_path):
        model_path = os.path.join(base_path, x)
        model_pkl = os.path.join(model_path, 'estimator.pkl')
        dict_estimator = {}
        if os.path.isfile(model_pkl):
            with open(model_pkl, "rb") as input_file:
                try:
                    # NOTE(review): unpickling runs code embedded in the
                    # imported file; only import packages from trusted sources
                    dict_estimator = pickle.load(input_file)
                except Exception as e:
                    return False, f'Incompatible libraries found!. Import aborted with message "{str(e)}"'

        # check if the libraries used to build this model are similar to current libraries
        if 'libraries' in dict_estimator:
            success, results = utils.compatible_modules(dict_estimator['libraries'])
            if not success:
                message = f"WARNING: Incompatible libraries detected, {results}. Use at your own risk"
                return False, message

    LOG.info('Libraries used to generate the imported model are compatible with local libraries')
    LOG.info(message)
    return True, message


def _confidential_model_info(cmodel):
    ''' Builds, from the contents of a confidential_model.yaml file, the
        lists of (key, '', value) tuples describing the model building,
        the model validation and the model type, in this order '''
    building_info = [(key, '', cmodel[key])
                     for key in ('nobj', 'nvarx', 'model')]

    type_info = [(key, '', cmodel[key])
                 for key in ('quantitative', 'conformal', 'conformal_confidence')]
    type_info += [
        ('ensemble', '', False),
        ('ensemble_names', '', []),
        ('ensemble_versions', '', []),
        ('confidential', '', True),
        ('secret', '', True),
    ]

    if cmodel['quantitative']:
        validation_keys = ('R2', 'Q2', 'SDEC', 'SDEP', 'scoringP', 'scoringR')
    else:
        validation_keys = ('MCC_f', 'MCC',
                           'Sensitivity_f', 'Sensitivity',
                           'Specificity_f', 'Specificity',
                           'FP_f', 'FP', 'FN_f', 'FN',
                           'TP_f', 'TP', 'TN_f', 'TN')
    validation_info = [(key, '', cmodel[key]) for key in validation_keys]

    return building_info, validation_info, type_info
class Predict:
    ''' Prediction workflow.

    Loads the parameters of an existing model and runs the
    idata -> apply -> odata pipeline on an input source. The child
    classes stored within the model folder are tried first, falling back
    to the parent classes when they cannot be instantiated.
    '''

    def __init__(self, model, version=0, output_format=None, label=None):
        ''' Prepares the parameters and the conveyor used by run()

        Arguments:
            model: name of the model
            version: version of the model (0 by default)
            output_format: optional extra output format, appended to the
                'output_format' parameter when not already present
            label: label stored in the conveyor as 'prediction_label'

        The process is terminated with sys.exit() when the model
        parameters cannot be loaded.
        '''
        LOG.debug('Starting predict...')
        self.model = model
        self.version = version
        self.param = Parameters()
        self.conveyor = Conveyor()

        self.conveyor.addVal(label, 'prediction_label', 'prediction label',
                             'method', 'single',
                             'Label used to identify the prediction')

        if not self._load_ok(self.param.loadYaml(model, version)):
            LOG.critical('Unable to load model parameters. Aborting...')
            sys.exit()

        # add additional output formats included in the constructor
        # this is required to add JSON format as output when the object is
        # instantiated from a web service call, requiring this output
        if output_format is not None:
            if output_format not in self.param.getVal('output_format'):
                self.param.appVal('output_format', output_format)

    @staticmethod
    def _load_ok(outcome):
        ''' Tells if the value returned by loadYaml means success.

        Accepts a plain Boolean as well as a (success, message) tuple; a
        non-empty tuple is always truthy, so it cannot be tested with a
        bare "not" '''
        if isinstance(outcome, tuple):
            return bool(outcome) and bool(outcome[0])
        return bool(outcome)

    def get_ensemble(self):
        ''' Returns a Boolean indicating if the model uses external input
            sources and a list with these sources '''
        return self.param.getEnsemble()

    def set_single_CPU(self) -> None:
        ''' Forces the use of a single CPU '''
        LOG.debug('parameter "numCPUs" forced to be 1')
        self.param.setVal('numCPUs', 1)

    def run(self, input_source):
        ''' Executes a default prediction workflow

        Arguments:
            input_source: input handed over to the idata object

        Returns the value returned by the odata object run() method.
        Any problem found on the way is stored in the conveyor as an
        error, which skips the remaining steps up to odata.
        '''
        # the child modules are only imported when the model folder
        # exists; None tells the final step to go straight to Odata
        odata_child = None

        # path to endpoint
        endpoint = utils.model_path(self.model, self.version)
        if not os.path.isdir(endpoint):
            self.conveyor.setError(
                f'Unable to find model {self.model}, version {self.version}')

        if not self.conveyor.getError():
            # uses the child classes within the 'model' folder,
            # to allow customization of
            # the processing applied to each model
            modpath = utils.module_path(self.model, self.version)

            idata_child = importlib.import_module(modpath + ".idata_child")
            apply_child = importlib.import_module(modpath + ".apply_child")
            odata_child = importlib.import_module(modpath + ".odata_child")

            # run idata object, in charge of generate model data from input
            try:
                idata = idata_child.IdataChild(self.param, self.conveyor,
                                               input_source)
            except Exception:
                # Exception (not a bare except) so Ctrl-C and sys.exit()
                # are not mistaken for an incompatible child class
                LOG.warning(
                    'Idata child architecture mismatch, defaulting to Idata parent'
                )
                idata = Idata(self.param, self.conveyor, input_source)

            idata.run()
            LOG.debug(f'idata child {type(idata).__name__} completed `run()`')

        if not self.conveyor.getError():
            # make sure there is X data
            if not self.conveyor.isKey('xmatrix'):
                LOG.debug('Failed to compute MDs')
                self.conveyor.setError('Failed to compute MDs')

        if not self.conveyor.getError():
            # run apply object, in charge of generate a prediction from idata
            try:
                apply = apply_child.ApplyChild(self.param, self.conveyor)
            except Exception:
                LOG.warning(
                    'Apply child architecture mismatch, defaulting to Apply parent'
                )
                apply = Apply(self.param, self.conveyor)

            apply.run()
            LOG.debug(f'apply child {type(apply).__name__} completed `run()`')

        # run odata object, in charge of formatting the prediction results
        # note that if any of the above steps failed, an error has been inserted in the
        # conveyor and odata will take care of showing an error message
        odata = None
        if odata_child is not None:
            try:
                odata = odata_child.OdataChild(self.param, self.conveyor)
            except Exception:
                LOG.warning(
                    'Odata child architecture mismatch, defaulting to Odata parent'
                )
        if odata is None:
            odata = Odata(self.param, self.conveyor)

        return odata.run()
class Search:
    ''' Similarity search workflow.

    Loads the parameters of an existing space and runs the
    idata -> sapply -> odata pipeline on the input described by a
    dictionary of runtime settings. The child classes stored within the
    space folder are tried first, falling back to the parent classes
    when they cannot be instantiated.
    '''

    def __init__(self, space, version, output_format=None, label=None):
        ''' Prepares the parameters and the conveyor used by run()

        Arguments:
            space: name of the space
            version: version of the space
            output_format: optional extra output format, appended to the
                'output_format' parameter when not already present
            label: label stored in the conveyor as 'prediction_label'
                and handed over to the odata object

        The process is terminated with sys.exit() when the space
        parameters cannot be loaded.
        '''
        LOG.debug('Starting search...')
        self.space = space
        self.version = version
        self.label = label
        self.param = Parameters()
        self.conveyor = Conveyor()

        self.conveyor.addVal(label, 'prediction_label', 'prediction label',
                             'method', 'single',
                             'Label used to identify the prediction')

        if not self._load_ok(self.param.loadYaml(space, version, isSpace=True)):
            LOG.critical('Unable to load space parameters. Aborting...')
            sys.exit()

        # add additional output formats included in the constructor
        # this is required to add JSON format as output when the object is
        # instantiated from a web service call, requiring this output
        if output_format is not None:
            if output_format not in self.param.getVal('output_format'):
                self.param.appVal('output_format', output_format)

    @staticmethod
    def _load_ok(outcome):
        ''' Tells if the value returned by loadYaml means success.

        Accepts a plain Boolean as well as a (success, message) tuple; a
        non-empty tuple is always truthy, so it cannot be tested with a
        bare "not" '''
        if isinstance(outcome, tuple):
            return bool(outcome) and bool(outcome[0])
        return bool(outcome)

    def set_single_CPU(self) -> None:
        ''' Forces the use of a single CPU '''
        LOG.debug('parameter "numCPUs" forced to be 1')
        self.param.setVal('numCPUs', 1)

    def run(self, param_dict):
        ''' Executes a default search workflow

        Arguments:
            param_dict: dictionary with the runtime settings. The input
                is read from 'infile'. The similarity settings are read
                either from the YAML file named by 'runtime_param' (keys
                'similarity_metric', 'similarity_cutoff_num',
                'similarity_cutoff_distance') or, when 'runtime_param'
                is not a key of the dictionary, from the keys 'metric',
                'numsel' and 'cutoff'.

        Returns the value returned by the odata object run() method.
        Any problem found on the way is stored in the conveyor as an
        error, which skips the remaining steps up to odata.
        '''
        LOG.debug(f'search parameters: {param_dict}')

        metric = None
        numsel = None
        cutoff = None

        # the child modules are only imported when no error was found
        # before; None tells the final step to go straight to Odata
        odata_child = None

        # path to endpoint
        epd = utils.space_path(self.space, self.version)
        if not os.path.isdir(epd):
            self.conveyor.setError(
                f'Unable to find space {self.space}, version {self.version}')

        if 'infile' in param_dict:
            input_source = param_dict['infile']
        else:
            input_source = None
            LOG.error('Unable to find input_file')
            self.conveyor.setError('Unable to find input_file')

        if 'runtime_param' in param_dict:
            runtime_param = param_dict['runtime_param']
            if runtime_param is not None:
                LOG.info(f'runtime parameters: {str(runtime_param)}')
                try:
                    with open(runtime_param, 'r') as pfile:
                        rtparam = yaml.safe_load(pfile)
                except OSError:
                    LOG.error('runtime similarity parameter file not found')
                    self.conveyor.setError(
                        'runtime similarity parameter file not found')
                except Exception:
                    # the file exists but could not be parsed
                    LOG.error(
                        'wrong format in the runtime similarity parameters')
                    self.conveyor.setError(
                        'wrong format in the runtime similarity parameters')
                else:
                    try:
                        metric = rtparam['similarity_metric']
                        numsel = rtparam['similarity_cutoff_num']
                        cutoff = rtparam['similarity_cutoff_distance']
                    except (KeyError, TypeError):
                        # TypeError covers files not containing a mapping
                        LOG.error(
                            'wrong format in the runtime similarity parameters')
                        self.conveyor.setError(
                            'wrong format in the runtime similarity parameters')
        else:
            try:
                metric = param_dict['metric']
                numsel = param_dict['numsel']
                cutoff = param_dict['cutoff']
            except (KeyError, TypeError):
                LOG.error('wrong format in the runtime similarity parameters')
                self.conveyor.setError(
                    'wrong format in the runtime similarity parameters')

        if not self.conveyor.getError():
            # uses the child classes within the 'space' folder,
            # to allow customization of
            # the processing applied to each space
            modpath = utils.smodule_path(self.space, self.version)

            idata_child = importlib.import_module(modpath + ".idata_child")
            sapply_child = importlib.import_module(modpath + ".sapply_child")
            odata_child = importlib.import_module(modpath + ".odata_child")

            # run idata object, in charge of generate space data from input
            try:
                idata = idata_child.IdataChild(self.param, self.conveyor,
                                               input_source)
            except Exception:
                # Exception (not a bare except) so Ctrl-C and sys.exit()
                # are not mistaken for an incompatible child class
                LOG.warning(
                    'Idata child architecture mismatch, defaulting to Idata parent'
                )
                idata = Idata(self.param, self.conveyor, input_source)

            idata.run()
            LOG.debug(f'idata child {type(idata).__name__} completed `run()`')

        if not self.conveyor.getError():
            # make sure there is X data
            if not self.conveyor.isKey('xmatrix'):
                LOG.debug('Failed to compute MDs')
                self.conveyor.setError('Failed to compute MDs')

        if not self.conveyor.getError():
            # run apply object, in charge of generate a prediction from idata
            try:
                sapply = sapply_child.SapplyChild(self.param, self.conveyor)
            except Exception:
                LOG.warning(
                    'Sapply child architecture mismatch, defaulting to Sapply parent'
                )
                sapply = Sapply(self.param, self.conveyor)

            sapply.run(cutoff, numsel, metric)
            LOG.debug(
                f'sapply child {type(sapply).__name__} completed `run()`')

        # run odata object, in charge of formatting the prediction results
        # note that if any of the above steps failed, an error has been inserted in the
        # conveyor and odata will take care of showing an error message
        odata = None
        if odata_child is not None:
            try:
                odata = odata_child.OdataChild(self.param, self.conveyor,
                                               self.label)
            except Exception:
                LOG.warning(
                    'Odata child architecture mismatch, defaulting to Odata parent'
                )
        if odata is None:
            odata = Odata(self.param, self.conveyor, self.label)

        return odata.run()