class Predict: def __init__(self, model, version=0, output_format=None, label=None): LOG.debug('Starting predict...') self.model = model self.version = version self.param = Parameters() self.conveyor = Conveyor() # identify the workflow type self.conveyor.setOrigin('apply') # load modelID success, result = utils.getModelID(model, version, 'model') if not success: LOG.critical(f'{result}. Aborting...') sys.exit() self.conveyor.addMeta('modelID', result) LOG.debug (f'Loaded model with modelID: {result}') # assign prediction label self.conveyor.addVal(label, 'prediction_label', 'prediction label', 'method', 'single', 'Label used to identify the prediction') success, results = self.param.loadYaml(model, version) if not success: LOG.critical(f'Unable to load model parameters. {results}. Aborting...') sys.exit() # add additional output formats included in the constructor # this is requiered to add JSON format as output when the object is # instantiated from a web service call, requiring this output if output_format != None: if output_format not in self.param.getVal('output_format'): self.param.appVal('output_format',output_format) if 'ghost' in output_format: self.param.setVal('output_similar', False) return def get_ensemble(self): ''' Returns a Boolean indicating if the model uses external input sources and a list with these sources ''' return self.param.getEnsemble() def set_single_CPU(self) -> None: ''' Forces the use of a single CPU ''' LOG.debug('parameter "numCPUs" forced to be 1') self.param.setVal('numCPUs',1) def run(self, input_source): ''' Executes a default predicton workflow ''' # path to endpoint endpoint = utils.model_path(self.model, self.version) # if not os.path.isdir(endpoint): # self.conveyor.setError(f'Unable to find model {self.model}, version {self.version}') # #LOG.error(f'Unable to find model {self.model}') # if not self.conveyor.getError(): # uses the child classes within the 'model' folder, # to allow customization of # the processing applied to each model modpath = utils.module_path(self.model, self.version) idata_child = importlib.import_module(modpath+".idata_child") apply_child = importlib.import_module(modpath+".apply_child") odata_child = importlib.import_module(modpath+".odata_child") # run idata object, in charge of generate model data from input try: idata = idata_child.IdataChild(self.param, self.conveyor, input_source) except: LOG.warning ('Idata child architecture mismatch, defaulting to Idata parent') idata = Idata(self.param, self.conveyor, input_source) idata.run() LOG.debug(f'idata child {type(idata).__name__} completed `run()`') if not self.conveyor.getError(): success, results = idata.preprocess_apply() if not success: self.conveyor.setError(results) if not self.conveyor.getError(): # make sure there is X data if not self.conveyor.isKey('xmatrix'): LOG.debug(f'Failed to compute MDs') self.conveyor.setError(f'Failed to compute MDs') # for secret models avoid searching similar compounds space_pkl = os.path.join(endpoint,'space.pkl') if not os.path.isfile(space_pkl): self.param.setVal('output_similar', False) if not self.conveyor.getError(): if self.param.getVal('output_similar') is True: from flame.sapply import Sapply metric = self.param.getVal('similarity_metric') numsel = self.param.getVal('similarity_cutoff_num') cutoff = self.param.getVal('similarity_cutoff_distance') # sapply = Sapply(self.param, self.conveyor) sapply_child = importlib.import_module(modpath+".sapply_child") # run apply object, in charge of generate a prediction from idata try: sapply = sapply_child.SapplyChild(self.param, self.conveyor) except: LOG.warning ('Sapply child architecture mismatch, defaulting to Sapply parent') sapply = Sapply(self.param, self.conveyor) sapply.run(cutoff, numsel, metric) LOG.debug(f'sapply child {type(sapply).__name__} completed `run()`') if not self.conveyor.getError(): # run apply object, in charge of generate a prediction from idata try: apply = apply_child.ApplyChild(self.param, self.conveyor) except: LOG.warning ('Apply child architecture mismatch, defaulting to Apply parent') apply = Apply(self.param, self.conveyor) apply.run() LOG.debug(f'apply child {type(apply).__name__} completed `run()`') # run odata object, in charge of formatting the prediction results # note that if any of the above steps failed, an error has been inserted in the # conveyor and odata will take case of showing an error message try: odata = odata_child.OdataChild(self.param, self.conveyor) except: LOG.warning ('Odata child architecture mismatch, defaulting to Odata parent') odata = Odata(self.param, self.conveyor) return odata.run()
class Predict: def __init__(self, model, version=0, output_format=None, label=None): LOG.debug('Starting predict...') self.model = model self.version = version self.param = Parameters() self.conveyor = Conveyor() self.conveyor.addVal(label, 'prediction_label', 'prediction label', 'method', 'single', 'Label used to identify the prediction') if not self.param.loadYaml(model, version): LOG.critical('Unable to load model parameters. Aborting...') sys.exit() # add additional output formats included in the constructor # this is requiered to add JSON format as output when the object is # instantiated from a web service call, requiring this output if output_format != None: if output_format not in self.param.getVal('output_format'): self.param.appVal('output_format', output_format) return def get_ensemble(self): ''' Returns a Boolean indicating if the model uses external input sources and a list with these sources ''' return self.param.getEnsemble() def set_single_CPU(self) -> None: ''' Forces the use of a single CPU ''' LOG.debug('parameter "numCPUs" forced to be 1') self.param.setVal('numCPUs', 1) def run(self, input_source): ''' Executes a default predicton workflow ''' # path to endpoint # path to endpoint endpoint = utils.model_path(self.model, self.version) if not os.path.isdir(endpoint): self.conveyor.setError( f'Unable to find model {self.model}, version {self.version}') #LOG.error(f'Unable to find model {self.model}') if not self.conveyor.getError(): # uses the child classes within the 'model' folder, # to allow customization of # the processing applied to each model modpath = utils.module_path(self.model, self.version) idata_child = importlib.import_module(modpath + ".idata_child") apply_child = importlib.import_module(modpath + ".apply_child") odata_child = importlib.import_module(modpath + ".odata_child") # run idata object, in charge of generate model data from input try: idata = idata_child.IdataChild(self.param, self.conveyor, input_source) except: LOG.warning( 'Idata child architecture mismatch, defaulting to Idata parent' ) idata = Idata(self.param, self.conveyor, input_source) idata.run() LOG.debug(f'idata child {type(idata).__name__} completed `run()`') if not self.conveyor.getError(): # make sure there is X data if not self.conveyor.isKey('xmatrix'): LOG.debug(f'Failed to compute MDs') self.conveyor.setError(f'Failed to compute MDs') if not self.conveyor.getError(): # run apply object, in charge of generate a prediction from idata try: apply = apply_child.ApplyChild(self.param, self.conveyor) except: LOG.warning( 'Apply child architecture mismatch, defaulting to Apply parent' ) apply = Apply(self.param, self.conveyor) apply.run() LOG.debug(f'apply child {type(apply).__name__} completed `run()`') # run odata object, in charge of formatting the prediction results # note that if any of the above steps failed, an error has been inserted in the # conveyor and odata will take case of showing an error message try: odata = odata_child.OdataChild(self.param, self.conveyor) except: LOG.warning( 'Odata child architecture mismatch, defaulting to Odata parent' ) odata = Odata(self.param, self.conveyor) return odata.run()
class Build: def __init__(self, model, param_file=None, param_string=None, output_format=None): LOG.debug('Starting build...') self.model = model self.param = Parameters() self.conveyor = Conveyor() # load parameters if param_file is not None: # use the param_file to update existing parameters at the model # directory and save changes to make them persistent success, message = self.param.delta(model, 0, param_file, iformat='YAML') elif param_string is not None: success, message = self.param.delta(model, 0, param_string, iformat='JSONS') else: # load parameter file at the model directory success, message = self.param.loadYaml(model, 0) # being unable to load parameters is a critical error if not success: LOG.critical( f'Unable to load model parameters. "{message}" Aborting...') sys.exit(1) # add additional output formats included in the constructor # this is requiered to add JSON format as output when the object is # instantiated from a web service call, requiring this output if output_format is not None: if output_format not in self.param.getVal('output_format'): self.param.appVal('output_format', output_format) def get_ensemble(self): ''' Returns a Boolean indicating if the model uses external input sources and a list with these sources ''' return self.param.getEnsemble() def set_single_CPU(self) -> None: ''' Forces the use of a single CPU ''' LOG.debug('parameter "numCPUs" forced to be 1') self.param.setVal('numCPUs', 1) def run(self, input_source): ''' Executes a default predicton workflow ''' # path to endpoint epd = utils.model_path(self.model, 0) if not os.path.isdir(epd): self.conveyor.setError(f'Unable to find model {self.model}') #LOG.error(f'Unable to find model {self.model}') # import ichild classes if not self.conveyor.getError(): # uses the child classes within the 'model' folder, # to allow customization of the processing applied to each model modpath = utils.module_path(self.model, 0) idata_child = importlib.import_module(modpath + ".idata_child") learn_child = importlib.import_module(modpath + ".learn_child") odata_child = importlib.import_module(modpath + ".odata_child") # run idata object, in charge of generate model data from input try: idata = idata_child.IdataChild(self.param, self.conveyor, input_source) except: LOG.warning( 'Idata child architecture mismatch, defaulting to Idata parent' ) idata = Idata(self.param, self.conveyor, input_source) idata.run() LOG.debug(f'idata child {type(idata).__name__} completed `run()`') if not self.conveyor.getError(): # check there is a suitable X and Y if not self.conveyor.isKey('xmatrix'): self.conveyor.setError(f'Failed to compute MDs') if not self.conveyor.isKey('ymatrix'): self.conveyor.setError( f'No activity data (Y) found in training series') if not self.conveyor.getError(): # instantiate learn (build a model from idata) and run it learn = learn_child.LearnChild(self.param, self.conveyor) learn.run() try: learn = learn_child.LearnChild(self.param, self.conveyor) except: LOG.warning( 'Learn child architecture mismatch, defaulting to Learn parent' ) learn = Learn(self.param, self.conveyor) LOG.debug(f'learn child {type(learn).__name__} completed `run()`') # run odata object, in charge of formatting the prediction results # note that if any of the above steps failed, an error has been inserted in the # conveyor and odata will take case of showing an error message try: odata = odata_child.OdataChild(self.param, self.conveyor) except: LOG.warning( 'Odata child architecture mismatch, defaulting to Odata parent' ) odata = Odata(self.param, self.conveyor) return odata.run()
class Build: def __init__(self, model, param_file=None, param_string=None, output_format=None): LOG.debug('Starting build...') self.model = model self.param = Parameters() self.conveyor = Conveyor() # identify the workflow type self.conveyor.setOrigin('learn') # generate a unique modelID self.conveyor.addMeta('modelID',utils.id_generator()) LOG.debug(f'Generated new model with modelID: {self.conveyor.getMeta("modelID")}') # load parameters if param_file is not None: # use the param_file to update existing parameters at the model # directory and save changes to make them persistent success, message = self.param.delta(model, 0, param_file, iformat='YAML') elif param_string is not None: success, message = self.param.delta(model, 0, param_string, iformat='JSONS') else: # load parameter file at the model directory success, message = self.param.loadYaml(model, 0) # being unable to load parameters is a critical error if not success: LOG.critical(f'Unable to load model parameters. {message}. Aborting...') sys.exit(1) # add additional output formats included in the constructor # this is requiered to add JSON format as output when the object is # instantiated from a web service call, requiring this output if output_format is not None: if output_format not in self.param.getVal('output_format'): self.param.appVal('output_format',output_format) if self.param.getVal('confidential'): self.confidentialAuditParam() def confidentialAuditParam (self): import yaml original_method = self.param.getVal('model') if self.param.getVal ('quantitative'): if original_method != 'PLSR': self.param.setVal('model', 'PLSR') LOG.info (f'CONFIDENTIALITY AUDIT: the model was set to PLSR, ' f'the original method {original_method} was not suitable to build confidential models') else: if original_method != 'PLSDA': self.param.setVal('model', 'PLSDA') LOG.info (f'CONFIDENTIALITY AUDIT: the model was set to PLSDA, ' f'the original method {original_method} was not suitable to build confidential models') # TODO: conformal support if self.param.getVal('conformal'): self.param.setVal('conformal', False) LOG.info ('CONFIDENTIALITY AUDIT: conformal was set to False. ' 'Conformal models are not supported for now in confidential models') parameters_file_path = utils.model_path(self.model, 0) parameters_file_name = os.path.join (parameters_file_path, 'parameters.yaml') with open(parameters_file_name, 'w') as pfile: yaml.dump (self.param.p, pfile) def get_ensemble(self): ''' Returns a Boolean indicating if the model uses external input sources and a list with these sources ''' return self.param.getEnsemble() def extend_modelID (self, ensembleID): modelID = self.conveyor.getMeta('modelID') modelID = f'{modelID}-{ensembleID}' self.conveyor.addMeta('modelID', modelID) LOG.debug (f'modelID re-defined as {self.conveyor.getVal("modelID")}') def set_single_CPU(self) -> None: ''' Forces the use of a single CPU ''' LOG.debug('parameter "numCPUs" forced to be 1') self.param.setVal('numCPUs',1) def run(self, input_source): ''' Executes a default predicton workflow ''' # path to endpoint epd = utils.model_path(self.model, 0) # if not os.path.isdir(epd): # self.conveyor.setError(f'Unable to find model {self.model}') # #LOG.error(f'Unable to find model {self.model}') # import ichild classes # if not self.conveyor.getError(): # uses the child classes within the 'model' folder, # to allow customization of the processing applied to each model modpath = utils.module_path(self.model, 0) idata_child = importlib.import_module(modpath+".idata_child") learn_child = importlib.import_module(modpath+".learn_child") odata_child = importlib.import_module(modpath+".odata_child") # run idata object, in charge of generate model data from input try: idata = idata_child.IdataChild(self.param, self.conveyor, input_source) except: LOG.warning ('Idata child architecture mismatch, defaulting to Idata parent') idata = Idata(self.param, self.conveyor, input_source) idata.run() LOG.debug(f'idata child {type(idata).__name__} completed `run()`') if not self.conveyor.getError(): success, results = idata.preprocess_create() if not success: self.conveyor.setError(results) if not self.conveyor.getError(): # check there is a suitable X and Y if not self.conveyor.isKey ('xmatrix'): self.conveyor.setError(f'Failed to compute MDs') if not self.conveyor.isKey ('ymatrix'): self.conveyor.setError(f'No activity data (Y) found in training series') # run optional chemical space building for supporting "closest" training series object # if self.param.getVal('buildSimilarity'): if self.param.getVal('output_similar') is True: from flame.slearn import Slearn slearn_child = importlib.import_module(modpath+".slearn_child") if not self.conveyor.getError(): # instantiate learn (build a space from idata) and run it try: slearn = slearn_child.SlearnChild(self.param, self.conveyor) except: LOG.warning ('Slearn child architecture mismatch, defaulting to Learn parent') slearn = Slearn(self.param, self.conveyor) slearn.run() LOG.debug(f'slearn child {type(slearn).__name__} completed `run()`') if not self.conveyor.getError(): # instantiate learn (build a model from idata) and run it try: learn = learn_child.LearnChild(self.param, self.conveyor) except: LOG.warning ('Learn child architecture mismatch, defaulting to Learn parent') learn = Learn(self.param, self.conveyor) learn.run() LOG.debug(f'learn child {type(learn).__name__} completed `run()`') # run odata object, in charge of formatting the prediction results # note that if any of the above steps failed, an error has been inserted in the # conveyor and odata will take case of showing an error message try: odata = odata_child.OdataChild(self.param, self.conveyor) except: LOG.warning ('Odata child architecture mismatch, defaulting to Odata parent') odata = Odata(self.param, self.conveyor) return odata.run()