Beispiel #1
0
class Predict:

    def __init__(self, model, version=0, output_format=None, label=None):
        LOG.debug('Starting predict...')
        self.model = model
        self.version = version
        self.param = Parameters()
        self.conveyor = Conveyor()

        # identify the workflow type
        self.conveyor.setOrigin('apply')

        # load modelID
        success, result = utils.getModelID(model, version, 'model')
        if not success:
            LOG.critical(f'{result}. Aborting...')
            sys.exit()

        self.conveyor.addMeta('modelID', result)
        LOG.debug (f'Loaded model with modelID: {result}')

        # assign prediction label
        self.conveyor.addVal(label, 'prediction_label', 'prediction label',
                    'method', 'single',
                    'Label used to identify the prediction')

        success, results = self.param.loadYaml(model, version)
        if not success:
            LOG.critical(f'Unable to load model parameters. {results}. Aborting...')
            sys.exit()

        # add additional output formats included in the constructor 
        # this is requiered to add JSON format as output when the object is
        # instantiated from a web service call, requiring this output   
        if output_format != None:
            if output_format not in self.param.getVal('output_format'):
                self.param.appVal('output_format',output_format)
 
            if 'ghost' in output_format:
                self.param.setVal('output_similar', False)

        return

    def get_ensemble(self):
        ''' Returns a Boolean indicating if the model uses external input
            sources and a list with these sources '''
        return self.param.getEnsemble()

    def set_single_CPU(self) -> None:
        ''' Forces the use of a single CPU '''
        LOG.debug('parameter "numCPUs" forced to be 1')
        self.param.setVal('numCPUs',1)

    def run(self, input_source):
        ''' Executes a default predicton workflow '''

        # path to endpoint
        endpoint = utils.model_path(self.model, self.version)
        
        # if not os.path.isdir(endpoint):
        #     self.conveyor.setError(f'Unable to find model {self.model}, version {self.version}')
        #     #LOG.error(f'Unable to find model {self.model}')

        # if not self.conveyor.getError():
        # uses the child classes within the 'model' folder,
        # to allow customization of
        # the processing applied to each model
        modpath = utils.module_path(self.model, self.version)

        idata_child = importlib.import_module(modpath+".idata_child")
        apply_child = importlib.import_module(modpath+".apply_child")
        odata_child = importlib.import_module(modpath+".odata_child")

        # run idata object, in charge of generate model data from input
        try:
            idata = idata_child.IdataChild(self.param, self.conveyor, input_source)
        except:
            LOG.warning ('Idata child architecture mismatch, defaulting to Idata parent')
            idata = Idata(self.param, self.conveyor, input_source)

        idata.run()
        LOG.debug(f'idata child {type(idata).__name__} completed `run()`')

        if not self.conveyor.getError():
            success, results = idata.preprocess_apply()
            if not success:
                self.conveyor.setError(results)

        if not self.conveyor.getError():
            # make sure there is X data
            if not self.conveyor.isKey('xmatrix'):
                LOG.debug(f'Failed to compute MDs')
                self.conveyor.setError(f'Failed to compute MDs')

        # for secret models avoid searching similar compounds
        space_pkl = os.path.join(endpoint,'space.pkl')
        if not os.path.isfile(space_pkl):
            self.param.setVal('output_similar', False)

        if not self.conveyor.getError():
            if self.param.getVal('output_similar') is True:

                from flame.sapply import Sapply

                metric = self.param.getVal('similarity_metric')
                numsel = self.param.getVal('similarity_cutoff_num')
                cutoff = self.param.getVal('similarity_cutoff_distance')
                
                # sapply = Sapply(self.param, self.conveyor)

                sapply_child = importlib.import_module(modpath+".sapply_child")

                # run apply object, in charge of generate a prediction from idata
                try:
                    sapply = sapply_child.SapplyChild(self.param, self.conveyor)
                except:
                    LOG.warning ('Sapply child architecture mismatch, defaulting to Sapply parent')
                    sapply = Sapply(self.param, self.conveyor)

                sapply.run(cutoff, numsel, metric)
                LOG.debug(f'sapply child {type(sapply).__name__} completed `run()`')

        if not self.conveyor.getError():
            # run apply object, in charge of generate a prediction from idata
            try:
                apply = apply_child.ApplyChild(self.param, self.conveyor)
            except:
                LOG.warning ('Apply child architecture mismatch, defaulting to Apply parent')
                apply = Apply(self.param, self.conveyor)

            apply.run()
            LOG.debug(f'apply child {type(apply).__name__} completed `run()`')

        # run odata object, in charge of formatting the prediction results
        # note that if any of the above steps failed, an error has been inserted in the
        # conveyor and odata will take case of showing an error message
        try:
            odata = odata_child.OdataChild(self.param, self.conveyor)
        except:
            LOG.warning ('Odata child architecture mismatch, defaulting to Odata parent')
            odata = Odata(self.param, self.conveyor)

        return odata.run()
Beispiel #2
0
class Search:

    def __init__(self, space, version, output_format=None, label=None):
        LOG.debug('Starting search...')
        self.space = space
        self.version = version
        self.label = label
        self.param = Parameters()
        self.conveyor = Conveyor()

        # identify the workflow type
        self.conveyor.setOrigin('sapply')

        # load modelID
        path = utils.space_path(space, version)
        meta = os.path.join(path,'space-meta.pkl')
        try:
            with open(meta, 'rb') as handle:
                modelID = pickle.load(handle)
        except:
            LOG.critical(f'Unable to load modelID from {meta}. Aborting...')
            sys.exit()

        self.conveyor.addMeta('modelID', modelID)
        LOG.debug (f'Loaded space with modelID: {modelID}')

        # assign prediction (search) label
        self.conveyor.addVal(label, 'prediction_label', 'prediction label',
            'method', 'single',
            'Label used to identify the prediction')

        success, results = self.param.loadYaml(space, version, isSpace=True)
        if not success:
            LOG.critical(f'Unable to load space parameters. {results}. Aborting...')
            sys.exit()

        # add additional output formats included in the constructor 
        # this is requiered to add JSON format as output when the object is
        # instantiated from a web service call, requiring this output   
        if output_format != None:
            if output_format not in self.param.getVal('output_format'):
                self.param.appVal('output_format',output_format)
 
        return

    def set_single_CPU(self) -> None:
        ''' Forces the use of a single CPU '''
        LOG.debug('parameter "numCPUs" forced to be 1')
        self.param.setVal('numCPUs',1)

    def getVal (self, idict, ikey):
        if not ikey in idict:
            return None
        return idict[ikey]

    # def run(self, input_source, runtime_param=None, metric=None, numsel=None, cutoff=None):
    def run(self, param_dict):
        ''' Executes a default predicton workflow '''

        metric = None
        numsel = None
        cutoff = None
        
        # path to endpoint
        epd = utils.space_path(self.space, self.version)
        if not os.path.isdir(epd):
            LOG.error(f'Unable to find space {self.space}')
            self.conveyor.setError(f'Unable to find space {self.space}, version {self.version}')

        if self.getVal(param_dict,'smarts') is not None:
            input_source = param_dict['smarts']
            self.param.setVal('input_type', 'smarts')

        elif self.getVal(param_dict,'infile') is not None:
            input_source = param_dict['infile']

        else:
            LOG.error(f'Unable to find input_file')
            self.conveyor.setError('wrong format in the runtime similarity parameters')

        if 'runtime_param' in param_dict:
            runtime_param = self.getVal(param_dict, 'runtime_param')
            if runtime_param is not None:
                LOG.info (f'runtime parameters: {str(runtime_param)}')
                try:
                    with open(runtime_param, 'r') as pfile:
                        rtparam = yaml.safe_load(pfile)
                        try:
                            metric = rtparam['similarity_metric']
                            numsel = rtparam['similarity_cutoff_num']
                            cutoff = rtparam['similarity_cutoff_distance']
                        except:
                            LOG.error('wrong format in the runtime similarity parameters')
                            self.conveyor.setError('wrong format in the runtime similarity parameters')
                except:
                    LOG.error('runtime similarity parameter file not found')
                    self.conveyor.setError('runtime similarity parameter file not found')
        else:
            try:
                metric = param_dict['metric']
                numsel = param_dict['numsel']
                cutoff = param_dict['cutoff']
            except:
                LOG.error('wrong format in the runtime similarity parameters')
                self.conveyor.setError('wrong format in the runtime similarity parameters')

        md = self.param.getVal('computeMD_method')
        if utils.isFingerprint(md) and len(md) > 1:
            LOG.warning(f'When using fingerprints, only a single type of MD can be used to build spaces. Selecting {md[0]}')
            self.conveyor.setWarning(f'When using fingerprints, only a single type of MD can be used to build spaces. Selecting {md[0]}')
            self.param.setVal('computeMD_method',[md[0]])

        if not self.conveyor.getError():
            # uses the child classes within the 'space' folder,
            # to allow customization of
            # the processing applied to each space
            modpath = utils.smodule_path(self.space, self.version)

            idata_child = importlib.import_module(modpath+".idata_child")
            sapply_child = importlib.import_module(modpath+".sapply_child")
            odata_child = importlib.import_module(modpath+".odata_child")

            # run idata object, in charge of generate space data from input
            try:
                idata = idata_child.IdataChild(self.param, self.conveyor, input_source)
            except:
                LOG.warning ('Idata child architecture mismatch, defaulting to Idata parent')
                idata = Idata(self.param, self.conveyor, input_source)

            idata.run()
            LOG.debug(f'idata child {type(idata).__name__} completed `run()`')

        if not self.conveyor.getError():

            # make sure there is X data
            if not self.conveyor.isKey('xmatrix'):
                if not self.conveyor.isKey ('SMARTS'):
                    LOG.debug(f'Failed to compute MDs')
                    self.conveyor.setError(f'Failed to compute MDs')

        if not self.conveyor.getError():
            # run apply object, in charge of generate a prediction from idata
            try:
                sapply = sapply_child.SapplyChild(self.param, self.conveyor)
            except:
                LOG.warning ('Sapply child architecture mismatch, defaulting to Sapply parent')
                sapply = Sapply(self.param, self.conveyor)

            sapply.run(cutoff, numsel, metric)
            LOG.debug(f'sapply child {type(sapply).__name__} completed `run()`')

        # run odata object, in charge of formatting the prediction results
        # note that if any of the above steps failed, an error has been inserted in the
        # conveyor and odata will take case of showing an error message

        try:
            odata = odata_child.OdataChild(self.param, self.conveyor)
        except:
            LOG.warning ('Odata child architecture mismatch, defaulting to Odata parent')
            odata = Odata(self.param, self.conveyor)

        return odata.run()
Beispiel #3
0
class Build:
    def __init__(self,
                 model,
                 param_file=None,
                 param_string=None,
                 output_format=None):
        LOG.debug('Starting build...')
        self.model = model
        self.param = Parameters()
        self.conveyor = Conveyor()

        # load parameters
        if param_file is not None:
            # use the param_file to update existing parameters at the model
            # directory and save changes to make them persistent
            success, message = self.param.delta(model,
                                                0,
                                                param_file,
                                                iformat='YAML')

        elif param_string is not None:
            success, message = self.param.delta(model,
                                                0,
                                                param_string,
                                                iformat='JSONS')

        else:
            # load parameter file at the model directory
            success, message = self.param.loadYaml(model, 0)

        # being unable to load parameters is a critical error
        if not success:
            LOG.critical(
                f'Unable to load model parameters. "{message}" Aborting...')
            sys.exit(1)

        # add additional output formats included in the constructor
        # this is requiered to add JSON format as output when the object is
        # instantiated from a web service call, requiring this output
        if output_format is not None:
            if output_format not in self.param.getVal('output_format'):
                self.param.appVal('output_format', output_format)

    def get_ensemble(self):
        ''' Returns a Boolean indicating if the model uses external input
            sources and a list with these sources '''
        return self.param.getEnsemble()

    def set_single_CPU(self) -> None:
        ''' Forces the use of a single CPU '''
        LOG.debug('parameter "numCPUs" forced to be 1')
        self.param.setVal('numCPUs', 1)

    def run(self, input_source):
        ''' Executes a default predicton workflow '''

        # path to endpoint
        epd = utils.model_path(self.model, 0)
        if not os.path.isdir(epd):
            self.conveyor.setError(f'Unable to find model {self.model}')
            #LOG.error(f'Unable to find model {self.model}')

        # import ichild classes
        if not self.conveyor.getError():
            # uses the child classes within the 'model' folder,
            # to allow customization of  the processing applied to each model
            modpath = utils.module_path(self.model, 0)

            idata_child = importlib.import_module(modpath + ".idata_child")
            learn_child = importlib.import_module(modpath + ".learn_child")
            odata_child = importlib.import_module(modpath + ".odata_child")

            # run idata object, in charge of generate model data from input
            try:
                idata = idata_child.IdataChild(self.param, self.conveyor,
                                               input_source)
            except:
                LOG.warning(
                    'Idata child architecture mismatch, defaulting to Idata parent'
                )
                idata = Idata(self.param, self.conveyor, input_source)
            idata.run()
            LOG.debug(f'idata child {type(idata).__name__} completed `run()`')

        if not self.conveyor.getError():
            # check there is a suitable X and Y
            if not self.conveyor.isKey('xmatrix'):
                self.conveyor.setError(f'Failed to compute MDs')

            if not self.conveyor.isKey('ymatrix'):
                self.conveyor.setError(
                    f'No activity data (Y) found in training series')

        if not self.conveyor.getError():
            # instantiate learn (build a model from idata) and run it
            learn = learn_child.LearnChild(self.param, self.conveyor)
            learn.run()

            try:
                learn = learn_child.LearnChild(self.param, self.conveyor)
            except:
                LOG.warning(
                    'Learn child architecture mismatch, defaulting to Learn parent'
                )
                learn = Learn(self.param, self.conveyor)

            LOG.debug(f'learn child {type(learn).__name__} completed `run()`')

        # run odata object, in charge of formatting the prediction results
        # note that if any of the above steps failed, an error has been inserted in the
        # conveyor and odata will take case of showing an error message
        try:
            odata = odata_child.OdataChild(self.param, self.conveyor)
        except:
            LOG.warning(
                'Odata child architecture mismatch, defaulting to Odata parent'
            )
            odata = Odata(self.param, self.conveyor)

        return odata.run()
Beispiel #4
0
class Sbuild:
    def __init__(self,
                 space,
                 param_file=None,
                 param_string=None,
                 output_format=None):
        LOG.debug('Starting sbuild...')
        self.space = space
        self.param = Parameters()
        self.conveyor = Conveyor()

        # identify the workflow type
        self.conveyor.setOrigin('slearn')

        # generate a unique modelID
        self.conveyor.addMeta('modelID', utils.id_generator())
        LOG.debug(
            f'Generated new space with modelID: {self.conveyor.getMeta("modelID")}'
        )

        # load parameters
        if param_file is not None:
            # use the param_file to update existing parameters at the space
            # directory and save changes to make them persistent
            success, message = self.param.delta(space,
                                                0,
                                                param_file,
                                                iformat='YAML',
                                                isSpace=True)

        elif param_string is not None:
            success, message = self.param.delta(space,
                                                0,
                                                param_string,
                                                iformat='JSONS',
                                                isSpace=True)

        else:
            # load parameter file at the space directory
            success, message = self.param.loadYaml(space, 0, isSpace=True)

        # being unable to load parameters is a critical error
        if not success:
            LOG.critical(
                f'Unable to load space parameters. {message}. Aborting...')
            sys.exit(1)

        md = self.param.getVal('computeMD_method')
        if utils.isFingerprint(md) and len(md) > 1:
            LOG.warning(
                f'When using fingerprints, only a single type of MD can be used to build spaces. Selecting {md[0]}'
            )
            self.conveyor.setWarning(
                f'When using fingerprints, only a single type of MD can be used to build spaces. Selecting {md[0]}'
            )
            self.param.setVal('computeMD_method', [md[0]])

        # add additional output formats included in the constructor
        # this is requiered to add JSON format as output when the object is
        # instantiated from a web service call, requiring this output
        if output_format is not None:
            if output_format not in self.param.getVal('output_format'):
                self.param.appVal('output_format', output_format)

    def set_single_CPU(self) -> None:
        ''' Forces the use of a single CPU '''
        LOG.debug('parameter "numCPUs" forced to be 1')
        self.param.setVal('numCPUs', 1)

    def run(self, input_source):
        ''' Executes a default chemical space building workflow '''

        # path to endpoint
        epd = utils.space_path(self.space, 0)
        if not os.path.isdir(epd):
            self.conveyor.setError(f'Unable to find space {self.space}')
            #LOG.error(f'Unable to find space {self.space}')

        # import ichild classes
        if not self.conveyor.getError():
            # uses the child classes within the 'space' folder,
            # to allow customization of  the processing applied to each space
            modpath = utils.smodule_path(self.space, 0)

            idata_child = importlib.import_module(modpath + ".idata_child")
            slearn_child = importlib.import_module(modpath + ".slearn_child")
            odata_child = importlib.import_module(modpath + ".odata_child")

            # run idata object, in charge of generate space data from input
            try:
                idata = idata_child.IdataChild(self.param, self.conveyor,
                                               input_source)
            except:
                LOG.warning(
                    'Idata child architecture mismatch, defaulting to Idata parent'
                )
                idata = Idata(self.param, self.conveyor, input_source)

            idata.run()
            LOG.debug(f'idata child {type(idata).__name__} completed `run()`')

        if not self.conveyor.getError():
            success, results = idata.preprocess_create()
            if not success:
                self.conveyor.setError(results)

        if not self.conveyor.getError():
            # check there is a suitable X and Y
            if not self.conveyor.isKey('xmatrix'):
                self.conveyor.setError(f'Failed to compute MDs')

        if not self.conveyor.getError():
            # instantiate learn (build a space from idata) and run it
            try:
                slearn = slearn_child.SlearnChild(self.param, self.conveyor)
            except:
                LOG.warning(
                    'Slearn child architecture mismatch, defaulting to Learn parent'
                )
                slearn = Slearn(self.param, self.conveyor)

            slearn.run()
            LOG.debug(
                f'slearn child {type(slearn).__name__} completed `run()`')

        # run odata object, in charge of formatting the prediction results
        # note that if any of the above steps failed, an error has been inserted in the
        # conveyor and odata will take case of showing an error message
        try:
            odata = odata_child.OdataChild(self.param, self.conveyor)
        except:
            LOG.warning(
                'Odata child architecture mismatch, defaulting to Odata parent'
            )
            odata = Odata(self.param, self.conveyor)

        return odata.run()
Beispiel #5
0
class Predict:
    def __init__(self, model, version=0, output_format=None, label=None):
        LOG.debug('Starting predict...')
        self.model = model
        self.version = version
        self.param = Parameters()
        self.conveyor = Conveyor()

        self.conveyor.addVal(label, 'prediction_label', 'prediction label',
                             'method', 'single',
                             'Label used to identify the prediction')

        if not self.param.loadYaml(model, version):
            LOG.critical('Unable to load model parameters. Aborting...')
            sys.exit()

        # add additional output formats included in the constructor
        # this is requiered to add JSON format as output when the object is
        # instantiated from a web service call, requiring this output
        if output_format != None:
            if output_format not in self.param.getVal('output_format'):
                self.param.appVal('output_format', output_format)

        return

    def get_ensemble(self):
        ''' Returns a Boolean indicating if the model uses external input
            sources and a list with these sources '''
        return self.param.getEnsemble()

    def set_single_CPU(self) -> None:
        ''' Forces the use of a single CPU '''
        LOG.debug('parameter "numCPUs" forced to be 1')
        self.param.setVal('numCPUs', 1)

    def run(self, input_source):
        ''' Executes a default predicton workflow '''

        # path to endpoint
        # path to endpoint
        endpoint = utils.model_path(self.model, self.version)
        if not os.path.isdir(endpoint):
            self.conveyor.setError(
                f'Unable to find model {self.model}, version {self.version}')
            #LOG.error(f'Unable to find model {self.model}')

        if not self.conveyor.getError():
            # uses the child classes within the 'model' folder,
            # to allow customization of
            # the processing applied to each model
            modpath = utils.module_path(self.model, self.version)

            idata_child = importlib.import_module(modpath + ".idata_child")
            apply_child = importlib.import_module(modpath + ".apply_child")
            odata_child = importlib.import_module(modpath + ".odata_child")

            # run idata object, in charge of generate model data from input
            try:
                idata = idata_child.IdataChild(self.param, self.conveyor,
                                               input_source)
            except:
                LOG.warning(
                    'Idata child architecture mismatch, defaulting to Idata parent'
                )
                idata = Idata(self.param, self.conveyor, input_source)

            idata.run()
            LOG.debug(f'idata child {type(idata).__name__} completed `run()`')

        if not self.conveyor.getError():
            # make sure there is X data
            if not self.conveyor.isKey('xmatrix'):
                LOG.debug(f'Failed to compute MDs')
                self.conveyor.setError(f'Failed to compute MDs')

        if not self.conveyor.getError():
            # run apply object, in charge of generate a prediction from idata
            try:
                apply = apply_child.ApplyChild(self.param, self.conveyor)
            except:
                LOG.warning(
                    'Apply child architecture mismatch, defaulting to Apply parent'
                )
                apply = Apply(self.param, self.conveyor)

            apply.run()
            LOG.debug(f'apply child {type(apply).__name__} completed `run()`')

        # run odata object, in charge of formatting the prediction results
        # note that if any of the above steps failed, an error has been inserted in the
        # conveyor and odata will take case of showing an error message
        try:
            odata = odata_child.OdataChild(self.param, self.conveyor)
        except:
            LOG.warning(
                'Odata child architecture mismatch, defaulting to Odata parent'
            )
            odata = Odata(self.param, self.conveyor)

        return odata.run()
Beispiel #6
0
class Search:
    def __init__(self, space, version, output_format=None, label=None):
        LOG.debug('Starting predict...')
        self.space = space
        self.version = version
        self.label = label
        self.param = Parameters()
        self.conveyor = Conveyor()

        self.conveyor.addVal(label, 'prediction_label', 'prediction label',
                             'method', 'single',
                             'Label used to identify the prediction')

        if not self.param.loadYaml(space, version, isSpace=True):
            LOG.critical('Unable to load space parameters. Aborting...')
            sys.exit()

        # add additional output formats included in the constructor
        # this is requiered to add JSON format as output when the object is
        # instantiated from a web service call, requiring this output
        if output_format != None:
            if output_format not in self.param.getVal('output_format'):
                self.param.appVal('output_format', output_format)

        return

    def set_single_CPU(self) -> None:
        ''' Forces the use of a single CPU '''
        LOG.debug('parameter "numCPUs" forced to be 1')
        self.param.setVal('numCPUs', 1)

    # def run(self, input_source, runtime_param=None, metric=None, numsel=None, cutoff=None):
    def run(self, param_dict):
        ''' Executes a default predicton workflow '''

        print('*********', param_dict)

        metric = None
        numsel = None
        cutoff = None

        # path to endpoint
        epd = utils.space_path(self.space, self.version)
        if not os.path.isdir(epd):
            self.conveyor.setError(
                f'Unable to find space {self.space}, version {self.version}')
            #LOG.error(f'Unable to find space {self.space}')

        if 'infile' in param_dict:
            input_source = param_dict['infile']
        else:
            LOG.error(f'Unable to find input_file')
            self.conveyor.setError(
                'wrong format in the runtime similarity parameters')

        if 'runtime_param' in param_dict:
            runtime_param = param_dict['runtime_param']
            if runtime_param is not None:
                print(runtime_param)
                try:
                    with open(runtime_param, 'r') as pfile:
                        rtparam = yaml.safe_load(pfile)
                        try:
                            metric = rtparam['similarity_metric']
                            numsel = rtparam['similarity_cutoff_num']
                            cutoff = rtparam['similarity_cutoff_distance']
                        except:
                            LOG.error(
                                'wrong format in the runtime similarity parameters'
                            )
                            self.conveyor.setError(
                                'wrong format in the runtime similarity parameters'
                            )
                except:
                    LOG.error('runtime similarity parameter file not found')
                    self.conveyor.setError(
                        'runtime similarity parameter file not found')
        else:
            try:
                metric = param_dict['metric']
                numsel = param_dict['numsel']
                cutoff = param_dict['cutoff']
            except:
                LOG.error('wrong format in the runtime similarity parameters')
                self.conveyor.setError(
                    'wrong format in the runtime similarity parameters')

        if not self.conveyor.getError():
            # uses the child classes within the 'space' folder,
            # to allow customization of
            # the processing applied to each space
            modpath = utils.smodule_path(self.space, self.version)

            idata_child = importlib.import_module(modpath + ".idata_child")
            sapply_child = importlib.import_module(modpath + ".sapply_child")
            odata_child = importlib.import_module(modpath + ".odata_child")

            # run idata object, in charge of generate space data from input
            try:
                idata = idata_child.IdataChild(self.param, self.conveyor,
                                               input_source)
            except:
                LOG.warning(
                    'Idata child architecture mismatch, defaulting to Idata parent'
                )
                idata = Idata(self.param, self.conveyor, input_source)

            idata.run()
            LOG.debug(f'idata child {type(idata).__name__} completed `run()`')

        if not self.conveyor.getError():
            # make sure there is X data
            if not self.conveyor.isKey('xmatrix'):
                LOG.debug(f'Failed to compute MDs')
                self.conveyor.setError(f'Failed to compute MDs')

        if not self.conveyor.getError():
            # run apply object, in charge of generate a prediction from idata
            try:
                sapply = sapply_child.SapplyChild(self.param, self.conveyor)
            except:
                LOG.warning(
                    'Sapply child architecture mismatch, defaulting to Sapply parent'
                )
                sapply = Sapply(self.param, self.conveyor)

            sapply.run(cutoff, numsel, metric)
            LOG.debug(
                f'sapply child {type(sapply).__name__} completed `run()`')

        # run odata object, in charge of formatting the prediction results
        # note that if any of the above steps failed, an error has been inserted in the
        # conveyor and odata will take case of showing an error message
        try:
            odata = odata_child.OdataChild(self.param, self.conveyor,
                                           self.label)
        except:
            LOG.warning(
                'Odata child architecture mismatch, defaulting to Odata parent'
            )
            odata = Odata(self.param, self.conveyor, self.label)

        return odata.run()
Beispiel #7
0
class Build:

    def __init__(self, model, param_file=None, param_string=None, output_format=None):
        LOG.debug('Starting build...')
        self.model = model
        self.param = Parameters()
        
        self.conveyor = Conveyor()

        # identify the workflow type
        self.conveyor.setOrigin('learn')

        # generate a unique modelID
        self.conveyor.addMeta('modelID',utils.id_generator())
        LOG.debug(f'Generated new model with modelID: {self.conveyor.getMeta("modelID")}')

        # load parameters
        if param_file is not None:
            # use the param_file to update existing parameters at the model
            # directory and save changes to make them persistent
            success, message = self.param.delta(model, 0, param_file, iformat='YAML')

        elif param_string is not None:
            success, message = self.param.delta(model, 0, param_string, iformat='JSONS')

        else:
            # load parameter file at the model directory
            success, message = self.param.loadYaml(model, 0)

        # being unable to load parameters is a critical error
        if not success:
            LOG.critical(f'Unable to load model parameters. {message}. Aborting...')
            sys.exit(1)

        # add additional output formats included in the constructor 
        # this is requiered to add JSON format as output when the object is
        # instantiated from a web service call, requiring this output   
        if output_format is not None:
            if output_format not in self.param.getVal('output_format'):
                self.param.appVal('output_format',output_format)

        if self.param.getVal('confidential'):
            self.confidentialAuditParam()
 
    def confidentialAuditParam (self):
        import yaml

        original_method = self.param.getVal('model')
        if self.param.getVal ('quantitative'):
            if original_method != 'PLSR':
                self.param.setVal('model', 'PLSR')
                LOG.info (f'CONFIDENTIALITY AUDIT: the model was set to PLSR, '
                f'the original method {original_method} was not suitable to build confidential models')
        else:
            if original_method != 'PLSDA':
                self.param.setVal('model', 'PLSDA')
                LOG.info (f'CONFIDENTIALITY AUDIT: the model was set to PLSDA, '
                f'the original method {original_method} was not suitable to build confidential models')
        
        # TODO: conformal support
        if self.param.getVal('conformal'):
            self.param.setVal('conformal', False)
            LOG.info ('CONFIDENTIALITY AUDIT: conformal was set to False. '
            'Conformal models are not supported for now in confidential models')

        parameters_file_path = utils.model_path(self.model, 0)
        parameters_file_name = os.path.join (parameters_file_path,
                                            'parameters.yaml')
        with open(parameters_file_name, 'w') as pfile:
            yaml.dump (self.param.p, pfile)

    def get_ensemble(self):
        ''' Returns a Boolean indicating if the model uses external input
            sources and a list with these sources '''
        return self.param.getEnsemble()

    def extend_modelID (self, ensembleID):
        modelID = self.conveyor.getMeta('modelID')
        modelID = f'{modelID}-{ensembleID}'
        self.conveyor.addMeta('modelID', modelID)
        LOG.debug (f'modelID re-defined as {self.conveyor.getVal("modelID")}')

    def set_single_CPU(self) -> None:
        ''' Forces the use of a single CPU '''
        LOG.debug('parameter "numCPUs" forced to be 1')
        self.param.setVal('numCPUs',1)

    def run(self, input_source):
        ''' Executes a default predicton workflow '''

        # path to endpoint
        epd = utils.model_path(self.model, 0)
        # if not os.path.isdir(epd):
        #     self.conveyor.setError(f'Unable to find model {self.model}')
        #     #LOG.error(f'Unable to find model {self.model}')

        # import ichild classes
        # if not self.conveyor.getError():
        # uses the child classes within the 'model' folder,
        # to allow customization of  the processing applied to each model
        modpath = utils.module_path(self.model, 0)

        idata_child = importlib.import_module(modpath+".idata_child")
        learn_child = importlib.import_module(modpath+".learn_child")
        odata_child = importlib.import_module(modpath+".odata_child")

        # run idata object, in charge of generate model data from input
        try:
            idata = idata_child.IdataChild(self.param, self.conveyor, input_source)
        except:
            LOG.warning ('Idata child architecture mismatch, defaulting to Idata parent')
            idata = Idata(self.param, self.conveyor, input_source)
        idata.run() 
        LOG.debug(f'idata child {type(idata).__name__} completed `run()`')

        if not self.conveyor.getError():
            success, results = idata.preprocess_create()
            if not success:
                self.conveyor.setError(results)

        if not self.conveyor.getError():
            # check there is a suitable X and Y
            if not self.conveyor.isKey ('xmatrix'):
                self.conveyor.setError(f'Failed to compute MDs')

            if not self.conveyor.isKey ('ymatrix'):
                self.conveyor.setError(f'No activity data (Y) found in training series')
    
            # run optional chemical space building for supporting "closest" training series object
            # if self.param.getVal('buildSimilarity'):
            if self.param.getVal('output_similar') is True:

                from flame.slearn import Slearn

                slearn_child = importlib.import_module(modpath+".slearn_child")
                
                if not self.conveyor.getError():
                    # instantiate learn (build a space from idata) and run it
                    try:
                        slearn = slearn_child.SlearnChild(self.param, self.conveyor)
                    except:
                        LOG.warning ('Slearn child architecture mismatch, defaulting to Learn parent')
                        slearn = Slearn(self.param, self.conveyor)

                    slearn.run()
                    LOG.debug(f'slearn child {type(slearn).__name__} completed `run()`')

        if not self.conveyor.getError():

            # instantiate learn (build a model from idata) and run it
            try:
                learn = learn_child.LearnChild(self.param, self.conveyor)
            except:
                LOG.warning ('Learn child architecture mismatch, defaulting to Learn parent')
                learn = Learn(self.param, self.conveyor)
            learn.run()

            LOG.debug(f'learn child {type(learn).__name__} completed `run()`')

        # run odata object, in charge of formatting the prediction results
        # note that if any of the above steps failed, an error has been inserted in the
        # conveyor and odata will take case of showing an error message
        try:
            odata = odata_child.OdataChild(self.param, self.conveyor)
        except:
            LOG.warning ('Odata child architecture mismatch, defaulting to Odata parent')
            odata = Odata(self.param, self.conveyor)

        return odata.run()