Esempio n. 1
0
class Search:

    def __init__(self, space, version, output_format=None, label=None):
        LOG.debug('Starting search...')
        self.space = space
        self.version = version
        self.label = label
        self.param = Parameters()
        self.conveyor = Conveyor()

        # identify the workflow type
        self.conveyor.setOrigin('sapply')

        # load modelID
        path = utils.space_path(space, version)
        meta = os.path.join(path,'space-meta.pkl')
        try:
            with open(meta, 'rb') as handle:
                modelID = pickle.load(handle)
        except:
            LOG.critical(f'Unable to load modelID from {meta}. Aborting...')
            sys.exit()

        self.conveyor.addMeta('modelID', modelID)
        LOG.debug (f'Loaded space with modelID: {modelID}')

        # assign prediction (search) label
        self.conveyor.addVal(label, 'prediction_label', 'prediction label',
            'method', 'single',
            'Label used to identify the prediction')

        success, results = self.param.loadYaml(space, version, isSpace=True)
        if not success:
            LOG.critical(f'Unable to load space parameters. {results}. Aborting...')
            sys.exit()

        # add additional output formats included in the constructor 
        # this is requiered to add JSON format as output when the object is
        # instantiated from a web service call, requiring this output   
        if output_format != None:
            if output_format not in self.param.getVal('output_format'):
                self.param.appVal('output_format',output_format)
 
        return

    def set_single_CPU(self) -> None:
        ''' Forces the use of a single CPU '''
        LOG.debug('parameter "numCPUs" forced to be 1')
        self.param.setVal('numCPUs',1)

    def getVal (self, idict, ikey):
        if not ikey in idict:
            return None
        return idict[ikey]

    # def run(self, input_source, runtime_param=None, metric=None, numsel=None, cutoff=None):
    def run(self, param_dict):
        ''' Executes a default predicton workflow '''

        metric = None
        numsel = None
        cutoff = None
        
        # path to endpoint
        epd = utils.space_path(self.space, self.version)
        if not os.path.isdir(epd):
            LOG.error(f'Unable to find space {self.space}')
            self.conveyor.setError(f'Unable to find space {self.space}, version {self.version}')

        if self.getVal(param_dict,'smarts') is not None:
            input_source = param_dict['smarts']
            self.param.setVal('input_type', 'smarts')

        elif self.getVal(param_dict,'infile') is not None:
            input_source = param_dict['infile']

        else:
            LOG.error(f'Unable to find input_file')
            self.conveyor.setError('wrong format in the runtime similarity parameters')

        if 'runtime_param' in param_dict:
            runtime_param = self.getVal(param_dict, 'runtime_param')
            if runtime_param is not None:
                LOG.info (f'runtime parameters: {str(runtime_param)}')
                try:
                    with open(runtime_param, 'r') as pfile:
                        rtparam = yaml.safe_load(pfile)
                        try:
                            metric = rtparam['similarity_metric']
                            numsel = rtparam['similarity_cutoff_num']
                            cutoff = rtparam['similarity_cutoff_distance']
                        except:
                            LOG.error('wrong format in the runtime similarity parameters')
                            self.conveyor.setError('wrong format in the runtime similarity parameters')
                except:
                    LOG.error('runtime similarity parameter file not found')
                    self.conveyor.setError('runtime similarity parameter file not found')
        else:
            try:
                metric = param_dict['metric']
                numsel = param_dict['numsel']
                cutoff = param_dict['cutoff']
            except:
                LOG.error('wrong format in the runtime similarity parameters')
                self.conveyor.setError('wrong format in the runtime similarity parameters')

        md = self.param.getVal('computeMD_method')
        if utils.isFingerprint(md) and len(md) > 1:
            LOG.warning(f'When using fingerprints, only a single type of MD can be used to build spaces. Selecting {md[0]}')
            self.conveyor.setWarning(f'When using fingerprints, only a single type of MD can be used to build spaces. Selecting {md[0]}')
            self.param.setVal('computeMD_method',[md[0]])

        if not self.conveyor.getError():
            # uses the child classes within the 'space' folder,
            # to allow customization of
            # the processing applied to each space
            modpath = utils.smodule_path(self.space, self.version)

            idata_child = importlib.import_module(modpath+".idata_child")
            sapply_child = importlib.import_module(modpath+".sapply_child")
            odata_child = importlib.import_module(modpath+".odata_child")

            # run idata object, in charge of generate space data from input
            try:
                idata = idata_child.IdataChild(self.param, self.conveyor, input_source)
            except:
                LOG.warning ('Idata child architecture mismatch, defaulting to Idata parent')
                idata = Idata(self.param, self.conveyor, input_source)

            idata.run()
            LOG.debug(f'idata child {type(idata).__name__} completed `run()`')

        if not self.conveyor.getError():

            # make sure there is X data
            if not self.conveyor.isKey('xmatrix'):
                if not self.conveyor.isKey ('SMARTS'):
                    LOG.debug(f'Failed to compute MDs')
                    self.conveyor.setError(f'Failed to compute MDs')

        if not self.conveyor.getError():
            # run apply object, in charge of generate a prediction from idata
            try:
                sapply = sapply_child.SapplyChild(self.param, self.conveyor)
            except:
                LOG.warning ('Sapply child architecture mismatch, defaulting to Sapply parent')
                sapply = Sapply(self.param, self.conveyor)

            sapply.run(cutoff, numsel, metric)
            LOG.debug(f'sapply child {type(sapply).__name__} completed `run()`')

        # run odata object, in charge of formatting the prediction results
        # note that if any of the above steps failed, an error has been inserted in the
        # conveyor and odata will take case of showing an error message

        try:
            odata = odata_child.OdataChild(self.param, self.conveyor)
        except:
            LOG.warning ('Odata child architecture mismatch, defaulting to Odata parent')
            odata = Odata(self.param, self.conveyor)

        return odata.run()
Esempio n. 2
0
class Sbuild:
    def __init__(self,
                 space,
                 param_file=None,
                 param_string=None,
                 output_format=None):
        LOG.debug('Starting sbuild...')
        self.space = space
        self.param = Parameters()
        self.conveyor = Conveyor()

        # identify the workflow type
        self.conveyor.setOrigin('slearn')

        # generate a unique modelID
        self.conveyor.addMeta('modelID', utils.id_generator())
        LOG.debug(
            f'Generated new space with modelID: {self.conveyor.getMeta("modelID")}'
        )

        # load parameters
        if param_file is not None:
            # use the param_file to update existing parameters at the space
            # directory and save changes to make them persistent
            success, message = self.param.delta(space,
                                                0,
                                                param_file,
                                                iformat='YAML',
                                                isSpace=True)

        elif param_string is not None:
            success, message = self.param.delta(space,
                                                0,
                                                param_string,
                                                iformat='JSONS',
                                                isSpace=True)

        else:
            # load parameter file at the space directory
            success, message = self.param.loadYaml(space, 0, isSpace=True)

        # being unable to load parameters is a critical error
        if not success:
            LOG.critical(
                f'Unable to load space parameters. {message}. Aborting...')
            sys.exit(1)

        md = self.param.getVal('computeMD_method')
        if utils.isFingerprint(md) and len(md) > 1:
            LOG.warning(
                f'When using fingerprints, only a single type of MD can be used to build spaces. Selecting {md[0]}'
            )
            self.conveyor.setWarning(
                f'When using fingerprints, only a single type of MD can be used to build spaces. Selecting {md[0]}'
            )
            self.param.setVal('computeMD_method', [md[0]])

        # add additional output formats included in the constructor
        # this is requiered to add JSON format as output when the object is
        # instantiated from a web service call, requiring this output
        if output_format is not None:
            if output_format not in self.param.getVal('output_format'):
                self.param.appVal('output_format', output_format)

    def set_single_CPU(self) -> None:
        ''' Forces the use of a single CPU '''
        LOG.debug('parameter "numCPUs" forced to be 1')
        self.param.setVal('numCPUs', 1)

    def run(self, input_source):
        ''' Executes a default chemical space building workflow '''

        # path to endpoint
        epd = utils.space_path(self.space, 0)
        if not os.path.isdir(epd):
            self.conveyor.setError(f'Unable to find space {self.space}')
            #LOG.error(f'Unable to find space {self.space}')

        # import ichild classes
        if not self.conveyor.getError():
            # uses the child classes within the 'space' folder,
            # to allow customization of  the processing applied to each space
            modpath = utils.smodule_path(self.space, 0)

            idata_child = importlib.import_module(modpath + ".idata_child")
            slearn_child = importlib.import_module(modpath + ".slearn_child")
            odata_child = importlib.import_module(modpath + ".odata_child")

            # run idata object, in charge of generate space data from input
            try:
                idata = idata_child.IdataChild(self.param, self.conveyor,
                                               input_source)
            except:
                LOG.warning(
                    'Idata child architecture mismatch, defaulting to Idata parent'
                )
                idata = Idata(self.param, self.conveyor, input_source)

            idata.run()
            LOG.debug(f'idata child {type(idata).__name__} completed `run()`')

        if not self.conveyor.getError():
            success, results = idata.preprocess_create()
            if not success:
                self.conveyor.setError(results)

        if not self.conveyor.getError():
            # check there is a suitable X and Y
            if not self.conveyor.isKey('xmatrix'):
                self.conveyor.setError(f'Failed to compute MDs')

        if not self.conveyor.getError():
            # instantiate learn (build a space from idata) and run it
            try:
                slearn = slearn_child.SlearnChild(self.param, self.conveyor)
            except:
                LOG.warning(
                    'Slearn child architecture mismatch, defaulting to Learn parent'
                )
                slearn = Slearn(self.param, self.conveyor)

            slearn.run()
            LOG.debug(
                f'slearn child {type(slearn).__name__} completed `run()`')

        # run odata object, in charge of formatting the prediction results
        # note that if any of the above steps failed, an error has been inserted in the
        # conveyor and odata will take case of showing an error message
        try:
            odata = odata_child.OdataChild(self.param, self.conveyor)
        except:
            LOG.warning(
                'Odata child architecture mismatch, defaulting to Odata parent'
            )
            odata = Odata(self.param, self.conveyor)

        return odata.run()