Example #1
    def add_preprocess_step(self, fun, **kwargs):
        """
        Add a preprocessing step after count filtering but before calculating TFA or regression.

        :param fun: Preprocessing function. Can be provided as a string or as a function in `preprocessing.single_cell`.

            "log10" will take the log10 of pseudocounts

            "ln" will take the natural log of pseudocounts

            "log2" will take the log2 of pseudocounts

            "fft" will do the Freeman-Tukey transform

        :type fun: str, `preprocessing.single_cell` function
        :param kwargs: Additional arguments to the preprocessing function
        """
        if self.preprocessing_workflow is None:
            self.preprocessing_workflow = []

        if utils.is_string(fun) and fun.lower() in PREPROCESSING_FUNCTIONS:
            self.preprocessing_workflow.append((PREPROCESSING_FUNCTIONS[fun], kwargs))
        elif utils.is_string(fun) and fun.lower() not in PREPROCESSING_FUNCTIONS:
            raise ValueError("Unable to translate {f} into a function to call".format(f=fun))
        else:
            self.preprocessing_workflow.append((fun, kwargs))
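A short usage sketch may help; the object name and the custom function below are assumptions for illustration, not part of the source:

    # Hedged sketch; "worker" is an assumed instance of a workflow class
    # that exposes this method. "log2" resolves through PREPROCESSING_FUNCTIONS.
    worker.add_preprocess_step("log2")

    # A callable can be passed instead of a string; keyword arguments are
    # stored with it and forwarded when the step runs.
    worker.add_preprocess_step(my_custom_transform, offset=1)  # hypothetical function and kwarg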
Example #2
    def add_gridsearch_parameter(self, param_name, param_vector):
        """
        Set a parameter to search through by exhaustive grid search

        :param param_name: The workflow parameter to change for each run
        :type param_name: str
        :param param_vector: An iterable with values to use for the parameter
        :type param_vector: iterable

        """

        if self.grid_param_values is None:
            self.grid_param_values = {}

        if self.grid_params is None:
            self.grid_params = []

        self.grid_params.append(param_name)

        if utils.is_string(param_vector):
            self.grid_param_values[param_name] = [param_vector]
        else:
            try:
                # Materialize the iterable into a list; this also avoids
                # exhausting a generator before the grid search uses the values
                self.grid_param_values[param_name] = list(param_vector)
            except TypeError:
                self.grid_param_values[param_name] = [param_vector]
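A hedged usage sketch; the object name and parameter names below are illustrative assumptions:

    # Each value in the vector becomes one grid-search run; a bare string or
    # other non-iterable value is wrapped in a single-element list.
    cv_worker.add_gridsearch_parameter("num_bootstraps", [5, 10, 20])
    cv_worker.add_gridsearch_parameter("random_seed", range(42, 47))
    cv_worker.add_gridsearch_parameter("count_minimum", 0.05)  # non-iterable, stored as [0.05]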
Example #3
 def get_metric(cls, metric_ref):
     """
     This wraps a metric reference so that strings can be used instead of python imports.
     Will either return a metric class or raise an error.
     :param metric_ref: str / RankSummingMetric
         String or subclass of RankSummingMetric
     :return: RankSummingMetric
         The metric class that corresponds to the string, or the RankSummingMetric subclass passed through
     """
     if is_string(metric_ref):
         metric_ref = metric_ref.lower()
         if metric_ref == "aupr" or metric_ref == "precision-recall":
             return RankSummaryPR
         if metric_ref == "mcc" or metric_ref == "matthews correlation coefficient":
             return RankSummaryMCC
         if metric_ref == "f1" or metric_ref == "f1 score":
             return RankSummaryF1
         if metric_ref == "combined":
             return CombinedMetric
         else:
             raise ValueError("Parser {parser_str} unknown".format(
                 parser_str=metric_ref))
     elif issubclass(metric_ref, RankSummingMetric):
         return metric_ref
     else:
         raise ValueError(
             "Handler must be a string or a RankSummingMetric class")
Example #4
    def set_multiprocess_engine(cls, engine, processes=None):
        """
        Register the multiprocessing engine to use

        Currently available are:

        dask-cluster
        dask-k8
        dask-local
        multiprocessing
        local

        :param engine: A string to lookup the controller or a Controller object
        :type engine: str, Controller
        :param processes: Number of processes to use. Equivalent to calling `set_processes`
        :type processes: int
        """
        if cls.is_initialized:
            raise RuntimeError(
                "Client is currently active. Run .shutdown() before changing engines."
            )

        if utils.is_string(engine):
            if engine == "dask-cluster":
                from inferelator.distributed.dask_cluster_controller import DaskHPCClusterController
                cls.client = DaskHPCClusterController
            elif engine == "dask-local":
                from inferelator.distributed.dask_local_controller import DaskController
                cls.client = DaskController
            elif engine == "dask-k8":
                from inferelator.distributed.dask_k8_controller import DaskK8Controller
                cls.client = DaskK8Controller
            elif engine == "kvs":
                raise DeprecationWarning(
                    "The KVS engine is deprecated. Use Dask-based multiprocessing"
                )
            elif engine == "multiprocessing":
                from inferelator.distributed.multiprocessing_controller import MultiprocessingController
                cls.client = MultiprocessingController
            elif engine == "local":
                from inferelator.distributed.local_controller import LocalController
                cls.client = LocalController
            else:
                raise ValueError(
                    "Engine {eng_str} unknown".format(eng_str=engine))
        elif issubclass(engine, AbstractController):
            cls.client = engine
        else:
            raise ValueError(
                "Engine must be provided as a string for lookup or an implemented Controller class object"
            )

        utils.Debug.vprint(
            "Inferelator MPControl using engine {eng}".format(eng=cls.name()))

        if processes is not None:
            cls.set_processes(processes)
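A hedged usage sketch; "MPControl" is an assumed name for the class that owns this classmethod:

    MPControl.set_multiprocess_engine("multiprocessing", processes=8)

    # A Controller subclass can be registered directly instead of a string:
    # MPControl.set_multiprocess_engine(DaskHPCClusterController)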
Example #5
    def set_multiprocess_engine(cls, engine):
        """
        Register the multiprocessing engine to use

        Currently available are:

        dask-cluster
        dask-local
        kvs
        multiprocessing
        local

        :param engine: str / Controller object
            A string to lookup the controller or a Controller object
        """
        if cls.is_initialized:
            raise RuntimeError(
                "Client is currently active. Run .shutdown() before changing engines."
            )

        if utils.is_string(engine):
            if engine == "dask-cluster":
                from inferelator.distributed.dask_cluster_controller import DaskHPCClusterController
                cls.client = DaskHPCClusterController
            elif engine == "dask-local":
                from inferelator.distributed.dask_local_controller import DaskController
                cls.client = DaskController
            elif engine == "kvs":
                warnings.warn(
                    "The KVS engine is deprecated. It has been replaced by Dask-based multiprocessing",
                    DeprecationWarning)
                from inferelator.distributed.kvs_controller import KVSController
                cls.client = KVSController
            elif engine == "multiprocessing":
                from inferelator.distributed.multiprocessing_controller import MultiprocessingController
                cls.client = MultiprocessingController
            elif engine == "local":
                from inferelator.distributed.local_controller import LocalController
                cls.client = LocalController
            else:
                raise ValueError(
                    "Engine {eng_str} unknown".format(eng_str=engine))
        elif issubclass(engine, AbstractController):
            cls.client = engine
        else:
            raise ValueError(
                "Engine must be provided as a string for lookup or an implemented Controller class object"
            )

        utils.Debug.vprint(
            "Inferelator MPControl using engine {eng}".format(eng=cls.name()))
Example #6
 def get_handler(cls, handler_ref):
     """
     This wraps a metadata reference so that strings can be used instead of python imports.
     Will either return a metadata handling class or raise an error.
     :param handler_ref: str / MetadataParser
         String or subclass of MetadataParser
     :return: MetadataParser
         The metadata parser that corresponds to the string, or the MetadataParser object will be passed through
     """
     if utils.is_string(handler_ref):
         if handler_ref == "branching":
             return MetadataParserBranching
         elif handler_ref == "nonbranching":
             return MetadataParserNonbranching
         else:
             raise ValueError("Parser {parser_str} unknown".format(parser_str=handler_ref))
     elif issubclass(handler_ref, MetadataParser):
         return handler_ref
     else:
         raise ValueError("Handler must be a string or a MetadataParser class")
Example #7
 def get_metric(cls, metric_ref):
     """
     This wraps a metric reference so that strings can be used instead of python imports.
     Will either return a metric class or raise an error.
     :param metric_ref: str / RankSummingMetric
         String or subclass of RankSummingMetric
     :return: RankSummingMetric
         The metric class that corresponds to the string, or the RankSummingMetric subclass passed through
     """
     if utils.is_string(metric_ref):
         if metric_ref.lower() == "aupr" or metric_ref.lower(
         ) == "precision-recall":
             from inferelator.postprocessing.model_metrics import RankSummaryPR
             return RankSummaryPR
         else:
             raise ValueError("Parser {parser_str} unknown".format(
                 parser_str=metric_ref))
     elif issubclass(metric_ref, RankSummingMetric):
         return metric_ref
     else:
         raise ValueError(
             "Handler must be a string or a RankSummingMetric class")
def _factory_build_inferelator(regression=_RegressionWorkflowMixin, workflow=WorkflowBase):
    """
    This is the factory method to create workflow classes that combine preprocessing and postprocessing
    (from workflow) with a regression method (from regression).

    :param regression: _RegressionWorkflowMixin subclass
        A class object which implements the run_regression and run_bootstrap methods for a specific regression strategy
    :param workflow: WorkflowBase subclass
        A class object which implements the necessary data loading and preprocessing to create design & response data
        for the regression strategy, and then the postprocessing to turn regression betas into a network
    :return: RegressWorkflow
        An uninstantiated class which is the multi-inheritance result of both the regression workflow and
        the preprocessing/postprocessing workflow
    """

    use_mtl_regression = False

    # Decide which preprocessing/postprocessing workflow to use
    # String arguments are parsed for convenience in the run script
    if is_string(workflow):
        workflow = workflow.lower()
        if workflow == "base":
            workflow_class = WorkflowBase
        elif workflow == "tfa":
            from inferelator.tfa_workflow import TFAWorkFlow
            workflow_class = TFAWorkFlow
        elif workflow == "amusr" or workflow == "multitask":
            from inferelator.amusr_workflow import MultitaskLearningWorkflow
            workflow_class = MultitaskLearningWorkflow
            use_mtl_regression = True
        elif workflow == "single-cell":
            from inferelator.single_cell_workflow import SingleCellWorkflow
            workflow_class = SingleCellWorkflow
        elif workflow == "velocity":
            from inferelator.velocity_workflow import VelocityWorkflow
            workflow_class = VelocityWorkflow
        else:
            raise ValueError("{val} is not a string that can be mapped to a workflow class".format(val=workflow))
    # Or just use a workflow class directly
    elif inspect.isclass(workflow) and issubclass(workflow, WorkflowBase):
        workflow_class = workflow
    else:
        raise ValueError("Workflow must be a string that maps to a workflow class or an actual workflow class")

    # Decide which regression workflow to use
    # Return just the workflow if regression is set to None
    if regression is None:
        return workflow_class
    # String arguments are parsed for convenience in the run script
    elif is_string(regression):
        regression = regression.lower()
        if regression == "base":
            regression_class = _RegressionWorkflowMixin
        elif regression == "bbsr" and not use_mtl_regression:
            from inferelator.regression.bbsr_python import BBSRRegressionWorkflowMixin
            regression_class = BBSRRegressionWorkflowMixin
        elif regression == "elasticnet" and not use_mtl_regression:
            from inferelator.regression.elasticnet_python import ElasticNetWorkflowMixin
            regression_class = ElasticNetWorkflowMixin
        elif regression == "amusr":
            from inferelator.regression.amusr_regression import AMUSRRegressionWorkflowMixin
            regression_class = AMUSRRegressionWorkflowMixin
        elif regression == "bbsr-by-task" or (regression == "bbsr" and use_mtl_regression):
            from inferelator.regression.bbsr_multitask import BBSRByTaskRegressionWorkflowMixin
            regression_class = BBSRByTaskRegressionWorkflowMixin
        elif regression == "elasticnet-by-task" or (regression == "elasticnet" and use_mtl_regression):
            from inferelator.regression.elasticnet_python import ElasticNetByTaskRegressionWorkflowMixin
            regression_class = ElasticNetByTaskRegressionWorkflowMixin
        elif regression == "stars-by-task" or (regression == "stars" and use_mtl_regression):
            from inferelator.regression.stability_selection import StARSWorkflowByTaskMixin
            regression_class = StARSWorkflowByTaskMixin
        elif regression == "stars":
            from inferelator.regression.stability_selection import StARSWorkflowMixin
            regression_class = StARSWorkflowMixin
        elif regression == "sklearn" and not use_mtl_regression:
            from inferelator.regression.sklearn_regression import SKLearnWorkflowMixin
            regression_class = SKLearnWorkflowMixin
        elif regression == "sklearn" and use_mtl_regression:
            from inferelator.regression.sklearn_regression import SKLearnByTaskMixin
            regression_class = SKLearnByTaskMixin
        else:
            raise ValueError("{val} is not a string that can be mapped to a regression class".format(val=regression))
    # Or just use a regression class directly
    elif inspect.isclass(regression) and issubclass(regression, _RegressionWorkflowMixin):
        regression_class = regression
    else:
        raise ValueError("Regression must be a string that maps to a regression class or an actual regression class")

    class RegressWorkflow(regression_class, workflow_class):
        regression_type = regression_class

    return RegressWorkflow
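A hedged usage sketch of the factory itself; the package typically exposes a public wrapper around it, but the string shortcuts shown above can be passed directly:

    # Build a TFA-based workflow with BBSR regression from string shortcuts
    MyWorkflow = _factory_build_inferelator(regression="bbsr", workflow="tfa")
    worker = MyWorkflow()

    # With a multitask workflow, "bbsr" maps to the by-task regression variant
    # because the workflow lookup sets use_mtl_regression
    MyMultitaskWorkflow = _factory_build_inferelator(regression="bbsr", workflow="multitask")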