def add_preprocess_step(self, fun, **kwargs):
    """
    Add a preprocessing step after count filtering but before calculating TFA or regression.

    :param fun: Preprocessing function. Can be provided as a string or as a function in
        `preprocessing.single_cell`.
        "log10" will take the log10 of pseudocounts
        "ln" will take the natural log of pseudocounts
        "log2" will take the log2 of pseudocounts
        "fft" will do the Freeman-Tukey transform
    :type fun: str, `preprocessing.single_cell` function
    :param kwargs: Additional arguments to the preprocessing function
    """

    if self.preprocessing_workflow is None:
        self.preprocessing_workflow = []

    if utils.is_string(fun) and fun.lower() in PREPROCESSING_FUNCTIONS:
        self.preprocessing_workflow.append((PREPROCESSING_FUNCTIONS[fun], kwargs))
    elif utils.is_string(fun) and fun.lower() not in PREPROCESSING_FUNCTIONS:
        raise ValueError("Unable to translate {f} into a function to call".format(f=fun))
    else:
        self.preprocessing_workflow.append((fun, kwargs))
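# Usage sketch for add_preprocess_step. Assumes a single-cell workflow object built with
# the public inferelator_workflow() wrapper; the custom callable below is hypothetical and
# only illustrates the (data, **kwargs) calling convention used for preprocessing steps.
from inferelator import inferelator_workflow

sc_worker = inferelator_workflow(regression="bbsr", workflow="single-cell")

# A built-in transform can be referenced by name
sc_worker.add_preprocess_step("log2")

def clip_counts(data, max_count=1000):
    # Hypothetical preprocessing callable; it receives the expression data object
    # plus any keyword arguments registered alongside it
    ...

sc_worker.add_preprocess_step(clip_counts, max_count=500)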
def add_gridsearch_parameter(self, param_name, param_vector):
    """
    Set a parameter to search through by exhaustive grid search

    :param param_name: The workflow parameter to change for each run
    :type param_name: str
    :param param_vector: An iterable with values to use for the parameter
    :type param_vector: iterable
    """

    if self.grid_param_values is None:
        self.grid_param_values = {}

    if self.grid_params is None:
        self.grid_params = []

    self.grid_params.append(param_name)

    if utils.is_string(param_vector):
        self.grid_param_values[param_name] = [param_vector]
    else:
        try:
            # Check that param_vector is iterable; if it is not, wrap the scalar in a list
            [True for _ in param_vector]
            self.grid_param_values[param_name] = param_vector
        except TypeError:
            self.grid_param_values[param_name] = [param_vector]
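# Usage sketch for grid-search setup. Assumes this method belongs to the
# CrossValidationManager class in inferelator.crossvalidation_workflow and that it wraps
# an existing workflow object; the parameter names swept below are illustrative.
from inferelator import inferelator_workflow
from inferelator.crossvalidation_workflow import CrossValidationManager

worker = inferelator_workflow(regression="bbsr", workflow="tfa")
cv = CrossValidationManager(worker)

cv.add_gridsearch_parameter("num_bootstraps", [5, 10, 20])  # iterable of values to sweep
cv.add_gridsearch_parameter("random_seed", 42)              # a scalar is wrapped into [42]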
def get_metric(cls, metric_ref):
    """
    This wraps a metric reference so that strings can be used instead of python imports
    Will either return a metric class or will raise an error

    :param metric_ref: str / RankSummingMetric
        String or subclass of RankSummingMetric
    :return: RankSummingMetric
        The metric class that corresponds to the string, or the RankSummingMetric class will be passed through
    """

    if is_string(metric_ref):
        metric_ref = metric_ref.lower()

        if metric_ref == "aupr" or metric_ref == "precision-recall":
            return RankSummaryPR
        if metric_ref == "mcc" or metric_ref == "matthews correlation coefficient":
            return RankSummaryMCC
        if metric_ref == "f1" or metric_ref == "f1 score":
            return RankSummaryF1
        if metric_ref == "combined":
            return CombinedMetric
        else:
            raise ValueError("Metric {metric_str} unknown".format(
                metric_str=metric_ref))

    elif issubclass(metric_ref, RankSummingMetric):
        return metric_ref

    else:
        raise ValueError(
            "Metric must be a string or a RankSummingMetric class")
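# Resolution sketch: how a metric string or class argument is resolved. The class that
# owns get_metric is not part of this excerpt; it is written as MetricHandler below
# purely for illustration.
aupr_metric = MetricHandler.get_metric("aupr")          # -> RankSummaryPR
mcc_metric = MetricHandler.get_metric("mcc")            # -> RankSummaryMCC
passthrough = MetricHandler.get_metric(RankSummaryF1)   # subclasses pass through unchanged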
def set_multiprocess_engine(cls, engine, processes=None):
    """
    Register the multiprocessing engine to use

    Currently available are:
    dask-cluster
    dask-k8
    dask-local
    multiprocessing
    local

    :param engine: A string to lookup the controller or a Controller object
    :type engine: str, Controller
    :param processes: Number of processes to use. Equivalent to calling `set_processes`
    :type processes: int
    """

    if cls.is_initialized:
        raise RuntimeError(
            "Client is currently active. Run .shutdown() before changing engines."
        )

    if utils.is_string(engine):

        if engine == "dask-cluster":
            from inferelator.distributed.dask_cluster_controller import DaskHPCClusterController
            cls.client = DaskHPCClusterController

        elif engine == "dask-local":
            from inferelator.distributed.dask_local_controller import DaskController
            cls.client = DaskController

        elif engine == "dask-k8":
            from inferelator.distributed.dask_k8_controller import DaskK8Controller
            cls.client = DaskK8Controller

        elif engine == "kvs":
            raise DeprecationWarning(
                "The KVS engine is deprecated. Use Dask-based multiprocessing"
            )

        elif engine == "multiprocessing":
            from inferelator.distributed.multiprocessing_controller import MultiprocessingController
            cls.client = MultiprocessingController

        elif engine == "local":
            from inferelator.distributed.local_controller import LocalController
            cls.client = LocalController

        else:
            raise ValueError(
                "Engine {eng_str} unknown".format(eng_str=engine))

    elif issubclass(engine, AbstractController):
        cls.client = engine

    else:
        raise ValueError(
            "Engine must be provided as a string for lookup or an implemented Controller class object"
        )

    utils.Debug.vprint(
        "Inferelator MPControl using engine {eng}".format(eng=cls.name()))

    if processes is not None:
        cls.set_processes(processes)
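# Usage sketch for engine selection through the MPControl interface this classmethod
# belongs to, following the usual inferelator run-script pattern of selecting an engine
# and then connecting before run().
from inferelator import MPControl

MPControl.set_multiprocess_engine("multiprocessing", processes=4)
MPControl.connect()

# On an HPC scheduler, the dask cluster controller would be selected instead:
# MPControl.set_multiprocess_engine("dask-cluster")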
def set_multiprocess_engine(cls, engine):
    """
    Register the multiprocessing engine to use

    Currently available are:
    dask-cluster
    dask-local
    kvs
    multiprocessing
    local

    :param engine: str / Controller object
        A string to lookup the controller or a Controller object
    """

    if cls.is_initialized:
        raise RuntimeError(
            "Client is currently active. Run .shutdown() before changing engines."
        )

    if utils.is_string(engine):

        if engine == "dask-cluster":
            from inferelator.distributed.dask_cluster_controller import DaskHPCClusterController
            cls.client = DaskHPCClusterController

        elif engine == "dask-local":
            from inferelator.distributed.dask_local_controller import DaskController
            cls.client = DaskController

        elif engine == "kvs":
            warnings.warn(
                "The KVS engine is deprecated. It has been replaced by Dask-based multiprocessing",
                DeprecationWarning)
            from inferelator.distributed.kvs_controller import KVSController
            cls.client = KVSController

        elif engine == "multiprocessing":
            from inferelator.distributed.multiprocessing_controller import MultiprocessingController
            cls.client = MultiprocessingController

        elif engine == "local":
            from inferelator.distributed.local_controller import LocalController
            cls.client = LocalController

        else:
            raise ValueError(
                "Engine {eng_str} unknown".format(eng_str=engine))

    elif issubclass(engine, AbstractController):
        cls.client = engine

    else:
        raise ValueError(
            "Engine must be provided as a string for lookup or an implemented Controller class object"
        )

    utils.Debug.vprint(
        "Inferelator MPControl using engine {eng}".format(eng=cls.name()))
def get_handler(cls, handler_ref):
    """
    This wraps a metadata reference so that strings can be used instead of python imports
    Will either return a metadata handling class or will raise an error

    :param handler_ref: str / MetadataParser
        String or subclass of MetadataParser
    :return: MetadataParser
        The metadata parser that corresponds to the string, or the MetadataParser object will be passed through
    """

    if utils.is_string(handler_ref):
        if handler_ref == "branching":
            return MetadataParserBranching
        elif handler_ref == "nonbranching":
            return MetadataParserNonbranching
        else:
            raise ValueError("Parser {parser_str} unknown".format(parser_str=handler_ref))
    elif issubclass(handler_ref, MetadataParser):
        return handler_ref
    else:
        raise ValueError("Handler must be a string or a MetadataParser class")
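# Resolution sketch for metadata parsers. The class that owns get_handler is not part of
# this excerpt; it is written as MetadataHandler below purely for illustration.
branching_parser = MetadataHandler.get_handler("branching")            # -> MetadataParserBranching
passthrough = MetadataHandler.get_handler(MetadataParserNonbranching)  # classes pass through unchanged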
def get_metric(cls, metric_ref):
    """
    This wraps a metric reference so that strings can be used instead of python imports
    Will either return a metric class or will raise an error

    :param metric_ref: str / RankSummingMetric
        String or subclass of RankSummingMetric
    :return: RankSummingMetric
        The metric class that corresponds to the string, or the RankSummingMetric class will be passed through
    """

    if utils.is_string(metric_ref):
        if metric_ref.lower() == "aupr" or metric_ref.lower() == "precision-recall":
            from inferelator.postprocessing.model_metrics import RankSummaryPR
            return RankSummaryPR
        else:
            raise ValueError("Metric {metric_str} unknown".format(
                metric_str=metric_ref))
    elif issubclass(metric_ref, RankSummingMetric):
        return metric_ref
    else:
        raise ValueError(
            "Metric must be a string or a RankSummingMetric class")
def _factory_build_inferelator(regression=_RegressionWorkflowMixin, workflow=WorkflowBase):
    """
    This is the factory method to create workflow classes that combine preprocessing and postprocessing
    (from workflow) with a regression method (from regression)

    :param regression: RegressionWorkflow subclass
        A class object which implements the run_regression and run_bootstrap methods for a specific
        regression strategy
    :param workflow: WorkflowBase subclass
        A class object which implements the necessary data loading and preprocessing to create design &
        response data for the regression strategy, and then the postprocessing to turn regression betas
        into a network
    :return RegressWorkflow:
        This returns an uninstantiated class which is the multi-inheritance result of both the regression
        workflow and the preprocessing/postprocessing workflow
    """

    use_mtl_regression = False

    # Decide which preprocessing/postprocessing workflow to use
    # String arguments are parsed for convenience in the run script
    if is_string(workflow):
        workflow = workflow.lower()
        if workflow == "base":
            workflow_class = WorkflowBase
        elif workflow == "tfa":
            from inferelator.tfa_workflow import TFAWorkFlow
            workflow_class = TFAWorkFlow
        elif workflow == "amusr" or workflow == "multitask":
            from inferelator.amusr_workflow import MultitaskLearningWorkflow
            workflow_class = MultitaskLearningWorkflow
            use_mtl_regression = True
        elif workflow == "single-cell":
            from inferelator.single_cell_workflow import SingleCellWorkflow
            workflow_class = SingleCellWorkflow
        elif workflow == "velocity":
            from inferelator.velocity_workflow import VelocityWorkflow
            workflow_class = VelocityWorkflow
        else:
            raise ValueError("{val} is not a string that can be mapped to a workflow class".format(val=workflow))

    # Or just use a workflow class directly
    elif inspect.isclass(workflow) and issubclass(workflow, WorkflowBase):
        workflow_class = workflow
    else:
        raise ValueError("Workflow must be a string that maps to a workflow class or an actual workflow class")

    # Decide which regression workflow to use
    # Return just the workflow if regression is set to None
    if regression is None:
        return workflow_class

    # String arguments are parsed for convenience in the run script
    elif is_string(regression):
        regression = regression.lower()
        if regression == "base":
            regression_class = _RegressionWorkflowMixin
        elif regression == "bbsr" and not use_mtl_regression:
            from inferelator.regression.bbsr_python import BBSRRegressionWorkflowMixin
            regression_class = BBSRRegressionWorkflowMixin
        elif regression == "elasticnet" and not use_mtl_regression:
            from inferelator.regression.elasticnet_python import ElasticNetWorkflowMixin
            regression_class = ElasticNetWorkflowMixin
        elif regression == "amusr":
            from inferelator.regression.amusr_regression import AMUSRRegressionWorkflowMixin
            regression_class = AMUSRRegressionWorkflowMixin
        elif regression == "bbsr-by-task" or (regression == "bbsr" and use_mtl_regression):
            from inferelator.regression.bbsr_multitask import BBSRByTaskRegressionWorkflowMixin
            regression_class = BBSRByTaskRegressionWorkflowMixin
        elif regression == "elasticnet-by-task" or (regression == "elasticnet" and use_mtl_regression):
            from inferelator.regression.elasticnet_python import ElasticNetByTaskRegressionWorkflowMixin
            regression_class = ElasticNetByTaskRegressionWorkflowMixin
        elif regression == "stars-by-task" or (regression == "stars" and use_mtl_regression):
            from inferelator.regression.stability_selection import StARSWorkflowByTaskMixin
            regression_class = StARSWorkflowByTaskMixin
        elif regression == "stars":
            from inferelator.regression.stability_selection import StARSWorkflowMixin
            regression_class = StARSWorkflowMixin
        elif regression == "sklearn" and not use_mtl_regression:
            from inferelator.regression.sklearn_regression import SKLearnWorkflowMixin
            regression_class = SKLearnWorkflowMixin
        elif regression == "sklearn" and use_mtl_regression:
            from inferelator.regression.sklearn_regression import SKLearnByTaskMixin
            regression_class = SKLearnByTaskMixin
        else:
            raise ValueError("{val} is not a string that can be mapped to a regression class".format(val=regression))

    # Or just use a regression class directly
    elif inspect.isclass(regression) and issubclass(regression, _RegressionWorkflowMixin):
        regression_class = regression
    else:
        raise ValueError("Regression must be a string that maps to a regression class or an actual regression class")

    class RegressWorkflow(regression_class, workflow_class):
        regression_type = regression_class

    return RegressWorkflow
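# Usage sketch for this factory through the public inferelator_workflow() wrapper, which
# builds the combined class from the same string or class arguments (in the released
# package the wrapper returns an instance of that class rather than the class itself).
from inferelator import inferelator_workflow

# String lookups, parsed as above
mtl_worker = inferelator_workflow(regression="amusr", workflow="multitask")

# Or pass the regression and workflow classes directly
from inferelator.single_cell_workflow import SingleCellWorkflow
from inferelator.regression.stability_selection import StARSWorkflowMixin

stars_worker = inferelator_workflow(regression=StARSWorkflowMixin, workflow=SingleCellWorkflow)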