MPControl.set_multiprocess_engine("dask-cluster") MPControl.client.use_default_configuration("rusty_ccb", n_jobs=n_jobs) MPControl.client.add_worker_conda( "source ~/.local/anaconda3/bin/activate inferelator") MPControl.client.add_slurm_command_line("--constraint=broadwell") MPControl.connect() if __name__ == '__main__': set_up_dask() utils.Debug.vprint("Testing preprocessing", level=0) # Figure 5D: BBSR By Task Learning worker = workflow.inferelator_workflow(regression="amusr", workflow="multitask") set_up_workflow(worker) worker.append_to_path('output_dir', 'figure_4_count') cv_wrap = set_up_cv_seeds(worker) cv_wrap.run() del cv_wrap del worker worker = workflow.inferelator_workflow(regression="amusr", workflow="multitask") set_up_workflow(worker) worker.add_preprocess_step("log2") worker.append_to_path('output_dir', 'figure_4_log2') cv_wrap = set_up_cv_seeds(worker) cv_wrap.run()
MPControl.set_multiprocess_engine("dask-cluster") MPControl.client.use_default_configuration("rusty_ccb", n_jobs=n_jobs) MPControl.client.add_worker_conda( "source ~/.local/anaconda3/bin/activate inferelator") MPControl.client.add_slurm_command_line("--constraint=broadwell") MPControl.connect() if __name__ == '__main__': set_up_dask() utils.Debug.vprint("Testing preprocessing", level=0) # Figure 5D: BBSR By Task Learning worker = workflow.inferelator_workflow(regression="bbsr-by-task", workflow="multitask") set_up_workflow(worker) worker.append_to_path('output_dir', 'figure_4_count') cv_wrap = set_up_cv_seeds(worker) #cv_wrap.run() del cv_wrap del worker worker = workflow.inferelator_workflow(regression="bbsr-by-task", workflow="multitask") set_up_workflow(worker) worker.add_preprocess_step("log2") worker.append_to_path('output_dir', 'figure_4_log2') cv_wrap = set_up_cv_seeds(worker) #cv_wrap.run()
priors_file=PRIORS_FILE_NAME, gold_standard_file=GOLD_STANDARD_FILE_NAME) wkf.set_expression_file(tsv=EXPRESSION_FILE_NAME) wkf.set_file_properties(expression_matrix_columns_are_genes=False) wkf.set_run_parameters(num_bootstraps=5) wkf.set_crossvalidation_parameters(split_gold_standard_for_crossvalidation=True, cv_split_ratio=0.2) return wkf # Inference with BBSR (crossvalidation) # Using the crossvalidation wrapper # Run the regression 10 times and hold 20% of the gold standard out of the priors for testing each time # Each run is seeded differently (and therefore has different holdouts) # Create a worker worker = workflow.inferelator_workflow(regression="bbsr", workflow="tfa") worker = set_up_workflow(worker) worker.append_to_path("output_dir", "bbsr") # Create a crossvalidation wrapper cv_wrap = CrossValidationManager(worker) # Assign variables for grid search cv_wrap.add_gridsearch_parameter('random_seed', CV_SEEDS) # Run cv_wrap.run() # Inference with Elastic Net (crossvalidation) # Using the crossvalidation wrapper # Run the regression 10 times and hold 20% of the gold standard out of the priors for testing each time
MPControl.set_multiprocess_engine("dask-cluster") MPControl.client.use_default_configuration("rusty_ccb", n_jobs=n_jobs) MPControl.client.add_worker_conda( "source ~/.local/anaconda3/bin/activate inferelator") MPControl.client.add_slurm_command_line("--constraint=broadwell") MPControl.connect() if __name__ == '__main__': set_up_dask() utils.Debug.vprint("Testing preprocessing", level=0) # Figure 5D: BBSR By Task Learning worker = workflow.inferelator_workflow(regression="bbsr", workflow="single-cell") set_up_workflow(worker) worker.append_to_path('output_dir', 'figure_4_count') cv_wrap = set_up_cv_seeds(worker) cv_wrap.run() del cv_wrap del worker worker = workflow.inferelator_workflow(regression="bbsr", workflow="single-cell") set_up_workflow(worker) worker.add_preprocess_step("log2") worker.append_to_path('output_dir', 'figure_4_log2') cv_wrap = set_up_cv_seeds(worker) cv_wrap.run()
YEASTRACT_TF_NAMES = "tf_names_yeastract.txt" INPUT_DIR = '/mnt/ceph/users/cjackson/inferelator/data/yeast' OUTPUT_PATH = '/mnt/ceph/users/cjackson/jackson_2019_inferelator_v050/' OUTPUT_FOLDER = "tau43_c" if __name__ == '__main__': MPControl.set_multiprocess_engine("dask-cluster") MPControl.client.use_default_configuration("rusty_preempt") MPControl.client.set_job_size_params(n_jobs=1) MPControl.client.add_worker_conda( "source ~/.local/anaconda3/bin/activate inferelator") MPControl.connect() wkf = inferelator_workflow("stars", VelocityWorkflow) wkf.set_file_paths(input_dir=INPUT_DIR, output_dir=os.path.join(OUTPUT_PATH, OUTPUT_FOLDER), gold_standard_file='gold_standard.tsv', priors_file=YEASTRACT_PRIOR, tf_names_file=YEASTRACT_TF_NAMES) wkf.set_expression_file(h5ad=DATA_FILE, h5_layer="counts") wkf.set_velocity_parameters(velocity_file_name=DATA_FILE, velocity_file_type="h5ad", velocity_file_layer="pv") wkf.set_crossvalidation_parameters( split_gold_standard_for_crossvalidation=True, cv_split_ratio=0.5) wkf.set_run_parameters(num_bootstraps=5) wkf.set_count_minimum(0.05) wkf.add_preprocess_step(single_cell.log2_data) wkf.tau = 43.28
def setUp(self):
    """Create the workflow fixture used by the tests.

    Calls ``inferelator_workflow`` with no arguments and stores the
    result on ``self.worker``.
    """
    default_worker = inferelator_workflow()
    self.worker = default_worker
# Example script: BBSR regression with the "tfa" workflow, executed through
# the "multiprocessing" engine.
from inferelator import workflow
from inferelator.distributed.inferelator_mp import MPControl
from inferelator import utils

# Set the debug verbosity level before configuring anything else.
utils.Debug.set_verbose_level(1)

# Select the multiprocessing engine, then set its process count to 3.
MPControl.set_multiprocess_engine("multiprocessing")
MPControl.client.processes = 3

# Build the workflow object; it is configured below through attributes.
wflow = workflow.inferelator_workflow(workflow="tfa", regression="bbsr")

# Common configuration parameters
wflow.input_dir = 'data/bsubtilis'
wflow.num_bootstraps = 2
wflow.delTmax = 110
wflow.delTmin = 0
wflow.tau = 45

# Only run when executed as a script; the configuration above still happens
# on import.
if __name__ == "__main__":
    wflow.run()
MPControl.set_multiprocess_engine("dask-cluster") MPControl.client.minimum_cores = n_cores MPControl.client.maximum_cores = n_cores MPControl.client.walltime = '48:00:00' MPControl.client.add_worker_env_line('module load slurm') MPControl.client.add_worker_env_line('module load gcc/8.3.0') MPControl.client.add_worker_env_line('source ' + CONDA_ACTIVATE_PATH) MPControl.client.cluster_controller_options.append("-p ccb") MPControl.connect() if __name__ == '__main__': start_mpcontrol_dask(100) for seed in range(42, 52): worker = workflow.inferelator_workflow(regression="amusr", workflow="amusr") worker.set_file_paths(input_dir=INPUT_DIR, output_dir=OUTPUT_DIR, gold_standard_file="gold_standard.tsv", gene_metadata_file="orfs.tsv", priors_file=YEASTRACT_PRIOR, tf_names_file=YEASTRACT_TF_NAMES) worker.set_file_properties(gene_list_index="SystematicName") worker.set_task_filters(target_expression_filter="union", regulator_expression_filter="intersection") worker.set_run_parameters(num_bootstraps=5, random_seed=seed) worker.set_crossvalidation_parameters( split_gold_standard_for_crossvalidation=True, cv_split_ratio=0.2) worker.append_to_path('output_dir',
def test_tfa(self):
    """Requesting the "tfa" workflow must yield a ``TFAWorkFlow`` instance."""
    # Imported inside the test, as in the original.
    from inferelator.tfa_workflow import TFAWorkFlow

    worker = workflow.inferelator_workflow(regression=RegressionWorkflow, workflow="tfa")

    # assertIsInstance is the dedicated unittest check; on failure it names
    # the object and the expected class instead of "False is not true".
    self.assertIsInstance(worker, TFAWorkFlow)
def test_singlecell(self):
    """Requesting the "single-cell" workflow must yield a ``SingleCellWorkflow`` instance."""
    # Imported inside the test, as in the original.
    from inferelator.single_cell_workflow import SingleCellWorkflow

    worker = workflow.inferelator_workflow(regression=RegressionWorkflow, workflow="single-cell")

    # assertIsInstance is the dedicated unittest check; on failure it names
    # the object and the expected class instead of "False is not true".
    self.assertIsInstance(worker, SingleCellWorkflow)
def test_elasticnet(self):
    """Requesting "elasticnet" regression must yield an ``ElasticNetWorkflow`` instance."""
    # Imported inside the test, as in the original.
    from inferelator.regression.elasticnet_python import ElasticNetWorkflow

    worker = workflow.inferelator_workflow(regression="elasticnet", workflow=workflow.WorkflowBase)

    # assertIsInstance is the dedicated unittest check; on failure it names
    # the object and the expected class instead of "False is not true".
    self.assertIsInstance(worker, ElasticNetWorkflow)
def test_bbsr(self):
    """Requesting "bbsr" regression must yield a ``BBSRRegressionWorkflow`` instance."""
    # Imported inside the test, as in the original.
    from inferelator.regression.bbsr_python import BBSRRegressionWorkflow

    worker = workflow.inferelator_workflow(regression="bbsr", workflow=workflow.WorkflowBase)

    # assertIsInstance is the dedicated unittest check; on failure it names
    # the object and the expected class instead of "False is not true".
    self.assertIsInstance(worker, BBSRRegressionWorkflow)
def test_base(self):
    """A workflow built with ``regression=None`` on ``WorkflowBase`` must refuse to run.

    The test expects ``run()`` to raise ``NotImplementedError``.
    """
    base_worker = workflow.inferelator_workflow(
        workflow=workflow.WorkflowBase,
        regression=None,
    )

    # Callable form of assertRaises: invokes base_worker.run() and checks
    # that it raises NotImplementedError.
    self.assertRaises(NotImplementedError, base_worker.run)
TF_NAMES = "tf_names_gold_standard.txt" YEASTRACT_TF_NAMES = "tf_names_yeastract.txt" INPUT_DIR = '/mnt/ceph/users/cjackson/inferelator/data/yeast' OUTPUT_PATH = '/mnt/ceph/users/cjackson/jackson_2019_inferelator_v050/' if __name__ == '__main__': MPControl.set_multiprocess_engine("dask-cluster") MPControl.client.use_default_configuration("rusty_preempt") MPControl.client.set_job_size_params(n_jobs=1) MPControl.client.add_worker_conda( "source ~/.local/anaconda3/bin/activate inferelator") MPControl.connect() wkf = inferelator_workflow("stars", "single-cell") wkf.set_file_paths(input_dir=INPUT_DIR, output_dir=os.path.join(OUTPUT_PATH, "no_velocity"), gold_standard_file='gold_standard.tsv', priors_file=YEASTRACT_PRIOR, tf_names_file=YEASTRACT_TF_NAMES) wkf.set_expression_file(h5ad=DATA_FILE, h5_layer="smooth_count") wkf.set_crossvalidation_parameters( split_gold_standard_for_crossvalidation=True, cv_split_ratio=0.5) wkf.set_run_parameters(num_bootstraps=5) wkf.set_count_minimum(0.05) wkf.add_preprocess_step(single_cell.log2_data) cv_wrap = crossvalidation_workflow.CrossValidationManager(wkf) cv_wrap.add_gridsearch_parameter('random_seed', list(range(42, 52)))
metadata_handler="nonbranching") wkf.set_run_parameters(num_bootstraps=5) wkf.set_crossvalidation_parameters( split_gold_standard_for_crossvalidation=True, cv_split_ratio=0.2) wkf.append_to_path('output_dir', "yeast_calico") if __name__ == '__main__': MPControl.set_multiprocess_engine("dask-cluster") MPControl.client.use_default_configuration("rusty_ccb", n_jobs=3) MPControl.client.add_worker_conda( "source ~/.local/anaconda3/bin/activate inferelator") MPControl.connect() worker = workflow.inferelator_workflow(regression="stars", workflow="single-cell") single_cell_setup(worker) # Create a crossvalidation wrapper cv_wrap = CrossValidationManager(worker) # Assign variables for grid search cv_wrap.add_gridsearch_parameter('random_seed', list(range(42, 52))) # Run # cv_wrap.run() del worker del cv_wrap worker = workflow.inferelator_workflow(regression="stars", workflow="tfa") calico_setup(worker)
MPControl.set_multiprocess_engine("dask-cluster") MPControl.client.minimum_cores = n_cores MPControl.client.maximum_cores = n_cores MPControl.client.walltime = '48:00:00' MPControl.client.add_worker_env_line('module load slurm') MPControl.client.add_worker_env_line('module load gcc/8.3.0') MPControl.client.add_worker_env_line('source ' + CONDA_ACTIVATE_PATH) MPControl.client.cluster_controller_options.append("-p ccb") MPControl.connect() if __name__ == '__main__': start_mpcontrol_dask(100) for seed in range(42, 52): worker = workflow.inferelator_workflow( regression=BBSRByTaskRegressionWorkflow, workflow="amusr") worker.set_file_paths(input_dir=INPUT_DIR, output_dir=OUTPUT_DIR, gold_standard_file="gold_standard.tsv", gene_metadata_file="orfs.tsv", priors_file=YEASTRACT_PRIOR, tf_names_file=YEASTRACT_TF_NAMES) worker.set_file_properties(gene_list_index="SystematicName") worker.set_task_filters(target_expression_filter="union", regulator_expression_filter="intersection") worker.set_run_parameters(num_bootstraps=5, random_seed=seed) worker.set_crossvalidation_parameters( split_gold_standard_for_crossvalidation=True, cv_split_ratio=0.2) worker.append_to_path('output_dir', "hybrid_bbsr_mtl_seed_" + str(seed))
def setUp(self):
    """Build the "amusr"/"amusr" workflow fixture used by the tests.

    The fixture's ``create_output_dir`` is replaced with a no-op and its
    ``gold_standard`` is set to a copy of ``TaskDataStub.priors_data``.
    """
    self.workflow = workflow.inferelator_workflow(regression="amusr", workflow="amusr")
    fixture = self.workflow

    # No-op stand-in: accepts any positional arguments and returns None.
    fixture.create_output_dir = lambda *_ignored: None

    # Assign a copy of the stub priors rather than the stub attribute itself.
    fixture.gold_standard = TaskDataStub.priors_data.copy()