def setup(test=False, order=1, learn_options=None, data_file=None): if 'num_proc' not in learn_options.keys(): learn_options['num_proc'] = None if 'num_thread_per_proc' not in learn_options.keys(): learn_options['num_thread_per_proc'] = None num_proc = local_multiprocessing.configure(TEST=test, num_proc=learn_options["num_proc"], num_thread_per_proc=learn_options["num_thread_per_proc"]) learn_options["num_proc"] = num_proc learn_options["order"] = order # gets used many places in code, not just here if "cv" not in learn_options.keys(): # if no CV preference is specified, use leave-one-gene-out learn_options["cv"] = "gene" if "normalize_features" not in learn_options.keys(): # if no CV preference is specified, use leave-one-gene-out learn_options["normalize_features"] = True if "weighted" not in learn_options.keys(): learn_options['weighted'] = None if "all pairs" not in learn_options.keys(): learn_options["all pairs"] = False if "include_known_pairs" not in learn_options.keys(): learn_options["include_known_pairs"] = False if "include_gene_guide_feature" not in learn_options.keys(): learn_options["include_gene_guide_feature"] = 0 #used as window size, so 0 is none #these should default to true to match experiments before they were options: if "gc_features" not in learn_options.keys(): learn_options["gc_features"] = True if "nuc_features" not in learn_options.keys(): learn_options["nuc_features"] = True if 'train_genes' not in learn_options.keys(): learn_options["train_genes"] = None if 'test_genes' not in learn_options.keys(): learn_options["test_genes"] = None if "num_proc" not in learn_options: learn_options["num_proc"] = None if "num_thread_per_proc" not in learn_options: learn_options["num_thread_per_proc"] = None if 'seed' not in learn_options: learn_options['seed'] = 1 if "flipV1target" not in learn_options: learn_options["flipV1target"] = False if 'num_genes_remove_train' not in learn_options: learn_options['num_genes_remove_train'] = None if 
"include_microhomology" not in learn_options: learn_options["include_microhomology"] = False assert "testing_non_binary_target_name" in learn_options.keys(), "need this in order to get metrics, though used to be not needed, so you may newly see this error" if learn_options["testing_non_binary_target_name"] not in ['ranks', 'raw', 'thrs']: raise Exception('learn_otions["testing_non_binary_target_name"] must be in ["ranks", "raw", "thrs"]') Xdf, Y, gene_position, target_genes = load_data.from_file(data_file, learn_options) learn_options['all_genes'] = target_genes if test: learn_options["order"] = 1 feature_sets = feat.featurize_data(Xdf, learn_options, Y, gene_position) np.random.seed(learn_options['seed']) return Y, feature_sets, target_genes, learn_options, num_proc
def setup(test=False, order=1, learn_options=None, data_file=None): if 'num_proc' not in learn_options.keys(): learn_options['num_proc'] = None if 'num_thread_per_proc' not in learn_options.keys(): learn_options['num_thread_per_proc'] = None num_proc = local_multiprocessing.configure( TEST=test, num_proc=learn_options["num_proc"], num_thread_per_proc=learn_options["num_thread_per_proc"]) learn_options["num_proc"] = num_proc learn_options[ "order"] = order # gets used many places in code, not just here if "cv" not in learn_options.keys(): # if no CV preference is specified, use leave-one-gene-out learn_options["cv"] = "gene" if "normalize_features" not in learn_options.keys(): # if no CV preference is specified, use leave-one-gene-out learn_options["normalize_features"] = True if "weighted" not in learn_options.keys(): learn_options['weighted'] = None if "all pairs" not in learn_options.keys(): learn_options["all pairs"] = False if "include_known_pairs" not in learn_options.keys(): learn_options["include_known_pairs"] = False if "include_gene_guide_feature" not in learn_options.keys(): learn_options[ "include_gene_guide_feature"] = 0 #used as window size, so 0 is none #these should default to true to match experiments before they were options: if "gc_features" not in learn_options.keys(): learn_options["gc_features"] = True if "nuc_features" not in learn_options.keys(): learn_options["nuc_features"] = True if 'train_genes' not in learn_options.keys(): learn_options["train_genes"] = None if 'test_genes' not in learn_options.keys(): learn_options["test_genes"] = None if "num_proc" not in learn_options: learn_options["num_proc"] = None if "num_thread_per_proc" not in learn_options: learn_options["num_thread_per_proc"] = None if 'seed' not in learn_options: learn_options['seed'] = 1 if "flipV1target" not in learn_options: learn_options["flipV1target"] = False if 'num_genes_remove_train' not in learn_options: learn_options['num_genes_remove_train'] = None if 
"include_microhomology" not in learn_options: learn_options["include_microhomology"] = False assert "testing_non_binary_target_name" in learn_options.keys( ), "need this in order to get metrics, though used to be not needed, so you may newly see this error" if learn_options["testing_non_binary_target_name"] not in [ 'ranks', 'raw', 'thrs' ]: raise Exception( 'learn_otions["testing_non_binary_target_name"] must be in ["ranks", "raw", "thrs"]' ) Xdf, Y, gene_position, target_genes = load_data.from_file( data_file, learn_options) learn_options['all_genes'] = target_genes if test: learn_options["order"] = 1 feature_sets = feat.featurize_data(Xdf, learn_options, Y, gene_position) np.random.seed(learn_options['seed']) return Y, feature_sets, target_genes, learn_options, num_proc
def shared_setup(learn_options, order, test):
    """Populate ``learn_options`` with defaults and configure multiprocessing.

    ``learn_options`` is modified in place: ``"num_proc"`` is replaced by the
    value returned from ``local_multiprocessing.configure``, ``"order"`` is
    set to ``order``, and every other option listed below is filled in only
    when the caller has not supplied it.

    Args:
        learn_options: Options dict to complete.
        order: Value stored under ``learn_options["order"]``.
        test: Passed as ``TEST`` to ``local_multiprocessing.configure``.

    Returns:
        The ``num_proc`` value returned by ``local_multiprocessing.configure``.
    """
    for proc_option in ("num_proc", "num_thread_per_proc"):
        if proc_option not in learn_options:
            learn_options[proc_option] = None

    num_proc = local_multiprocessing.configure(
        TEST=test,
        num_proc=learn_options["num_proc"],
        num_thread_per_proc=learn_options["num_thread_per_proc"],
    )
    learn_options["num_proc"] = num_proc
    learn_options["order"] = order  # gets used many places in code, not just here

    # "num_proc" and "num_thread_per_proc" are guaranteed to be present from
    # here on, so they need no further fallback.
    fallbacks = (
        ("cv", "gene"),  # no CV preference given -> leave-one-gene-out
        ("normalize_features", True),
        ("weighted", None),
        ("all pairs", False),
        ("include_known_pairs", False),
        ("include_gene_guide_feature", 0),  # used as window size, so 0 is none
        # default to True to match experiments run before these were options
        ("gc_features", True),
        ("nuc_features", True),
        ("train_genes", None),
        ("test_genes", None),
        ("seed", 1),
        ("flipV1target", False),
        ("num_genes_remove_train", None),
        ("include_microhomology", False),
        # the other option is "bo" for bayesian optimization
        ("algorithm_hyperparam_search", "grid"),
    )
    for option_name, fallback in fallbacks:
        if option_name not in learn_options:
            learn_options[option_name] = fallback

    return num_proc