コード例 #1
0
def setup(test=False, order=1, learn_options=None, data_file=None):

    if 'num_proc' not in learn_options.keys():
        learn_options['num_proc'] = None
    if 'num_thread_per_proc' not in learn_options.keys():
        learn_options['num_thread_per_proc'] = None

    num_proc = local_multiprocessing.configure(TEST=test, num_proc=learn_options["num_proc"], 
                                                num_thread_per_proc=learn_options["num_thread_per_proc"])
    learn_options["num_proc"] = num_proc

    learn_options["order"] = order  # gets used many places in code, not just here

    if "cv" not in learn_options.keys():
        # if no CV preference is specified, use leave-one-gene-out
        learn_options["cv"] = "gene"

    if "normalize_features" not in learn_options.keys():
        # if no CV preference is specified, use leave-one-gene-out
        learn_options["normalize_features"] = True

    if "weighted" not in learn_options.keys():
        learn_options['weighted'] = None

    if "all pairs" not in learn_options.keys():
        learn_options["all pairs"] = False

    if "include_known_pairs" not in learn_options.keys():
        learn_options["include_known_pairs"] = False

    if "include_gene_guide_feature" not in learn_options.keys():
        learn_options["include_gene_guide_feature"] = 0 #used as window size, so 0 is none

    #these should default to true to match experiments before they were options:
    if "gc_features" not in learn_options.keys():
        learn_options["gc_features"] = True
    if "nuc_features" not in learn_options.keys():
        learn_options["nuc_features"] = True

    if 'train_genes' not in learn_options.keys():
        learn_options["train_genes"] = None
    if 'test_genes' not in learn_options.keys():
        learn_options["test_genes"] = None

    if "num_proc" not in learn_options:
        learn_options["num_proc"] = None
    if "num_thread_per_proc" not in learn_options:
        learn_options["num_thread_per_proc"] = None

    if 'seed' not in learn_options:
        learn_options['seed'] = 1

    if "flipV1target" not in learn_options:
        learn_options["flipV1target"] = False

    if 'num_genes_remove_train' not in learn_options:
        learn_options['num_genes_remove_train'] = None

    if "include_microhomology" not in learn_options:
        learn_options["include_microhomology"] = False


    assert "testing_non_binary_target_name" in learn_options.keys(), "need this in order to get metrics, though used to be not needed, so you may newly see this error"
    if learn_options["testing_non_binary_target_name"] not in ['ranks', 'raw', 'thrs']:
        raise Exception('learn_otions["testing_non_binary_target_name"] must be in ["ranks", "raw", "thrs"]')

    Xdf, Y, gene_position, target_genes = load_data.from_file(data_file, learn_options)
    learn_options['all_genes'] = target_genes

    if test:
        learn_options["order"] = 1
 
    feature_sets = feat.featurize_data(Xdf, learn_options, Y, gene_position)
    np.random.seed(learn_options['seed'])

    return Y, feature_sets, target_genes, learn_options, num_proc
コード例 #2
0
def setup(test=False, order=1, learn_options=None, data_file=None):

    if 'num_proc' not in learn_options.keys():
        learn_options['num_proc'] = None
    if 'num_thread_per_proc' not in learn_options.keys():
        learn_options['num_thread_per_proc'] = None

    num_proc = local_multiprocessing.configure(
        TEST=test,
        num_proc=learn_options["num_proc"],
        num_thread_per_proc=learn_options["num_thread_per_proc"])
    learn_options["num_proc"] = num_proc

    learn_options[
        "order"] = order  # gets used many places in code, not just here

    if "cv" not in learn_options.keys():
        # if no CV preference is specified, use leave-one-gene-out
        learn_options["cv"] = "gene"

    if "normalize_features" not in learn_options.keys():
        # if no CV preference is specified, use leave-one-gene-out
        learn_options["normalize_features"] = True

    if "weighted" not in learn_options.keys():
        learn_options['weighted'] = None

    if "all pairs" not in learn_options.keys():
        learn_options["all pairs"] = False

    if "include_known_pairs" not in learn_options.keys():
        learn_options["include_known_pairs"] = False

    if "include_gene_guide_feature" not in learn_options.keys():
        learn_options[
            "include_gene_guide_feature"] = 0  #used as window size, so 0 is none

    #these should default to true to match experiments before they were options:
    if "gc_features" not in learn_options.keys():
        learn_options["gc_features"] = True
    if "nuc_features" not in learn_options.keys():
        learn_options["nuc_features"] = True

    if 'train_genes' not in learn_options.keys():
        learn_options["train_genes"] = None
    if 'test_genes' not in learn_options.keys():
        learn_options["test_genes"] = None

    if "num_proc" not in learn_options:
        learn_options["num_proc"] = None
    if "num_thread_per_proc" not in learn_options:
        learn_options["num_thread_per_proc"] = None

    if 'seed' not in learn_options:
        learn_options['seed'] = 1

    if "flipV1target" not in learn_options:
        learn_options["flipV1target"] = False

    if 'num_genes_remove_train' not in learn_options:
        learn_options['num_genes_remove_train'] = None

    if "include_microhomology" not in learn_options:
        learn_options["include_microhomology"] = False

    assert "testing_non_binary_target_name" in learn_options.keys(
    ), "need this in order to get metrics, though used to be not needed, so you may newly see this error"
    if learn_options["testing_non_binary_target_name"] not in [
            'ranks', 'raw', 'thrs'
    ]:
        raise Exception(
            'learn_otions["testing_non_binary_target_name"] must be in ["ranks", "raw", "thrs"]'
        )

    Xdf, Y, gene_position, target_genes = load_data.from_file(
        data_file, learn_options)
    learn_options['all_genes'] = target_genes

    if test:
        learn_options["order"] = 1

    feature_sets = feat.featurize_data(Xdf, learn_options, Y, gene_position)
    np.random.seed(learn_options['seed'])

    return Y, feature_sets, target_genes, learn_options, num_proc
コード例 #3
0
def shared_setup(learn_options, order, test):
    if 'num_proc' not in learn_options.keys():
        learn_options['num_proc'] = None
    if 'num_thread_per_proc' not in learn_options.keys():
        learn_options['num_thread_per_proc'] = None

    num_proc = local_multiprocessing.configure(TEST=test, num_proc=learn_options["num_proc"],
                                                num_thread_per_proc=learn_options["num_thread_per_proc"])
    learn_options["num_proc"] = num_proc

    learn_options["order"] = order  # gets used many places in code, not just here

    if "cv" not in learn_options.keys():
        # if no CV preference is specified, use leave-one-gene-out
        learn_options["cv"] = "gene"

    if "normalize_features" not in learn_options.keys():
        # if no CV preference is specified, use leave-one-gene-out
        learn_options["normalize_features"] = True

    if "weighted" not in learn_options.keys():
        learn_options['weighted'] = None

    if "all pairs" not in learn_options.keys():
        learn_options["all pairs"] = False

    if "include_known_pairs" not in learn_options.keys():
        learn_options["include_known_pairs"] = False

    if "include_gene_guide_feature" not in learn_options.keys():
        learn_options["include_gene_guide_feature"] = 0 #used as window size, so 0 is none

    #these should default to true to match experiments before they were options:
    if "gc_features" not in learn_options.keys():
        learn_options["gc_features"] = True
    if "nuc_features" not in learn_options.keys():
        learn_options["nuc_features"] = True

    if 'train_genes' not in learn_options.keys():
        learn_options["train_genes"] = None
    if 'test_genes' not in learn_options.keys():
        learn_options["test_genes"] = None

    if "num_proc" not in learn_options:
        learn_options["num_proc"] = None
    if "num_thread_per_proc" not in learn_options:
        learn_options["num_thread_per_proc"] = None

    if 'seed' not in learn_options:
        learn_options['seed'] = 1

    if "flipV1target" not in learn_options:
        learn_options["flipV1target"] = False

    if 'num_genes_remove_train' not in learn_options:
        learn_options['num_genes_remove_train'] = None

    if "include_microhomology" not in learn_options:
        learn_options["include_microhomology"] = False

    if "algorithm_hyperparam_search" not in learn_options:
        learn_options["algorithm_hyperparam_search"] = "grid" # other options is bo for bayesian optimization

    return num_proc