Example 1
def run_models(models,
               orders,
               GP_likelihoods=['gaussian', 'warped'],
               WD_kernel_degrees=[3],
               adaboost_learning_rates=[0.1],
               adaboost_num_estimators=[100],
               adaboost_max_depths=[3],
               learn_options_set=None,
               test=False,
               CV=True,
               setup_function=setup,
               set_target_fn=set_target,
               pam_audit=True,
               length_audit=True,
               return_data=False):
    '''
    Set CV to False to train a final model on all of the data instead of
    cross-validating; note that execution still goes through what looks like
    the cross-validation code path.
    '''

    results = {}
    assert learn_options_set is not None, "need to specify learn_options_set"
    all_learn_options = {}

    # shorten so easier to display on graphs
    feat_models_short = {
        'L1': "L1",
        'L2': "L2",
        'elasticnet': "EN",
        'linreg': "LR",
        'RandomForest': "RF",
        'AdaBoost': "AB",
        'AdaBoostClassifier': "ABClass",
        'doench': 'doench',
        "logregL1": "logregL1",
        "sgrna_from_doench": "sgrna_from_doench",
        'SVC': 'SVC',
        'xu_et_al': 'xu_et_al'
    }

    if not CV:
        print("Received option CV=False, so I'm training using all of the data")
        assert len(learn_options_set) == 1, \
            "when CV is False, only 1 set of learn options is allowed"
        assert len(models) == 1, "when CV is False, only 1 model is allowed"

    for learn_options_str in list(learn_options_set.keys()):
        # these options get augmented in setup
        partial_learn_opt = learn_options_set[learn_options_str]
        # if the model requires encoded features
        for model in models:
            # models requiring explicit featurization
            if model in feat_models_short:
                for order in orders:
                    print("running %s, order %d for %s" %
                          (model, order, learn_options_str))

                    Y, feature_sets, target_genes, learn_options, num_proc = setup_function(
                        test=test,
                        order=order,
                        learn_options=partial_learn_opt,
                        pam_audit=pam_audit,
                        length_audit=length_audit
                    )  # TODO: precompute features for all orders, as this is repeated for each model

                    if model == 'L1':
                        learn_options_model = L1_setup(
                            copy.deepcopy(learn_options),
                            set_target_fn=set_target_fn)
                    elif model == 'L2':
                        learn_options_model = L2_setup(
                            copy.deepcopy(learn_options),
                            set_target_fn=set_target_fn)
                    elif model == 'elasticnet':
                        learn_options_model = elasticnet_setup(
                            copy.deepcopy(learn_options),
                            set_target_fn=set_target_fn)
                    elif model == 'linreg':
                        learn_options_model = linreg_setup(
                            copy.deepcopy(learn_options),
                            set_target_fn=set_target_fn)
                    elif model == "logregL1":
                        learn_options_model = logregL1_setup(
                            copy.deepcopy(learn_options),
                            set_target_fn=set_target_fn)
                    elif model == 'RandomForest':
                        learn_options_model = RF_setup(
                            copy.deepcopy(learn_options),
                            set_target_fn=set_target_fn)
                    elif model == 'SVC':
                        learn_options_model = SVC_setup(
                            copy.deepcopy(learn_options),
                            set_target_fn=set_target_fn)
                    elif model == 'doench':
                        learn_options_model = doench_setup(
                            copy.deepcopy(learn_options),
                            set_target_fn=set_target_fn)
                    elif model == 'sgrna_from_doench':
                        learn_options_model = sgrna_from_doench_setup(
                            copy.deepcopy(learn_options),
                            set_target_fn=set_target_fn)
                    elif model == 'xu_et_al':
                        learn_options_model = xu_et_al_setup(
                            copy.deepcopy(learn_options),
                            set_target_fn=set_target_fn)
                    elif model in ('AdaBoost', 'AdaBoostClassifier'):
                        for learning_rate in adaboost_learning_rates:
                            for num_estimators in adaboost_num_estimators:
                                for max_depth in adaboost_max_depths:
                                    learn_options_model = adaboost_setup(
                                        copy.deepcopy(learn_options),
                                        learning_rate=learning_rate,
                                        num_estimators=num_estimators,
                                        max_depth=max_depth,
                                        set_target_fn=set_target_fn,
                                        model=model)
                        # NOTE: only the learn_options from the last
                        # hyperparameter combination survive the loops above.
                        model_string = feat_models_short[
                            model] + '_or%d_md%d_lr%.2f_n%d_%s' % (
                                learn_options_set[learn_options_str]["order"],
                                max_depth, learning_rate, num_estimators,
                                learn_options_str)
                    if model not in ('AdaBoost', 'AdaBoostClassifier'):
                        model_string = feat_models_short[
                            model] + '_ord%d_%s' % (
                                learn_options_set[learn_options_str]["order"],
                                learn_options_str)

                    results[model_string] = pd.cross_validate(
                        Y,
                        feature_sets,
                        learn_options=learn_options_model,
                        TEST=test,
                        CV=CV)

                    all_learn_options[model_string] = learn_options_model
            # if the model doesn't require explicit featurization
            else:
                assert setup_function == setup, "not yet modified to handle this"
                print("running %s for %s" % (model, learn_options_str))
                Y, feature_sets, target_genes, learn_options, num_proc = setup(
                    test=test,
                    order=1,
                    learn_options=partial_learn_opt,
                    pam_audit=pam_audit,
                    length_audit=length_audit)
                if model == 'mean':
                    learn_options_model = mean_setup(
                        copy.deepcopy(learn_options))
                elif model == 'random':
                    learn_options_model = random_setup(
                        copy.deepcopy(learn_options))
                elif model == 'DNN':
                    learn_options_model = DNN_setup(
                        copy.deepcopy(learn_options))
                elif model == 'GP':
                    for likelihood in GP_likelihoods:
                        for degree in WD_kernel_degrees:
                            learn_options_model = GP_setup(
                                copy.deepcopy(learn_options),
                                likelihood=likelihood,
                                degree=degree)
                            model_string = '%s_%s_degree%d_%s' % (
                                model, likelihood, degree, learn_options_str)
                            results[model_string] = pd.cross_validate(
                                Y,
                                feature_sets,
                                learn_options=learn_options_model,
                                TEST=test,
                                CV=CV)

                else:
                    raise NotImplementedError("model %s not supported" % model)

                # "GP" already calls pd.cross_validate() and has its own model_string, so skip this.
                if model != "GP":
                    model_string = model + '_%s' % learn_options_str
                    results[model_string] = pd.cross_validate(
                        Y,
                        feature_sets,
                        learn_options=learn_options_model,
                        TEST=test,
                        CV=CV)

            all_learn_options[model_string] = learn_options_model

    return results, all_learn_options
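
For orientation, here is a minimal, hypothetical usage sketch of run_models as defined above. The option contents are assumptions inferred from how the function reads them (it looks up "order" in each learn-options bundle), and pd.cross_validate is presumably the project's own prediction/cross-validation helper imported as pd, rather than pandas.

# Hypothetical usage sketch; the option values below are illustrative only.
learn_options_set = {
    "my_options": {"order": 2},  # one named bundle of partial learn options
}

# Cross-validate two featurized models over second-order features.
results, all_learn_options = run_models(
    models=['L2', 'RandomForest'],
    orders=[2],
    learn_options_set=learn_options_set,
    CV=True)

# Train one final model on all of the data: CV=False requires exactly one
# model and exactly one entry in learn_options_set.
final_results, final_learn_options = run_models(
    models=['AdaBoost'],
    orders=[2],
    learn_options_set=learn_options_set,
    CV=False)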
Example 2
def run_models(models, orders, GP_likelihoods=['gaussian', 'warped'], WD_kernel_degrees=[3],
               adaboost_learning_rates=[0.1], adaboost_num_estimators=[100], adaboost_max_depths=[3],
               learn_options_set=None, test=False, CV=True, setup_function=setup, set_target_fn=set_target):

    '''
    Set CV to False to train a final model on all of the data instead of
    cross-validating; note that execution still goes through what looks like
    the cross-validation code path.
    '''


    results = {}
    assert learn_options_set is not None, "need to specify learn_options_set"
    all_learn_options = {}

    #shorten so easier to display on graphs
    feat_models_short = {'L1':"L1", 'L2':"L2", 'elasticnet':"EN", 'linreg':"LR",
                         'RandomForest': "RF",
                         'AdaBoost':"AB", 'doench': 'doench',
                         "logregL1": "logregL1", "sgrna_from_doench":"sgrna_from_doench", 'SVC': 'SVC', 'xu_et_al': 'xu_et_al'}

    if not CV:
        print "Received option CV=False, so I'm training using all of the data"
        assert len(learn_options_set.keys()) == 1, "when CV is False, only 1 set of learn options is allowed"
        assert len(models) == 1, "when CV is False, only 1 model is allowed"
                    

    for learn_options_str in learn_options_set.keys():
        # these options get augmented in setup
        partial_learn_opt = learn_options_set[learn_options_str]
        # if the model requires encoded features
        for model in models:
            # models requiring explicit featurization
            if model in feat_models_short.keys():
                for order in orders:
                    print "running %s, order %d for %s" % (model, order, learn_options_str)
                    Y, feature_sets, target_genes, learn_options, num_proc = setup_function(test=test, order=order, learn_options=partial_learn_opt) # TODO: precompute features for all orders, as this is repeated for each model

                    if model == 'L1':
                        learn_options_model = L1_setup(copy.deepcopy(learn_options), set_target_fn=set_target_fn)
                    elif model == 'L2':
                        learn_options_model = L2_setup(copy.deepcopy(learn_options), set_target_fn=set_target_fn)
                    elif model == 'elasticnet':
                        learn_options_model = elasticnet_setup(copy.deepcopy(learn_options), set_target_fn=set_target_fn)
                    elif model == 'linreg':
                        learn_options_model = linreg_setup(copy.deepcopy(learn_options), set_target_fn=set_target_fn)
                    elif model == "logregL1":
                        learn_options_model = logregL1_setup(copy.deepcopy(learn_options), set_target_fn=set_target_fn)
                    elif model == 'RandomForest':
                        learn_options_model = RF_setup(copy.deepcopy(learn_options), set_target_fn=set_target_fn)
                    elif model == 'SVC':
                        learn_options_model = SVC_setup(copy.deepcopy(learn_options), set_target_fn=set_target_fn)
                    elif model == 'doench':
                        learn_options_model = doench_setup(copy.deepcopy(learn_options), set_target_fn=set_target_fn)
                    elif model == 'sgrna_from_doench':
                        learn_options_model = sgrna_from_doench_setup(copy.deepcopy(learn_options), set_target_fn=set_target_fn)
                    elif model == 'xu_et_al':
                        learn_options_model = xu_et_al_setup(copy.deepcopy(learn_options), set_target_fn=set_target_fn)
                    elif model == 'AdaBoost':
                        for learning_rate in adaboost_learning_rates:
                            for num_estimators in adaboost_num_estimators:
                                for max_depth in adaboost_max_depths:
                                    learn_options_model = adaboost_setup(copy.deepcopy(learn_options), learning_rate=learning_rate, num_estimators=num_estimators, max_depth=max_depth, set_target_fn=set_target_fn)
                        model_string = feat_models_short[model] + '_or%d_md%d_lr%.2f_n%d_%s' % (learn_options_set[learn_options_str]["order"], max_depth, learning_rate, num_estimators, learn_options_str)
                    if model != 'AdaBoost':
                        model_string = feat_models_short[model] + '_ord%d_%s' % (learn_options_set[learn_options_str]["order"], learn_options_str)

                    results[model_string] = pd.cross_validate(Y, feature_sets, learn_options=learn_options_model, TEST=test, CV=CV)

                    all_learn_options[model_string] = learn_options_model
            # if the model doesn't require explicit featurization
            else:
                assert setup_function == setup, "not yet modified to handle this"
                print "running %s for %s" % (model, learn_options_str)
                Y, feature_sets, target_genes, learn_options, num_proc = setup(test=test, order=1, learn_options=partial_learn_opt)
                if model == 'mean':
                    learn_options_model = mean_setup(copy.deepcopy(learn_options))
                elif model == 'random':
                    learn_options_model = random_setup(copy.deepcopy(learn_options))
                elif model == 'DNN':
                    learn_options_model = DNN_setup(copy.deepcopy(learn_options))
                elif model == 'GP':
                    for likelihood in GP_likelihoods:
                        for degree in WD_kernel_degrees:
                            learn_options_model = GP_setup(copy.deepcopy(learn_options), likelihood=likelihood, degree=degree)
                            model_string = '%s_%s_degree%d_%s' % (model, likelihood, degree, learn_options_str)
                            results[model_string] = pd.cross_validate(Y, feature_sets, learn_options=learn_options_model,TEST=test, CV=CV)

                else:
                    raise NotImplementedError("model %s not supported" % model)

                # "GP" already calls pd.cross_validate() and has its own model_string, so skip this.
                if model != "GP":
                    model_string = model + '_%s' % learn_options_str
                    results[model_string] = pd.cross_validate(Y, feature_sets, learn_options=learn_options_model, TEST=test, CV=CV)

            all_learn_options[model_string] = learn_options_model

    return results, all_learn_options