Example #1
import copy

# new_classifiers5, create_all_expanded_sets, distance_between and
# best_classifiers_choice are project-local helpers.

def one_iteration(TR_set, labels, binary, ensemble_methods, weight, lb, test,
                  hamming):
    # Work on a deep copy so the caller's training splits are not mutated.
    TR_set_used = copy.deepcopy(TR_set)

    best_TR_models = None
    TR_models_used = None

    best_strings = new_classifiers5.names_all_classifiers(ensemble_methods)

    prev_distance = float("inf")
    distance = float("inf")

    # Re-fit the expanded sets up to five times, stopping early once the
    # pairwise prediction distance stops improving.
    i = 0
    while i < 5:
        if prev_distance < distance:
            best_TR_models = TR_models_used
            break
        print(i)
        TR_models_used, second_predictions = create_all_expanded_sets(
            TR_set_used, best_strings, ensemble_methods, binary, weight, lb,
            labels)
        print("distance")
        prev_distance = distance
        distance, largest = distance_between(second_predictions, hamming)
        print(distance)
        print(largest)
        i += 1
        print("  ")

    # If the loop ran all five iterations without the distance worsening,
    # fall back to the models from the final iteration.
    if best_TR_models is None:
        best_TR_models = TR_models_used

    # Score on the held-out test set (TS) or on the third training split (TR3).
    if test:
        return best_classifiers_choice(best_TR_models['TR_model'],
                                       TR_set_used['TS'],
                                       TR_set_used['TS_outcome'], TR_set_used,
                                       best_strings, binary, weight, lb,
                                       labels)
    else:
        return best_classifiers_choice(best_TR_models['TR_model'],
                                       TR_set_used['TR3'],
                                       TR_set_used['TR3_outcome'], TR_set_used,
                                       best_strings, binary, weight, lb,
                                       labels)
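The loop above is a keep-the-best-so-far search: re-fit, re-score, and stop as
soon as the distance stops improving. A minimal, self-contained sketch of that
pattern (refit_models and score below are hypothetical stand-ins for
create_all_expanded_sets and distance_between, not part of the project):

import random

def refit_models(seed):
    # Stand-in for create_all_expanded_sets: a "model" plus its predictions.
    rng = random.Random(seed)
    return {'seed': seed}, [rng.random() for _ in range(10)]

def score(predictions):
    # Stand-in for distance_between: lower is better.
    return sum(predictions)

best_models = None
prev_distance = float("inf")
for i in range(5):
    models, predictions = refit_models(i)
    distance = score(predictions)
    if distance >= prev_distance:
        break  # the distance got worse; keep the previous iteration's models
    best_models, prev_distance = models, distance

print(best_models, prev_distance)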
Example #2
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import GridSearchCV

# new_classifiers5, greedy, mean_combine, tuned_classifier_TR3 and
# tune_classifier_expanded are project-local helpers.

def best_parameters(TR_set, ensemble_methods, binary):
    keeper = []
    for value in new_classifiers5.create_best_classifiers(
            new_classifiers5.names_all_classifiers(ensemble_methods),
            ensemble_methods):
        keeper.append(
            tuned_classifier_TR3(TR_set, value['model'],
                                 value['tuned_parameters'], value['type'],
                                 binary))
    return keeper
def expand_all_combine_all(full_training_set, TR_set_used, TS, TS_outcome,
                           best_classifiers, labels, binary, new_features_only,
                           best_strings_first, ensemble_methods, weight, lb):
    best_strings_second = new_classifiers5.names_all_classifiers(
        ensemble_methods)
    return mean_combine.combine(full_training_set, TR_set_used, TS, TS_outcome,
                                best_classifiers, labels, binary,
                                new_features_only, best_strings_first,
                                best_strings_second, ensemble_methods, weight,
                                lb)
def fit_expanded_set_all_models(TR2_expanded, TR2_outcome, ensemble_methods):
    fitted_models = []
    for each_classifier in new_classifiers5.create_best_classifiers(
            new_classifiers5.names_all_classifiers(ensemble_methods),
            ensemble_methods):
        fitted_models.append(
            tune_classifier_expanded(TR2_expanded, TR2_outcome,
                                     each_classifier['model'],
                                     each_classifier['tuned_parameters'],
                                     each_classifier['type']))
    return fitted_models
def expand_all(TR_set_used, labels, binary, new_features_only, training_set3,
               ensemble_methods, lb, weight):

    best_strings_first = new_classifiers5.names_all_classifiers(
        ensemble_methods)
    best_strings_second = best_strings_first

    best_classifiers = new_classifiers5.one_iteration(
        TR_set_used, training_set3, new_features_only, labels, binary,
        best_strings_first, best_strings_second, ensemble_methods, lb, weight)

    return (best_classifiers, best_strings_first)
def expand_best(TR_set_used, labels, binary, new_features_only, training_set3,
                ensemble_methods, lb, weight):

    # Fit each candidate classifier on TR and evaluate it on TR3.
    list_classifiers = []
    for each_classifier in new_classifiers5.create_classifiers(
            ensemble_methods):

        if each_classifier['tuned_parameters']:
            model = GridSearchCV(each_classifier['model'],
                                 each_classifier['tuned_parameters'],
                                 cv=10,
                                 scoring="accuracy").fit(
                                     TR_set_used['TR'],
                                     TR_set_used['TR_outcome'])
        else:
            model = each_classifier['model'].fit(TR_set_used['TR'],
                                                 TR_set_used['TR_outcome'])

        type_hold = each_classifier['type']
        predictions = model.predict(TR_set_used['TR3'])

        # roc_auc_score expects (y_true, y_score); the true TR3 outcomes
        # come first.
        if binary:
            roc_score = roc_auc_score(TR_set_used['TR3_outcome'], predictions)
        else:
            roc_score = new_classifiers5.multi_class_roc(
                weight, lb, predictions, TR_set_used['TR3_outcome'], labels)

        list_classifiers.append((type_hold, predictions, roc_score, model))

    # Greedily select the best-performing subset of classifiers.
    best_strings_first = greedy.find_best(list_classifiers, TR_set_used['TR3'],
                                          TR_set_used['TR3_outcome'], labels,
                                          TR_set_used['TR'],
                                          TR_set_used['TR_outcome'], weight,
                                          lb, binary)

    best_strings_second = new_classifiers5.names_all_classifiers(
        ensemble_methods)

    best_classifiers = new_classifiers5.one_iteration(
        TR_set_used, training_set3, new_features_only, labels, binary,
        best_strings_first, best_strings_second, ensemble_methods, lb, weight)

    return (best_classifiers, best_strings_first)
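The core pattern in expand_best, tuning each model with cross-validation on one
split and then scoring it on a held-out split, can be reproduced in isolation.
A minimal sketch with stand-in data (make_classification and the split names
here are illustrative, not part of the original project):

from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import GridSearchCV, train_test_split

X, y = make_classification(n_samples=400, random_state=0)
# TR plays the role of the tuning split, TR3 the evaluation split.
TR, TR3, TR_outcome, TR3_outcome = train_test_split(X, y, random_state=0)

model = GridSearchCV(RandomForestClassifier(random_state=0),
                     {'n_estimators': [50, 100]},
                     cv=10, scoring="accuracy").fit(TR, TR_outcome)
predictions = model.predict(TR3)
print(roc_auc_score(TR3_outcome, predictions))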
Example #7
import time

import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score, roc_auc_score
from sklearn.model_selection import GridSearchCV

def linear_stacking(TR_set, ensemble_methods, weight, lb, binary, labels):
    start_time = time.time()
    TR2_predictions = []
    TS_predictions = []
    # Re-split the full training data into TR1 (base models) and TR2
    # (meta-model).
    hold_sets = new_classifiers5.get_new_training(
        np.column_stack((TR_set['TR_full'], TR_set["TR_full_outcome"])))
    for each_classifier in new_classifiers5.create_best_classifiers(
            new_classifiers5.names_all_classifiers(ensemble_methods),
            ensemble_methods):
        store = tuned_classifier(hold_sets['TR1'], hold_sets['TR1_outcome'],
                                 each_classifier['model'],
                                 each_classifier['tuned_parameters'],
                                 each_classifier['type'])

        # Stack each base model's predictions as a new feature column.
        if len(TR2_predictions) == 0:
            TR2_predictions = store['model'].predict(hold_sets['TR2'])
            TS_predictions = store['model'].predict(TR_set['TS'])
        else:
            TR2_predictions = np.column_stack(
                (TR2_predictions, store['model'].predict(hold_sets['TR2'])))
            TS_predictions = np.column_stack(
                (TS_predictions, store['model'].predict(TR_set['TS'])))

    # Fit a logistic-regression meta-model on the stacked predictions.
    # The liblinear solver supports both the l1 and l2 penalties in the grid.
    tuned_parameters_logistic = [{
        'penalty': ['l1', 'l2'],
        'C': [0.01, 0.1, 1, 5, 10]
    }]
    model = GridSearchCV(LogisticRegression(solver='liblinear'),
                         tuned_parameters_logistic,
                         cv=5).fit(TR2_predictions, hold_sets['TR2_outcome'])
    predictions = model.predict(TS_predictions)

    if binary:
        print("linear stacking   roc_auc_score: " + str(
            roc_auc_score(
                TR_set['TS_outcome'], predictions, average='weighted')))
        print("                  f-score: " + str(
            f1_score(TR_set['TS_outcome'], predictions, average='binary')))
    else:
        print(" linear stacking  roc_auc_score: " + str(
            new_classifiers5.multi_class_roc(weight, lb, predictions,
                                             TR_set['TS_outcome'], labels)))
        print("      f-score: " + str(
            f1_score(TR_set['TS_outcome'], predictions, average='weighted')))
    print("--- %s seconds ---" % (time.time() - start_time))
    print(" ")