def one_iteration(TR_set, labels, binary, ensemble_methods, weight, lb, test, hamming):
    # Work on a deep copy so the caller's training sets are never mutated.
    TR_set_used = copy.deepcopy(TR_set)
    best_TR_models = None
    TR_models_used = None
    second_predictions = None
    best_strings = new_classifiers5.names_all_classifiers(ensemble_methods)
    prev_distance = float("inf")
    distance = float("inf")
    i = 0
    while i < 5:
        # Stop expanding once the distance between prediction sets stops improving.
        if prev_distance < distance:
            best_TR_models = TR_models_used
            break
        print i
        TR_models_used, second_predictions = create_all_expanded_sets(
            TR_set_used, best_strings, ensemble_methods, binary, weight, lb,
            labels)
        print("distance")
        prev_distance = distance
        distance, largest = distance_between(second_predictions, hamming)
        print distance
        print largest
        i = i + 1
    print " "
    # If all five iterations ran without the distance diverging, keep the
    # models from the final iteration instead of crashing on a None lookup.
    if best_TR_models is None:
        best_TR_models = TR_models_used
    if test:
        return best_classifiers_choice(best_TR_models['TR_model'],
                                       TR_set_used['TS'],
                                       TR_set_used['TS_outcome'], TR_set_used,
                                       best_strings, binary, weight, lb, labels)
    else:
        return best_classifiers_choice(best_TR_models['TR_model'],
                                       TR_set_used['TR3'],
                                       TR_set_used['TR3_outcome'], TR_set_used,
                                       best_strings, binary, weight, lb, labels)
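# `distance_between` is defined elsewhere in this project. The sketch below
# (hypothetical name and body) only illustrates the contract assumed by the
# loop above: take the per-classifier prediction vectors and return a
# (distance, largest) tuple. Treating `hamming` as selecting the Hamming
# metric is an assumption inferred from the parameter name; the real helper
# may interpret it differently.
def _distance_between_sketch(second_predictions, hamming):
    from scipy.spatial.distance import hamming as hamming_metric
    pairwise = []
    n = len(second_predictions)
    for a in range(n):
        for b in range(a + 1, n):
            # Proportion of positions where the two prediction vectors differ.
            pairwise.append(hamming_metric(second_predictions[a],
                                           second_predictions[b]))
    # Mean pairwise distance, plus the largest single pairwise distance.
    return (sum(pairwise) / len(pairwise), max(pairwise))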
def best_parameters(TR_set, ensemble_methods, binary):
    keeper = []
    for value in new_classifiers5.create_best_classifiers(
            new_classifiers5.names_all_classifiers(ensemble_methods),
            ensemble_methods):
        keeper.append(
            tuned_classifier_TR3(TR_set, value['model'],
                                 value['tuned_parameters'], value['type'],
                                 binary))
    return keeper
def expand_all_combine_all(full_training_set, TR_set_used, TS, TS_outcome,
                           best_classifiers, labels, binary, new_features_only,
                           best_strings_first, ensemble_methods, weight, lb):
    best_strings_second = new_classifiers5.names_all_classifiers(
        ensemble_methods)
    return mean_combine.combine(full_training_set, TR_set_used, TS, TS_outcome,
                                best_classifiers, labels, binary,
                                new_features_only, best_strings_first,
                                best_strings_second, ensemble_methods, weight,
                                lb)
def fit_expanded_set_all_models(TR2_expanded, TR2_outcome, ensemble_methods):
    # Refit every base classifier on the expanded TR2 feature set.
    fitted_models = []
    for each_classifier in new_classifiers5.create_best_classifiers(
            new_classifiers5.names_all_classifiers(ensemble_methods),
            ensemble_methods):
        fitted_models.append(
            tune_classifier_expanded(TR2_expanded, TR2_outcome,
                                     each_classifier['model'],
                                     each_classifier['tuned_parameters'],
                                     each_classifier['type']))
    return fitted_models
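# `tune_classifier_expanded` lives elsewhere in the project; the sketch below
# (hypothetical name) only illustrates the contract assumed above, mirroring
# the grid-search-or-plain-fit pattern used in expand_best further down. It
# relies on the GridSearchCV this module already imports.
def _tune_classifier_expanded_sketch(X, y, model, tuned_parameters, type_name):
    if tuned_parameters != []:
        # Grid-search the supplied hyperparameter grid with 10-fold CV.
        fitted = GridSearchCV(model, tuned_parameters, cv=10,
                              scoring="accuracy").fit(X, y)
    else:
        # No grid supplied: fit the model with its default parameters.
        fitted = model.fit(X, y)
    return {'model': fitted, 'type': type_name}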
def expand_all(TR_set_used, labels, binary, new_features_only, training_set3,
               ensemble_methods, lb, weight):
    best_strings_first = new_classifiers5.names_all_classifiers(
        ensemble_methods)
    best_strings_second = best_strings_first
    best_classifiers = new_classifiers5.one_iteration(
        TR_set_used, training_set3, new_features_only, labels, binary,
        best_strings_first, best_strings_second, ensemble_methods, lb, weight)
    return (best_classifiers, best_strings_first)
def expand_best(TR_set_used, labels, binary, new_features_only, training_set3,
                ensemble_methods, lb, weight):
    # Train each classifier on TR and evaluate it on TR3.
    list_classifiers = []
    for each_classifier in new_classifiers5.create_classifiers(
            ensemble_methods):
        if each_classifier['tuned_parameters'] != []:
            model = GridSearchCV(each_classifier['model'],
                                 each_classifier['tuned_parameters'], cv=10,
                                 scoring="accuracy").fit(
                                     TR_set_used['TR'],
                                     TR_set_used['TR_outcome'])
        else:
            model = each_classifier['model'].fit(TR_set_used['TR'],
                                                 TR_set_used['TR_outcome'])
        type_hold = each_classifier['type']
        predictions = model.predict(TR_set_used['TR3'])
        if binary:
            # roc_auc_score expects the true labels as its first argument.
            roc_score = roc_auc_score(TR_set_used['TR3_outcome'], predictions)
        else:
            roc_score = new_classifiers5.multi_class_roc(
                weight, lb, predictions, TR_set_used['TR3_outcome'], labels)
        list_classifiers.append((type_hold, predictions, roc_score, model))
    # Greedily select the subset of classifiers that does best on TR3.
    best_strings_first = greedy.find_best(list_classifiers, TR_set_used['TR3'],
                                          TR_set_used['TR3_outcome'], labels,
                                          TR_set_used['TR'],
                                          TR_set_used['TR_outcome'], weight,
                                          lb, binary)
    best_strings_second = new_classifiers5.names_all_classifiers(
        ensemble_methods)
    best_classifiers = new_classifiers5.one_iteration(
        TR_set_used, training_set3, new_features_only, labels, binary,
        best_strings_first, best_strings_second, ensemble_methods, lb, weight)
    return (best_classifiers, best_strings_first)
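# `greedy.find_best` is implemented in its own module and takes more
# arguments than shown here. As a rough sketch of the idea (hypothetical
# name, binary case only): greedy forward selection repeatedly adds whichever
# classifier most improves the ensemble's ROC AUC on TR3, stopping when no
# addition helps, and returns the selected classifiers' type strings.
def _greedy_find_best_sketch(list_classifiers, TR3_outcome):
    import numpy as np
    from sklearn.metrics import roc_auc_score
    chosen = []
    best_score = 0.0
    remaining = list(list_classifiers)  # (type, predictions, roc, model) tuples
    while remaining:
        scored = []
        for cand in remaining:
            preds = [c[1] for c in chosen] + [cand[1]]
            # Majority vote of the candidate ensemble's predictions.
            vote = (np.mean(np.column_stack(preds), axis=1) >= 0.5).astype(int)
            scored.append((roc_auc_score(TR3_outcome, vote), cand))
        score, winner = max(scored, key=lambda t: t[0])
        if score <= best_score:
            break
        best_score = score
        chosen.append(winner)
        remaining.remove(winner)
    return [c[0] for c in chosen]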
def linear_stacking(TR_set, ensemble_methods, weight, lb, binary, labels):
    start_time = time.time()
    TR2_predictions = []
    TS_predictions = []
    # Re-split the full training data so the stacker is fit on predictions
    # the base models did not train on.
    hold_sets = new_classifiers5.get_new_training(
        np.column_stack((TR_set['TR_full'], TR_set['TR_full_outcome'])))
    for each_classifier in new_classifiers5.create_best_classifiers(
            new_classifiers5.names_all_classifiers(ensemble_methods),
            ensemble_methods):
        store = tuned_classifier(hold_sets['TR1'], hold_sets['TR1_outcome'],
                                 each_classifier['model'],
                                 each_classifier['tuned_parameters'],
                                 each_classifier['type'])
        # Stack each base model's predictions as a new feature column.
        if len(TR2_predictions) == 0:
            TR2_predictions = store['model'].predict(hold_sets['TR2'])
            TS_predictions = store['model'].predict(TR_set['TS'])
        else:
            TR2_predictions = np.column_stack(
                (TR2_predictions, store['model'].predict(hold_sets['TR2'])))
            TS_predictions = np.column_stack(
                (TS_predictions, store['model'].predict(TR_set['TS'])))
    # Fit a tuned logistic regression on the base-model predictions: this is
    # the meta-learner that gives linear stacking its name.
    tuned_parameters_logistic = [{'penalty': ['l1', 'l2'],
                                  'C': [0.01, 0.1, 1, 5, 10]}]
    model = GridSearchCV(LogisticRegression(), tuned_parameters_logistic,
                         cv=5).fit(TR2_predictions, hold_sets['TR2_outcome'])
    predictions = model.predict(TS_predictions)
    if binary:
        print "linear stacking roc_auc_score: " + str(
            roc_auc_score(TR_set['TS_outcome'], predictions,
                          average='weighted'))
        print " f-score: " + str(
            f1_score(TR_set['TS_outcome'], predictions, average='binary'))
    else:
        print "linear stacking roc_auc_score: " + str(
            new_classifiers5.multi_class_roc(weight, lb, predictions,
                                             TR_set['TS_outcome'], labels))
        print " f-score: " + str(
            f1_score(TR_set['TS_outcome'], predictions, average='weighted'))
    print("--- %s seconds ---" % (time.time() - start_time))
    print " "
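# For reference, the stacking pattern above can be reproduced with sklearn
# alone. This standalone toy (synthetic data, two hypothetical base models)
# is only an illustration of the technique, not part of the pipeline: TR1
# trains the base models, TR2 trains the meta-learner, TS evaluates.
def _linear_stacking_demo():
    import numpy as np
    from sklearn.datasets import make_classification
    from sklearn.model_selection import train_test_split
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.naive_bayes import GaussianNB
    from sklearn.linear_model import LogisticRegression
    X, y = make_classification(n_samples=400, random_state=0)
    X_tr1, X_rest, y_tr1, y_rest = train_test_split(X, y, test_size=0.5,
                                                    random_state=0)
    X_tr2, X_ts, y_tr2, y_ts = train_test_split(X_rest, y_rest, test_size=0.5,
                                                random_state=0)
    # Fit the base models on TR1.
    bases = [DecisionTreeClassifier(random_state=0).fit(X_tr1, y_tr1),
             GaussianNB().fit(X_tr1, y_tr1)]
    # Their held-out predictions become the meta-learner's feature columns.
    meta_train = np.column_stack([b.predict(X_tr2) for b in bases])
    meta_test = np.column_stack([b.predict(X_ts) for b in bases])
    meta = LogisticRegression().fit(meta_train, y_tr2)
    return meta.score(meta_test, y_ts)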