def run_multiple_cross_validation(
        feature: CandidateFeature,
        splitted_values_train,
        splitted_target_train,
        parameters,
        model,
        score):
    """Grid-search the pipeline for ``feature`` once per model seed.

    For every entry of ``nested_my_globale_module.model_seeds`` this draws a
    set of stratified folds (seeded with the ``splitting_seeds`` entry at the
    same index), fits a ``GridSearchCV`` over ``parameters`` on the pipeline
    returned by ``generate_pipeline(feature, model)``, and records the best
    mean test score plus the mean ``'complexity'`` score of that best
    candidate.

    The hyperparameter configuration that is selected most often across the
    seeds is written, with the ``c__`` prefix stripped from its keys, to
    ``feature.runtime_properties['hyperparameters']``. The mean complexity
    score is printed.

    Args:
        feature: Candidate feature to evaluate. Its ``runtime_properties``
            dict is updated in place.
        splitted_values_train: Training data handed to the fold splitter and
            to ``GridSearchCV.fit``.
        splitted_target_train: Training targets, used for stratification and
            for fitting.
        parameters: Parameter grid (name -> list of candidate values). It is
            deep-copied, so the caller's object is not modified. Keys that do
            not already start with ``c__`` get that prefix (presumably the
            name of the model step inside the pipeline -- TODO confirm
            against ``generate_pipeline``).
        model: Forwarded unchanged to ``generate_pipeline``.
        score: Scorer used for the ``'accuracy'`` metric, which also decides
            the best candidate (``refit='accuracy'``).

    Returns:
        A tuple ``(mean, std)`` of the per-seed best cross-validation scores.

    Raises:
        ValueError: If ``nested_my_globale_module.model_seeds`` is empty.
        IndexError: If ``splitting_seeds`` has fewer entries than
            ``model_seeds``.
    """
    X_train = splitted_values_train
    y_train = splitted_target_train

    pipeline = generate_pipeline(feature, model)

    model_seeds = nested_my_globale_module.model_seeds
    splitting_seeds = nested_my_globale_module.splitting_seeds
    # The number of folds equals the number of splitting seeds.
    n_splits = len(splitting_seeds)

    # Identical for every seed, so it is built once.
    # NOTE(review): with greater_is_better=False scikit-learn negates the
    # metric, so the 'complexity' values collected below are sign-flipped.
    scoring = {
        'accuracy': score,
        'complexity': make_scorer(
            customAICc,
            greater_is_better=False,
            needs_proba=True,
            k=feature.get_complexity()),
    }

    multiple_cv_score = []
    multiple_cv_complexity_score = []

    # Votes are keyed on the configuration WITHOUT the random state. The
    # random state is forced to the current model seed in every run, so
    # leaving it in the key would make every key unique and reduce the
    # majority vote to "take the result of the first seed".
    votes = {}
    # First full best_params_ (random state included) seen per configuration,
    # so the stored hyperparameters keep the same keys as before.
    first_seen_params = {}

    for m_i, model_seed in enumerate(model_seeds):
        splitter = StratifiedKFold(
            n_splits=n_splits,
            shuffle=True,
            random_state=splitting_seeds[m_i])
        # Materialise the folds so the grid search sees explicit index pairs.
        preprocessed_folds = list(splitter.split(X_train, y_train))

        # Pin the model seed and route every parameter to the 'c__' step.
        param_grid = copy.deepcopy(parameters)
        param_grid['random_state'] = [int(model_seed)]
        for key in list(param_grid.keys()):
            if not str(key).startswith('c__'):
                param_grid['c__' + str(key)] = param_grid.pop(key)

        cv = GridSearchCV(
            pipeline,
            param_grid=param_grid,
            scoring=scoring,
            cv=preprocessed_folds,
            refit='accuracy')
        cv.fit(X_train, y_train)

        multiple_cv_score.append(cv.best_score_)
        multiple_cv_complexity_score.append(
            cv.cv_results_['mean_test_complexity'][cv.best_index_])

        best_params = hashabledict(cv.best_params_)
        vote_key = hashabledict({
            name: value
            for name, value in best_params.items()
            if name != 'c__random_state'
        })
        if vote_key not in votes:
            votes[vote_key] = 0
            first_seen_params[vote_key] = best_params
        votes[vote_key] += 1

    if not votes:
        raise ValueError(
            'nested_my_globale_module.model_seeds is empty: '
            'no cross-validation run was executed')

    # max() returns the first maximal entry, so ties go to the earliest seed.
    winning_key = max(votes.items(), key=operator.itemgetter(1))[0]

    # Strip the pipeline prefix again before storing the result.
    new_parameters = copy.deepcopy(first_seen_params[winning_key])
    for key in list(new_parameters.keys()):
        if str(key).startswith('c__'):
            new_parameters[str(key[3:])] = new_parameters.pop(key)
    feature.runtime_properties['hyperparameters'] = new_parameters

    print(
        str(feature) + ' AICc: ' + str(np.mean(multiple_cv_complexity_score)))

    return np.mean(multiple_cv_score), np.std(multiple_cv_score)
def calc_score(c: CandidateFeature):
    """Combine a candidate's squared score with its inverse complexity.

    Reads ``c.runtime_properties['score']`` and ``c.get_complexity()`` and
    passes the squared score and ``1 / complexity`` to ``harmonic_mean``.

    Args:
        c: Candidate feature whose ``runtime_properties`` holds a ``'score'``
            entry.

    Returns:
        Whatever ``harmonic_mean`` returns for the two values.

    Raises:
        KeyError: If ``'score'`` is missing from ``c.runtime_properties``.
        ZeroDivisionError: If the complexity converts to ``0.0``.
    """
    # NOTE(review): harmonic_mean is called with two positional scalars, so
    # it is presumably a project helper rather than statistics.harmonic_mean
    # -- confirm against the file's imports.
    squared_score = c.runtime_properties['score'] ** 2
    inverse_complexity = 1 / float(c.get_complexity())
    return harmonic_mean(squared_score, inverse_complexity)