"/X_train.p", "rb")) y_train = pickle.load( open( "/home/felix/phd/feature_constraints/" + str(which_experiment) + "/y_train.p", "rb")) all_features = CandidateFeature(IdentityTransformation(-1), numeric_representations) all_standardized = CandidateFeature(MinMaxScalingTransformation(), [all_features]) foreigner = np.array(X_train[:, 7]) gender = np.array( ['female' in personal_status for personal_status in X_train[:, 15]]) my_runner = Runner(c=1.0, sensitive=gender, labels=['bad', 'good']) #my_runner = Runner(c=1.0, sensitive=foreigner, labels=['bad', 'good']) model = xgb.XGBClassifier(objective="binary:logistic", n_estimators=1000, random_state=42) history = [] my_pipeline = Pipeline([('f', all_standardized.pipeline), ('c', model)]) my_pipeline.fit(X_train, y_train) gain_dict = my_pipeline.named_steps['c'].get_booster().get_score( importance_type='gain')
#print(X_train[:,7]) #todo: measure TP for each group and add objective #todo: try misclassification constraint #todo: restart search bit2results = {} ''' foreigner = np.array(X_train[:,7]) gender = np.array(['female' in personal_status for personal_status in X_train[:,15]]) my_runner = Runner(c=1.0, sensitive=gender, labels=['bad', 'good']) #my_runner = Runner(c=1.0, sensitive=foreigner, labels=['bad', 'good']) ''' my_runner = Runner(c=1.0, experiment=which_experiment) history = [] # define an objective function def objective(features): #_, test, pred_test, std_score, proba_pred_test = my_runner.run_pipeline(features, runs=1) results = my_runner.run_pipeline(features, runs=1) #print(features) ''' assert len(y_test) == len(pred_test) which_observation_should_be_predicted_correctly = 333#131 print(proba_pred_test[which_observation_should_be_predicted_correctly])
open( "/home/felix/phd/feature_constraints/" + str(which_experiment) + "/y_train.p", "rb")) all_features = CandidateFeature(IdentityTransformation(-1), numeric_representations) all_standardized = CandidateFeature(MinMaxScalingTransformation(), [all_features]) #foreigner = np.array(X_train[:,7]) #gender = np.array(['female' in personal_status for personal_status in X_train[:,15]]) #my_runner = Runner(c=1.0, sensitive=gender, labels=['bad', 'good'], experiment=which_experiment) #my_runner = Runner(c=1.0, sensitive=foreigner, labels=['bad', 'good']) my_runner = Runner(c=1.0, experiment=which_experiment) #todo: put feature selection within pipeline!!!! history = [] for c in [0.001, 0.01, 0.1, 1, 10, 100, 1000]: model = LogisticRegression(penalty='l1', C=1, solver='saga', class_weight='balanced', max_iter=10000, multi_class='auto') my_pipeline = Pipeline([('f', all_standardized.pipeline), ('c', model)])
from fastsklearnfeature.interactiveAutoML.CreditWrapper import run_pipeline
import numpy as np
import pickle
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt import fmin, tpe, space_eval
from fastsklearnfeature.candidates.CandidateFeature import CandidateFeature
from fastsklearnfeature.candidates.RawFeature import RawFeature
from fastsklearnfeature.transformations.OneHotTransformation import OneHotTransformation
from typing import List, Dict, Set
from ortools.sat.python import cp_model
from fastsklearnfeature.interactiveAutoML.Runner import Runner

# Script setup: load the pickled candidate features and the test labels of
# "experiment1", and create the Runner used by the rest of the script.
#
# NOTE: pickle can execute arbitrary code while loading; these files must come
# from a trusted source.

# Load the candidate features. The context manager closes the file handle as
# soon as unpickling finishes instead of leaking it until garbage collection.
with open("/home/felix/phd/feature_constraints/experiment1/features.p",
          "rb") as features_file:
    numeric_representations: List[CandidateFeature] = pickle.load(features_file)

my_runner = Runner(c=1.0, labels=['bad', 'good'])

# Currently every candidate feature is kept (no filtering is applied).
filtered = numeric_representations

# Disabled alternative: keep only raw features and one-hot encoded features.
#
# filtered = []
# for f in numeric_representations:
#     if isinstance(f, RawFeature):
#         filtered.append(f)
#     else:
#         if isinstance(f.transformation, OneHotTransformation):
#             filtered.append(f)

# Load the test labels, again closing the file deterministically.
with open("/home/felix/phd/feature_constraints/experiment1/y_test.p",
          "rb") as y_test_file:
    y_test = pickle.load(y_test_file)
open( "/home/felix/phd/feature_constraints/" + str(which_experiment) + "/y_train.p", "rb")) all_features = CandidateFeature(IdentityTransformation(-1), numeric_representations) all_standardized = CandidateFeature(MinMaxScalingTransformation(), [all_features]) foreigner = np.array(X_train[:, 7]) gender = np.array( ['female' in personal_status for personal_status in X_train[:, 15]]) sensitive_feature = gender my_runner = Runner(c=1.0, sensitive=sensitive_feature, labels=['bad', 'good']) for c in [0.001, 0.01, 0.1, 1, 10, 100, 1000]: model = LogisticRegression(penalty='l1', C=1, solver='saga', class_weight='balanced', max_iter=10000, multi_class='auto') my_pipeline = Pipeline([('f', all_standardized.pipeline), ('c', model)]) my_pipeline.fit(X_train, y_train) sensitive_values = list(np.unique(sensitive_feature))
"/home/felix/phd/feature_constraints/" + str(which_experiment) + "/X_train.p", "rb")) #print(X_train[:,7]) #todo: measure TP for each group and add objective #todo: try misclassification constraint #todo: restart search bit2results = {} foreigner = np.array(X_train[:, 7]) gender = np.array( ['female' in personal_status for personal_status in X_train[:, 15]]) my_runner = Runner(c=1.0, sensitive=gender, labels=['bad', 'good']) #my_runner = Runner(c=1.0, sensitive=foreigner, labels=['bad', 'good']) history = [] # define an objective function def objective(features): #_, test, pred_test, std_score, proba_pred_test = my_runner.run_pipeline(features, runs=1) results = my_runner.run_pipeline(features, runs=1) #print(features) ''' assert len(y_test) == len(pred_test) which_observation_should_be_predicted_correctly = 333#131 print(proba_pred_test[which_observation_should_be_predicted_correctly])
filtered.append(f) numeric_representations = filtered ''' y_test = pickle.load( open( "/home/felix/phd/feature_constraints/" + str(which_experiment) + "/y_test.p", "rb")).values #todo: measure TP for each group and add objective #todo: try misclassification constraint #todo: restart search bit2results = {} my_runner = Runner(c=1.0) # define an objective function def objective(features): score, test, pred_test, std_score, proba_pred_test = my_runner.run_pipeline( features, runs=1) #print(features) assert len(y_test) == len(pred_test) which_observation_should_be_predicted_correctly = 333 #131 print(proba_pred_test[which_observation_should_be_predicted_correctly]) true_class_index = -1 for c_i in range(len(my_runner.pipeline.classes_)):