Exemple #1
0
        "/X_train.p", "rb"))
# Load the pickled training labels for the selected experiment.
# The file is opened in a context manager so the handle is closed
# deterministically instead of being left to the garbage collector.
# NOTE(review): pickle executes arbitrary code on load; only use trusted files.
with open(
        "/home/felix/phd/feature_constraints/" + str(which_experiment) +
        "/y_train.p", "rb") as y_train_file:
    y_train = pickle.load(y_train_file)

# Wrap the list of numeric representations in one CandidateFeature that uses
# IdentityTransformation(-1), then wrap that candidate in a
# MinMaxScalingTransformation candidate. The `.pipeline` attribute of the
# latter is used as the feature step of the sklearn Pipeline below.
all_features = CandidateFeature(IdentityTransformation(-1),
                                numeric_representations)
all_standardized = CandidateFeature(MinMaxScalingTransformation(),
                                    [all_features])

# Candidate sensitive attributes read from the raw training matrix:
# column 7 as-is, and a boolean flag per row that is True when 'female'
# occurs in the column-15 value.
# NOTE(review): column meanings are implied by the variable names only —
# confirm against the dataset schema.
foreigner = np.array(X_train[:, 7])
gender = np.array(
    ['female' in personal_status for personal_status in X_train[:, 15]])

# The runner is configured with `gender` as the sensitive attribute; the
# `foreigner` variant is kept commented out as an alternative.
my_runner = Runner(c=1.0, sensitive=gender, labels=['bad', 'good'])
#my_runner = Runner(c=1.0, sensitive=foreigner, labels=['bad', 'good'])

# Gradient-boosted classifier with a fixed seed for reproducibility.
model = xgb.XGBClassifier(objective="binary:logistic",
                          n_estimators=1000,
                          random_state=42)

# Not appended to within the visible part of the script.
history = []

# Step 'f' produces the scaled features, step 'c' is the classifier.
my_pipeline = Pipeline([('f', all_standardized.pipeline), ('c', model)])

my_pipeline.fit(X_train, y_train)

# Gain-based importance scores taken from the fitted XGBoost booster.
gain_dict = my_pipeline.named_steps['c'].get_booster().get_score(
    importance_type='gain')
Exemple #2
0
#print(X_train[:,7])

#todo: measure TP for each group and add objective
#todo: try misclassification constraint
#todo: restart search

# Starts empty; it is not written to within the visible part of the script.
# NOTE(review): presumably a cache keyed by feature-selection bit mask — confirm.
bit2results = {}
# The string literal below is disabled code (an earlier Runner setup with an
# explicit sensitive attribute); it is evaluated and discarded at import time.
'''
foreigner = np.array(X_train[:,7])
gender = np.array(['female' in personal_status for personal_status in X_train[:,15]])

my_runner = Runner(c=1.0, sensitive=gender, labels=['bad', 'good'])
#my_runner = Runner(c=1.0, sensitive=foreigner, labels=['bad', 'good'])
'''

# Active configuration: the Runner is given the experiment identifier only.
my_runner = Runner(c=1.0, experiment=which_experiment)

# Not appended to within the visible part of the script.
history = []


# define an objective function
def objective(features):
    #_, test, pred_test, std_score, proba_pred_test = my_runner.run_pipeline(features, runs=1)
    results = my_runner.run_pipeline(features, runs=1)

    #print(features)
    '''
	assert len(y_test) == len(pred_test)
	which_observation_should_be_predicted_correctly = 333#131
	print(proba_pred_test[which_observation_should_be_predicted_correctly])
Exemple #3
0
    open(
        "/home/felix/phd/feature_constraints/" + str(which_experiment) +
        "/y_train.p", "rb"))

# Wrap the list of numeric representations in one CandidateFeature that uses
# IdentityTransformation(-1), then wrap that candidate in a
# MinMaxScalingTransformation candidate whose `.pipeline` is used as the
# feature step of the sklearn Pipeline built further down.
all_features = CandidateFeature(IdentityTransformation(-1),
                                numeric_representations)
all_standardized = CandidateFeature(MinMaxScalingTransformation(),
                                    [all_features])

# Disabled: explicit sensitive-attribute extraction from X_train.
#foreigner = np.array(X_train[:,7])
#gender = np.array(['female' in personal_status for personal_status in X_train[:,15]])

# Disabled: Runner variants that pass the sensitive attribute directly.
#my_runner = Runner(c=1.0, sensitive=gender, labels=['bad', 'good'], experiment=which_experiment)
#my_runner = Runner(c=1.0, sensitive=foreigner, labels=['bad', 'good'])

# Active configuration: the Runner is given the experiment identifier only.
my_runner = Runner(c=1.0, experiment=which_experiment)

#todo: put feature selection within pipeline!!!!

# Not appended to within the visible part of the script.
history = []

# Sweep the inverse regularisation strength of the L1-penalised classifier.
for c in [0.001, 0.01, 0.1, 1, 10, 100, 1000]:

    # C must follow the loop variable; with a constant C every iteration
    # would build the same model and the sweep would have no effect.
    model = LogisticRegression(penalty='l1',
                               C=c,
                               solver='saga',
                               class_weight='balanced',
                               max_iter=10000,
                               multi_class='auto')

    # Step 'f' produces the scaled features, step 'c' is the classifier.
    my_pipeline = Pipeline([('f', all_standardized.pipeline), ('c', model)])
Exemple #4
0
from fastsklearnfeature.interactiveAutoML.CreditWrapper import run_pipeline
import numpy as np
import pickle
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt import fmin, tpe, space_eval
from fastsklearnfeature.candidates.CandidateFeature import CandidateFeature
from fastsklearnfeature.candidates.RawFeature import RawFeature
from fastsklearnfeature.transformations.OneHotTransformation import OneHotTransformation
from typing import List, Dict, Set
from ortools.sat.python import cp_model
from fastsklearnfeature.interactiveAutoML.Runner import Runner

# Load the pickled candidate features of experiment 1.
# The file is opened in a context manager so the handle is closed
# deterministically instead of being left to the garbage collector.
# NOTE(review): pickle executes arbitrary code on load; only use trusted files.
with open("/home/felix/phd/feature_constraints/experiment1/features.p",
          "rb") as features_file:
    numeric_representations: List[CandidateFeature] = pickle.load(
        features_file)

my_runner = Runner(c=1.0, labels=['bad', 'good'])

# No filtering is applied: `filtered` is the same list object as
# `numeric_representations`. The string literal that follows holds a disabled
# variant that keeps only raw and one-hot features.
filtered = numeric_representations
'''
filtered = []
for f in numeric_representations:
	if isinstance(f, RawFeature):
		filtered.append(f)
	else:
		if isinstance(f.transformation, OneHotTransformation):
			filtered.append(f)
'''

# Load the pickled test labels of experiment 1, closing the file afterwards.
with open("/home/felix/phd/feature_constraints/experiment1/y_test.p",
          "rb") as y_test_file:
    y_test = pickle.load(y_test_file)
Exemple #5
0
    open(
        "/home/felix/phd/feature_constraints/" + str(which_experiment) +
        "/y_train.p", "rb"))

# Wrap the list of numeric representations in one CandidateFeature that uses
# IdentityTransformation(-1), then wrap that candidate in a
# MinMaxScalingTransformation candidate whose `.pipeline` is used as the
# feature step of the sklearn Pipeline built in the loop below.
all_features = CandidateFeature(IdentityTransformation(-1),
                                numeric_representations)
all_standardized = CandidateFeature(MinMaxScalingTransformation(),
                                    [all_features])

# Candidate sensitive attributes read from the raw training matrix:
# column 7 as-is, and a boolean flag per row that is True when 'female'
# occurs in the column-15 value.
# NOTE(review): column meanings are implied by the variable names only —
# confirm against the dataset schema.
foreigner = np.array(X_train[:, 7])
gender = np.array(
    ['female' in personal_status for personal_status in X_train[:, 15]])

# Selects which attribute is treated as sensitive; `foreigner` is computed
# above but not used in the visible part of the script.
sensitive_feature = gender

my_runner = Runner(c=1.0, sensitive=sensitive_feature, labels=['bad', 'good'])

# Sweep the inverse regularisation strength of the L1-penalised classifier.
for c in [0.001, 0.01, 0.1, 1, 10, 100, 1000]:

    # C must follow the loop variable; with a constant C every iteration
    # would fit the same model and the sweep would have no effect.
    model = LogisticRegression(penalty='l1',
                               C=c,
                               solver='saga',
                               class_weight='balanced',
                               max_iter=10000,
                               multi_class='auto')

    # Step 'f' produces the scaled features, step 'c' is the classifier.
    my_pipeline = Pipeline([('f', all_standardized.pipeline), ('c', model)])

    my_pipeline.fit(X_train, y_train)

    # Distinct values of the sensitive attribute, as a plain list.
    sensitive_values = list(np.unique(sensitive_feature))
Exemple #6
0
        "/home/felix/phd/feature_constraints/" + str(which_experiment) +
        "/X_train.p", "rb"))

#print(X_train[:,7])

#todo: measure TP for each group and add objective
#todo: try misclassification constraint
#todo: restart search

# Starts empty; it is not written to within the visible part of the script.
# NOTE(review): presumably a cache keyed by feature-selection bit mask — confirm.
bit2results = {}

# Candidate sensitive attributes read from the raw training matrix:
# column 7 as-is, and a boolean flag per row that is True when 'female'
# occurs in the column-15 value.
# NOTE(review): column meanings are implied by the variable names only —
# confirm against the dataset schema.
foreigner = np.array(X_train[:, 7])
gender = np.array(
    ['female' in personal_status for personal_status in X_train[:, 15]])

# The runner is configured with `gender` as the sensitive attribute; the
# `foreigner` variant is kept commented out as an alternative.
my_runner = Runner(c=1.0, sensitive=gender, labels=['bad', 'good'])
#my_runner = Runner(c=1.0, sensitive=foreigner, labels=['bad', 'good'])

# Not appended to within the visible part of the script.
history = []


# define an objective function
def objective(features):
    #_, test, pred_test, std_score, proba_pred_test = my_runner.run_pipeline(features, runs=1)
    results = my_runner.run_pipeline(features, runs=1)

    #print(features)
    '''
	assert len(y_test) == len(pred_test)
	which_observation_should_be_predicted_correctly = 333#131
	print(proba_pred_test[which_observation_should_be_predicted_correctly])
			filtered.append(f)
numeric_representations = filtered
'''

# Load the pickled test labels for the selected experiment and keep the
# `.values` attribute of the unpickled object.
# NOTE(review): presumably a pandas object converted to a NumPy array — confirm.
# The file is opened in a context manager so the handle is closed
# deterministically instead of being left to the garbage collector.
with open(
        "/home/felix/phd/feature_constraints/" + str(which_experiment) +
        "/y_test.p", "rb") as y_test_file:
    y_test = pickle.load(y_test_file).values

#todo: measure TP for each group and add objective
#todo: try misclassification constraint
#todo: restart search

# Starts empty; it is not written to within the visible part of the script.
bit2results = {}

my_runner = Runner(c=1.0)


# define an objective function
def objective(features):
    score, test, pred_test, std_score, proba_pred_test = my_runner.run_pipeline(
        features, runs=1)

    #print(features)

    assert len(y_test) == len(pred_test)
    which_observation_should_be_predicted_correctly = 333  #131
    print(proba_pred_test[which_observation_should_be_predicted_correctly])

    true_class_index = -1
    for c_i in range(len(my_runner.pipeline.classes_)):