Exemple #1
0
    def fit(self, X, y=None):
        """Search for feature representations and fit a scaling pipeline on them.

        Runs ``ComplexityDrivenFeatureConstruction`` on ``X``/``y`` using the
        settings stored on this estimator, keeps the evaluated numeric
        representations, and fits a min-max-scaled pipeline over them.

        Parameters
        ----------
        X : feature data handed to ``ScikitReader`` and to the final pipeline.
        y : target values handed to ``ScikitReader`` and to the final pipeline.

        Returns
        -------
        self, with ``numeric_features`` and the fitted ``pipeline_`` set.
        """
        fe = ComplexityDrivenFeatureConstruction(
            None,
            reader=ScikitReader(
                X,
                y,
                feature_names=self.feature_names,
                feature_is_categorical=self.feature_is_categorical),
            score=self.scoring,
            c_max=self.c_max,
            folds=self.cv,
            max_seconds=self.max_time_secs,
            # The search receives the model's class, not the instance.
            classifier=self.model.__class__,
            grid_search_parameters=self.parameter_grid,
            n_jobs=self.n_jobs,
            epsilon=self.epsilon,
            remove_parents=False,
            transformation_producer=self.transformation_producer)

        fe.run()

        numeric_representations = []
        for r in fe.all_representations:
            # Only representations that were actually scored are candidates.
            if 'score' not in r.runtime_properties:
                continue
            # Skip representations whose type string mentions 'object'.
            if 'object' in str(r.properties['type']):
                continue
            # Scaling is applied once over all features further below.
            if isinstance(r.transformation, MinMaxScalingTransformation):
                continue
            # Drop products containing a -1 factor
            # (presumably plain negations of another feature -- TODO confirm).
            if isinstance(r.sympy_representation, sympy.Mul) and any(
                    e == S.NegativeOne
                    for e in r.sympy_representation._args):
                continue
            numeric_representations.append(r)

        self.numeric_features = numeric_representations

        # NOTE(review): this dumps the raw input X to a hard-coded path whose
        # name suggests feature names were intended; kept as-is to preserve
        # the existing side effect -- confirm whether it is still needed.
        with open('/tmp/names.pickle', 'wb') as f:
            pickle.dump(X, f, pickle.HIGHEST_PROTOCOL)

        all_features = CandidateFeature(IdentityTransformation(-1),
                                        numeric_representations)
        all_standardized = CandidateFeature(MinMaxScalingTransformation(),
                                            [all_features])

        self.pipeline_ = all_standardized.pipeline

        self.pipeline_.fit(X, y)
        return self
Exemple #2
0
    def fit(self, features, target, sample_weight=None, groups=None):
        """Evaluate the raw features and fit the generated pipeline.

        ``sample_weight`` and ``groups`` are accepted but not used here.
        Stores the runner in ``self.fe``, its result in
        ``self.max_feature_rep`` and the fitted pipeline in ``self.pipeline``.
        """
        raw_reader = ScikitReader(
            features,
            target,
            feature_names=self.feature_names,
            feature_is_categorical=self.feature_is_categorical,
        )
        self.fe = Run_RawFeatures(
            None,
            reader=raw_reader,
            classifier=self.model,
            grid_search_parameters=self.adjusted_hyperparameters,
            score=self.scoring,
        )

        self.max_feature_rep = self.fe.run()

        # generate_pipeline() is called only after the run has finished.
        unfitted_pipeline = self.generate_pipeline()
        self.pipeline = unfitted_pipeline.fit(features, target)
Exemple #3
0
    def fit(self, features, target, sample_weight=None, groups=None):
        """Run complexity-driven feature construction and fit the pipeline.

        ``sample_weight`` and ``groups` are accepted but not used here.
        Stores the search object in ``self.fe``, its result in
        ``self.max_feature_rep`` and the fitted pipeline in ``self.pipeline``.
        """
        data_reader = ScikitReader(
            features,
            target,
            feature_names=self.feature_names,
            feature_is_categorical=self.feature_is_categorical,
        )

        # c_max, folds and epsilon are fixed here rather than read from the
        # estimator (an earlier variant used c_max=np.inf and self.epsilon).
        search_settings = dict(
            score=self.scoring,
            c_max=6,
            folds=10,
            max_seconds=self.max_time_secs,
            classifier=self.model,
            grid_search_parameters=self.parameter_grid,
            n_jobs=self.n_jobs,
            epsilon=0.0,
        )
        self.fe = ComplexityDrivenFeatureConstruction(
            None, reader=data_reader, **search_settings)

        self.max_feature_rep = self.fe.run()

        # generate_pipeline() is called only after the search has finished.
        unfitted_pipeline = self.generate_pipeline()
        self.pipeline = unfitted_pipeline.fit(features, target)
Exemple #4
0
class_column_name = 'class'

y = data[class_column_name]
data_no_class = data[data.columns.difference([class_column_name])]

X = data_no_class.values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.50, random_state=42, stratify=y)
'''


# Hyper-parameter grid handed to the feature-construction search.
parameter_grid = {
    'penalty': ['l2'],
    'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000],
    'solver': ['lbfgs'],
    'class_weight': ['balanced'],
    'max_iter': [10000],
    'multi_class': ['auto'],
}

# ROC-AUC scorer built from roc_auc_score.
auc = make_scorer(
    roc_auc_score,
    greater_is_better=True,
    needs_threshold=True,
)

# Synthetic column names V0, V1, ... -- one per column of X_train.
fe = ComplexityDrivenFeatureConstruction(
    None,
    reader=ScikitReader(
        X_train,
        y_train,
        feature_names=['V{}'.format(i) for i in range(X_train.shape[1])],
    ),
    score=auc,
    c_max=1,
    folds=2,
    classifier=LogisticRegression,
    grid_search_parameters=parameter_grid,
    n_jobs=4,
    epsilon=0.0,
)

fe.run()

# Accumulators filled by the loop over fe.all_representations that follows.
numeric_representations, feature_names = [], []
for r in fe.all_representations:
	if 'score' in r.runtime_properties:
		if not 'object' in str(r.properties['type']):
Exemple #5
0
# Hyper-parameter grid handed to the feature-construction search.
parameter_grid = dict(
    penalty=['l2'],
    C=[0.001, 0.01, 0.1, 1, 10, 100, 1000],
    solver=['lbfgs'],
    class_weight=['balanced'],
    max_iter=[10000],
    multi_class=['auto'],
)

# ROC-AUC scorer built from roc_auc_score.
auc = make_scorer(
    roc_auc_score, greater_is_better=True, needs_threshold=True)

# Synthetic column names V0, V1, ... -- one per column of X_train.
fe = ComplexityDrivenFeatureConstruction(
    None,
    reader=ScikitReader(
        X_train,
        y_train,
        feature_names=['V%d' % i for i in range(X_train.shape[1])],
    ),
    score=auc,
    c_max=1,
    folds=2,
    classifier=LogisticRegression,
    grid_search_parameters=parameter_grid,
    n_jobs=4,
    epsilon=0.0,
)

fe.run()

# Accumulators filled by the loop over fe.all_representations that follows.
numeric_representations, feature_names = [], []
for r in fe.all_representations:
Exemple #6
0

# Split the frame into the target column and the remaining predictor columns.
y = data[class_column_name]
data_no_class = data[data.columns.difference([class_column_name])]

# Reuse the predictor frame instead of recomputing the column difference.
X = data_no_class.values

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.50, random_state=42, stratify=y)

model = LogisticRegression
parameter_grid = {
    'penalty': ['l2'],
    'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000],
    'solver': ['lbfgs'],
    'class_weight': ['balanced'],
    'max_iter': [10000],
    'multi_class': ['auto'],
}

# ROC-AUC scorer built from roc_auc_score.
auc = make_scorer(roc_auc_score, greater_is_better=True, needs_threshold=True)

fe = ComplexityDrivenFeatureConstruction(
    None,
    reader=ScikitReader(
        X_train,
        y_train,
        # Same columns, in the same order, as the ones X was built from.
        feature_names=data_no_class.columns),
    score=auc,
    c_max=2,
    folds=10,
    classifier=model,
    grid_search_parameters=parameter_grid,
    n_jobs=4,
    epsilon=0.0)

fe.run()

# Accumulators filled by the loop over fe.all_representations that follows.
numeric_representations = []

feature_names = []
for r in fe.all_representations:
	if 'score' in r.runtime_properties:
		if not 'object' in str(r.properties['type']):
			if not isinstance(r.transformation, MinusTransformation):