def fit(self, X, y=None):
    """Fit the feature-construction pipeline on X (and optional labels y).

    Runs ComplexityDrivenFeatureConstruction, keeps only the scored
    numeric representations (dropping object-typed features, features
    that are themselves MinMax-scaled, and products carrying a -1
    factor), then builds and fits a pipeline that concatenates all
    surviving features and MinMax-scales the result.

    Returns self (sklearn convention).
    """
    fe = ComplexityDrivenFeatureConstruction(
        None,
        reader=ScikitReader(
            X, y,
            feature_names=self.feature_names,
            feature_is_categorical=self.feature_is_categorical),
        score=self.scoring, c_max=self.c_max, folds=self.cv,
        max_seconds=self.max_time_secs, classifier=self.model.__class__,
        grid_search_parameters=self.parameter_grid, n_jobs=self.n_jobs,
        epsilon=self.epsilon, remove_parents=False,
        transformation_producer=self.transformation_producer)
    fe.run()

    numeric_representations = []
    for r in fe.all_representations:
        # Only representations that were actually scored are candidates.
        if 'score' not in r.runtime_properties:
            continue
        # Skip non-numeric (object-dtype) features.
        if 'object' in str(r.properties['type']):
            continue
        # Skip already MinMax-scaled features: the final pipeline
        # scales everything once at the end anyway.
        if isinstance(r.transformation, MinMaxScalingTransformation):
            continue
        # Drop products that carry a -1 factor (plain negations of an
        # existing feature add no information).
        if isinstance(r.sympy_representation, sympy.Mul) and any(
                e == S.NegativeOne for e in r.sympy_representation._args):
            continue
        numeric_representations.append(r)

    self.numeric_features = numeric_representations

    # Persist the selected feature names for later inspection.
    # BUG FIX: the name list was built but never used, and the raw data
    # X was dumped to a file called 'names.pickle'; dump the names.
    my_list = [str(ff) for ff in self.numeric_features]
    with open('/tmp/names.pickle', 'wb') as f:
        pickle.dump(my_list, f, pickle.HIGHEST_PROTOCOL)

    # Concatenate all surviving features, then MinMax-scale the result.
    all_features = CandidateFeature(IdentityTransformation(-1), numeric_representations)
    all_standardized = CandidateFeature(MinMaxScalingTransformation(), [all_features])

    self.pipeline_ = all_standardized.pipeline
    self.pipeline_.fit(X, y)
    return self
def fit(self, features, target, sample_weight=None, groups=None):
    """Fit using raw features only (no constructed features).

    Runs Run_RawFeatures to obtain the best raw-feature representation,
    stores it, then builds and fits the final pipeline.

    sample_weight and groups are accepted for sklearn API compatibility
    but are not used by this implementation.

    Returns self (sklearn convention).
    """
    self.fe = Run_RawFeatures(
        None,
        reader=ScikitReader(
            features, target,
            feature_names=self.feature_names,
            feature_is_categorical=self.feature_is_categorical),
        classifier=self.model,
        grid_search_parameters=self.adjusted_hyperparameters,
        score=self.scoring)
    self.max_feature_rep = self.fe.run()
    self.pipeline = self.generate_pipeline().fit(features, target)
    # BUG FIX: fit() must return self so sklearn-style chaining
    # (est.fit(X, y).predict(X)) works, matching the sibling fit().
    return self
def fit(self, features, target, sample_weight=None, groups=None):
    """Fit using complexity-driven feature construction.

    Runs ComplexityDrivenFeatureConstruction with a fixed complexity cap
    (c_max=6, 10 CV folds, epsilon=0.0), stores the best representation,
    then builds and fits the final pipeline.

    sample_weight and groups are accepted for sklearn API compatibility
    but are not used by this implementation.

    Returns self (sklearn convention).
    """
    self.fe = ComplexityDrivenFeatureConstruction(
        None,
        reader=ScikitReader(
            features, target,
            feature_names=self.feature_names,
            feature_is_categorical=self.feature_is_categorical),
        score=self.scoring, c_max=6, folds=10,
        max_seconds=self.max_time_secs, classifier=self.model,
        grid_search_parameters=self.parameter_grid, n_jobs=self.n_jobs,
        epsilon=0.0)
    self.max_feature_rep = self.fe.run()
    self.pipeline = self.generate_pipeline().fit(features, target)
    # BUG FIX: fit() must return self so sklearn-style chaining
    # (est.fit(X, y).predict(X)) works, matching the sibling fit().
    return self
# Split the DataFrame into labels and features, then hold out a
# stratified 50% test set (fixed seed for reproducibility).
class_column_name = 'class'
y = data[class_column_name]
data_no_class = data[data.columns.difference([class_column_name])]
X = data_no_class.values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.50, random_state=42, stratify=y)
# NOTE(review): the triple-quoted block below is commented-out code; its
# closing quotes lie outside this view, so this chunk is truncated here.
''' parameter_grid = {'penalty': ['l2'], 'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000], 'solver': ['lbfgs'], 'class_weight': ['balanced'], 'max_iter': [10000], 'multi_class': ['auto']} auc=make_scorer(roc_auc_score, greater_is_better=True, needs_threshold=True) fe = ComplexityDrivenFeatureConstruction(None, reader=ScikitReader(X_train, y_train, feature_names=['V' + str(i) for i in range(X_train.shape[1])]), score=auc, c_max=1, folds=2, classifier=LogisticRegression, grid_search_parameters=parameter_grid, n_jobs=4, epsilon=0.0) fe.run() numeric_representations = [] feature_names = [] for r in fe.all_representations: if 'score' in r.runtime_properties: if not 'object' in str(r.properties['type']):
# Hyper-parameter grid for the LogisticRegression evaluation model
# used inside the feature-construction search.
parameter_grid = {
    'penalty': ['l2'],
    'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000],
    'solver': ['lbfgs'],
    'class_weight': ['balanced'],
    'max_iter': [10000],
    'multi_class': ['auto']
}
# ROC-AUC scorer evaluated on decision-function thresholds.
auc = make_scorer(roc_auc_score, greater_is_better=True, needs_threshold=True)
# Run complexity-driven feature construction over synthetic feature
# names V0..Vn (complexity cap 1, 2 CV folds, 4 parallel jobs).
fe = ComplexityDrivenFeatureConstruction(
    None,
    reader=ScikitReader(
        X_train, y_train,
        feature_names=['V' + str(i) for i in range(X_train.shape[1])]),
    score=auc, c_max=1, folds=2, classifier=LogisticRegression,
    grid_search_parameters=parameter_grid, n_jobs=4, epsilon=0.0)
fe.run()
# Collect the discovered representations; the loop body is truncated
# in this view of the file.
numeric_representations = []
feature_names = []
for r in fe.all_representations:
# Separate labels from features (column order from DataFrame difference).
y = data[class_column_name]
data_no_class = data[data.columns.difference([class_column_name])]
X = data[data.columns.difference([class_column_name])].values
# Stratified 50/50 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.50, random_state=42, stratify=y)
model = LogisticRegression
# Hyper-parameter grid for the logistic-regression evaluation model.
parameter_grid = {'penalty': ['l2'], 'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000], 'solver': ['lbfgs'], 'class_weight': ['balanced'], 'max_iter': [10000], 'multi_class': ['auto']}
# ROC-AUC scorer evaluated on decision-function thresholds.
auc=make_scorer(roc_auc_score, greater_is_better=True, needs_threshold=True)
# Feature construction with complexity cap 2 and 10-fold CV, reusing the
# original DataFrame column names as feature names.
fe = ComplexityDrivenFeatureConstruction(None, reader=ScikitReader(X_train, y_train, feature_names=data[data.columns.difference([class_column_name])].columns), score=auc, c_max=2, folds=10, classifier=LogisticRegression, grid_search_parameters=parameter_grid, n_jobs=4, epsilon=0.0)
fe.run()
# Filter to scored, non-object features that are not plain negations;
# the innermost loop body is truncated in this view of the file.
numeric_representations = []
feature_names = []
for r in fe.all_representations:
    if 'score' in r.runtime_properties:
        if not 'object' in str(r.properties['type']):
            if not isinstance(r.transformation, MinusTransformation):