def test_parallel_classification():
    # Check parallel classification.
    rng = check_random_state(0)

    # Classification
    X_train, X_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        random_state=rng)

    ensemble = BaggingClassifier(DecisionTreeClassifier(),
                                 n_jobs=3,
                                 random_state=0).fit(X_train, y_train)

    # predict_proba
    ensemble.set_params(n_jobs=1)
    y1 = ensemble.predict_proba(X_test)
    ensemble.set_params(n_jobs=2)
    y2 = ensemble.predict_proba(X_test)
    assert_array_almost_equal(y1, y2)

    ensemble = BaggingClassifier(DecisionTreeClassifier(),
                                 n_jobs=1,
                                 random_state=0).fit(X_train, y_train)

    y3 = ensemble.predict_proba(X_test)
    assert_array_almost_equal(y1, y3)

    # decision_function
    ensemble = BaggingClassifier(SVC(decision_function_shape="ovr"),
                                 n_jobs=3,
                                 random_state=0).fit(X_train, y_train)

    ensemble.set_params(n_jobs=1)
    decisions1 = ensemble.decision_function(X_test)
    ensemble.set_params(n_jobs=2)
    decisions2 = ensemble.decision_function(X_test)
    assert_array_almost_equal(decisions1, decisions2)

    ensemble = BaggingClassifier(SVC(decision_function_shape="ovr"),
                                 n_jobs=1,
                                 random_state=0).fit(X_train, y_train)

    decisions3 = ensemble.decision_function(X_test)
    assert_array_almost_equal(decisions1, decisions3)
def test_parallel_classification():
    # Check parallel classification.
    rng = check_random_state(0)

    # Classification
    X_train, X_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        random_state=rng)

    ensemble = BaggingClassifier(DecisionTreeClassifier(),
                                 n_jobs=3,
                                 random_state=0).fit(X_train, y_train)

    # predict_proba
    ensemble.set_params(n_jobs=1)
    y1 = ensemble.predict_proba(X_test)
    ensemble.set_params(n_jobs=2)
    y2 = ensemble.predict_proba(X_test)
    assert_array_almost_equal(y1, y2)

    ensemble = BaggingClassifier(DecisionTreeClassifier(),
                                 n_jobs=1,
                                 random_state=0).fit(X_train, y_train)

    y3 = ensemble.predict_proba(X_test)
    assert_array_almost_equal(y1, y3)

    # decision_function
    ensemble = BaggingClassifier(SVC(gamma='scale',
                                     decision_function_shape='ovr'),
                                 n_jobs=3,
                                 random_state=0).fit(X_train, y_train)

    ensemble.set_params(n_jobs=1)
    decisions1 = ensemble.decision_function(X_test)
    ensemble.set_params(n_jobs=2)
    decisions2 = ensemble.decision_function(X_test)
    assert_array_almost_equal(decisions1, decisions2)

    X_err = np.hstack((X_test, np.zeros((X_test.shape[0], 1))))
    assert_raise_message(ValueError, "Number of features of the model "
                         "must match the input. Model n_features is {0} "
                         "and input n_features is {1} "
                         "".format(X_test.shape[1], X_err.shape[1]),
                         ensemble.decision_function, X_err)

    ensemble = BaggingClassifier(SVC(gamma='scale',
                                     decision_function_shape='ovr'),
                                 n_jobs=1,
                                 random_state=0).fit(X_train, y_train)

    decisions3 = ensemble.decision_function(X_test)
    assert_array_almost_equal(decisions1, decisions3)
def test_parallel_classification():
    # Check parallel classification.
    rng = check_random_state(0)

    # Classification
    X_train, X_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        random_state=rng)

    ensemble = BaggingClassifier(DecisionTreeClassifier(),
                                 n_jobs=3,
                                 random_state=0).fit(X_train, y_train)

    # predict_proba
    ensemble.set_params(n_jobs=1)
    y1 = ensemble.predict_proba(X_test)
    ensemble.set_params(n_jobs=2)
    y2 = ensemble.predict_proba(X_test)
    assert_array_almost_equal(y1, y2)

    ensemble = BaggingClassifier(DecisionTreeClassifier(),
                                 n_jobs=1,
                                 random_state=0).fit(X_train, y_train)

    y3 = ensemble.predict_proba(X_test)
    assert_array_almost_equal(y1, y3)

    # decision_function
    ensemble = BaggingClassifier(SVC(decision_function_shape='ovr'),
                                 n_jobs=3,
                                 random_state=0).fit(X_train, y_train)

    ensemble.set_params(n_jobs=1)
    decisions1 = ensemble.decision_function(X_test)
    ensemble.set_params(n_jobs=2)
    decisions2 = ensemble.decision_function(X_test)
    assert_array_almost_equal(decisions1, decisions2)

    X_err = np.hstack((X_test, np.zeros((X_test.shape[0], 1))))
    err_msg = (f"Number of features of the model must match the input. Model "
               f"n_features is {X_test.shape[1]} and input n_features is "
               f"{X_err.shape[1]} ")
    with pytest.raises(ValueError, match=err_msg):
        ensemble.decision_function(X_err)

    ensemble = BaggingClassifier(SVC(decision_function_shape='ovr'),
                                 n_jobs=1,
                                 random_state=0).fit(X_train, y_train)

    decisions3 = ensemble.decision_function(X_test)
    assert_array_almost_equal(decisions1, decisions3)
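# Side note on the pytest variant above (not from the original source):
# pytest.raises(match=...) applies the expected message with re.search, so
# any regex metacharacters in err_msg are interpreted. Wrapping the expected
# text in re.escape is the safer pattern; a minimal self-contained sketch:
import re

import pytest


def test_match_is_a_regex():
    # Without re.escape, the parentheses would be parsed as a regex group
    # and the literal message "bad value (x)" would not match.
    with pytest.raises(ValueError, match=re.escape("bad value (x)")):
        raise ValueError("bad value (x)")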
class GrantModel():

    def train(self, features, labels):
        cores = 8
        self.vectorizer = DictVectorizer(sparse=True)
        # One LinearSVC per core, each trained on a 1/cores slice of the data.
        self.sentiment = BaggingClassifier(svm.LinearSVC(),
                                           max_samples=1.0 / cores,
                                           n_estimators=cores,
                                           n_jobs=cores)
        train_vec = self.vectorizer.fit_transform(features)
        self.sentiment.fit(train_vec, labels)

    def extract_features(self, tweet):
        feats = {}
        tweet = tweet.split(' ')
        feats['NUMCAPS'] = 0
        for j in range(len(tweet)):
            word = tweet[j]
            # Skip @-mentions; count capitals only in ordinary words.
            if len(word) > 0 and word[0] != '@':
                feats['WORD=' + word.lower()] = 1
                feats['NUMCAPS'] += sum(1 for char in word if char.isupper())
        return feats

    def predict(self, newTweetTexts):
        feats = []
        for text in newTweetTexts:
            feats.append(self.extract_features(text))
        feat_vec = self.vectorizer.transform(feats)
        return self.sentiment.decision_function(feat_vec)
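# A quick illustration of GrantModel's feature extractor (hypothetical input,
# not from the original source). decision_function works on the bagged
# ensemble only because the LinearSVC base estimator provides it.
m = GrantModel()
print(m.extract_features("@bob Great WORK"))
# -> {'NUMCAPS': 5, 'WORD=great': 1, 'WORD=work': 1}
# "@bob" is dropped entirely, so its capital letters are not counted.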
def othertest(precisionk, draw='False'):
    cleandata = pd.read_csv("./data/cleaned_knnimpute.csv")
    cleandata.index = cleandata.sid
    cleandata = cleandata.drop('sid', axis=1)
    # Keep only rows with a known label.
    mask = np.isnan(cleandata['Y'])
    cleandata = cleandata[~mask]

    # After c is chosen, use this to draw the AUC plot.
    train_id, test_id = train_test_split(cleandata.index, test_size=0.2)
    train = cleandata.loc[train_id]
    test = cleandata.loc[test_id]

    coltest = list(precisionCol(train, precisionk))
    coltest.append('Y')
    train = train[coltest]
    test = test[coltest]

    model = BaggingClassifier(base_estimator=linear_model.LogisticRegression(),
                              n_estimators=100,
                              max_features=200,
                              n_jobs=-1)
    model.fit(train.drop('Y', axis=1), train['Y'])
    fpr, tpr, thresholds = roc_curve(
        test['Y'], model.predict_proba(test.drop('Y', axis=1))[:, 1])
    print(auc(fpr, tpr))

    if draw == 'True':
        plotAUC(test['Y'], model.decision_function(test.drop('Y', axis=1)),
                'Bagged Logistic Regression')
        plt.savefig("testnorm_randomforest.png", dpi=120)
class ensemble:
    """Thin wrapper around scikit-learn bagging ensembles."""

    def __init__(self, X=0, labels=0, name='linear', rand=42):
        self.X = X
        self.labels = labels
        self.name = name
        self.model = []
        self.rand = rand

    def bagging(self, oob_val=False):
        from sklearn.ensemble import BaggingClassifier
        from sklearn.tree import DecisionTreeClassifier
        # Note: the constructor keyword is oob_score; oob_score_ is the
        # fitted attribute.
        self.model = BaggingClassifier(DecisionTreeClassifier(),
                                       n_estimators=500,
                                       max_samples=100,
                                       bootstrap=True,
                                       n_jobs=-1,
                                       oob_score=oob_val)
        return self.model

    def bag_rand_forest(self):
        # Bagged trees restricted like a random forest; store the model so
        # the other helpers can use it.
        self.model = BaggingClassifier(
            DecisionTreeClassifier(max_features="auto", max_leaf_nodes=16),
            n_estimators=500, max_samples=1.0, bootstrap=True, n_jobs=-1)
        return self.model

    def oob_score(self):
        return self.model.oob_score_

    def predictor(self, predict_val):
        return self.model.predict(predict_val)

    def predict_percent(self, predict_val):
        return self.model.predict_proba(predict_val)

    def predict_scores(self, predict_val):
        # Only works when the base estimator implements decision_function;
        # a bagged DecisionTreeClassifier does not, so this raises.
        return self.model.decision_function(predict_val)

    def accuracy(self, test, x_test, y_test):
        from sklearn.metrics import accuracy_score
        self.model.fit(self.X, self.labels)
        y_pred = self.model.predict(x_test)
        return accuracy_score(y_test, y_pred)
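# Minimal usage sketch for the ensemble class above; the iris data and the
# variable names are assumptions for illustration only.
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

ens = ensemble(X_train, y_train)
ens.bagging(oob_val=True)                   # builds the bagged-tree model
print(ens.accuracy(None, X_test, y_test))   # fits internally, then scores
print(ens.oob_score())                      # out-of-bag estimate from training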
class SVM(Model):

    def __init__(self, *args, **kwargs):
        # Bag of L1-regularised linear SVMs.
        self.clf = BaggingClassifier(LinearSVC(penalty='l1',
                                               dual=False,
                                               tol=1e-7),
                                     n_jobs=-1)

    def train(self, x, y):
        self.clf.fit(x, y)

    def predict(self, x):
        # Rank classes by the averaged decision scores: best action first,
        # runner-up as the fallback.
        pred = self.clf.decision_function(x)
        action = ACTIONS[self.clf.classes_[np.argmax(pred)]]
        alter_action = ACTIONS[self.clf.classes_[np.argsort(pred).squeeze()[-2]]]
        return action, alter_action
class BaggedDecisionTreeClassifier():

    def __init__(self, n_estimators=20, bootstrap=True,
                 bootstrap_features=False, oob_score=False, max_depth=None,
                 min_samples_leaf=20, warm_start=False, n_jobs=None,
                 early_stopping='auto', verbose=0, random_state=None):
        # early_stopping is accepted but unused: neither DecisionTreeClassifier
        # nor BaggingClassifier supports it.
        self.tree = DecisionTreeClassifier(max_depth=max_depth,
                                           min_samples_leaf=min_samples_leaf)
        self.BagDT = BaggingClassifier(base_estimator=self.tree,
                                       n_estimators=n_estimators,
                                       bootstrap=bootstrap,
                                       bootstrap_features=bootstrap_features,
                                       oob_score=oob_score,
                                       warm_start=warm_start,
                                       n_jobs=n_jobs,
                                       random_state=random_state,
                                       verbose=verbose)

    def decision_function(self, X):
        # Delegates to BaggingClassifier, which only exposes decision_function
        # when the base estimator implements it; DecisionTreeClassifier does
        # not, so this raises AttributeError at call time.
        return self.BagDT.decision_function(X)

    def fit(self, X, y, sample_weight=None):
        self.BagDT.fit(X, y, sample_weight=sample_weight)
        return self.BagDT

    def get_params(self, deep=True):
        return self.BagDT.get_params(deep=deep)

    def predict(self, X):
        return self.BagDT.predict(X)

    def predict_log_proba(self, X):
        return self.BagDT.predict_log_proba(X)

    def predict_proba(self, X):
        return self.BagDT.predict_proba(X)

    def score(self, X, y, sample_weight=None):
        return self.BagDT.score(X, y, sample_weight=sample_weight)

    def set_params(self, **params):
        return self.BagDT.set_params(**params)
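# Smoke test for the wrapper above (toy data; assumes a scikit-learn version
# where BaggingClassifier still takes base_estimator):
from sklearn.datasets import make_classification

X, y = make_classification(n_samples=200, random_state=0)
clf = BaggedDecisionTreeClassifier(n_estimators=10, random_state=0)
clf.fit(X, y)
print(clf.predict_proba(X[:3]))   # works: trees expose predict_proba
# clf.decision_function(X) would raise AttributeError (see comment above).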
def bagging(X_train, y_train, cart, X_test, y_test):
    seed = 7
    kfold = model_selection.KFold(n_splits=10)  # unused below
    num_trees = 100
    model = BaggingClassifier(base_estimator=cart,
                              n_estimators=num_trees,
                              random_state=seed).fit(X_train, y_train)
    results = model.score(X_test, y_test)
    # decision_function is only available when the base estimator implements
    # it (e.g. a linear model or SVM, not a bare decision tree).
    y_df = model.decision_function(X_test)
    y_pred = model.predict(X_test)
    precisions, recall, t = precision_recall_curve(y_test, y_df, pos_label=1)
    print(precisions[:10], recall[:10], t[:10])
    precision = precisions[0]
    confmat = confusion_matrix(y_test, y_pred)
    return results, precision, confmat
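# Hypothetical call of bagging() above (toy data; assumes the snippet's own
# imports are in scope). Despite the `cart` argument name, the base estimator
# must implement decision_function for precision_recall_curve to get scores,
# so LogisticRegression is used here instead of a tree.
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=300, random_state=7)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, random_state=7)
acc, prec, cm = bagging(X_tr, y_tr, LogisticRegression(max_iter=1000),
                        X_te, y_te)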
class BaggingClassifierImpl:
    def __init__(
        self,
        base_estimator=None,
        n_estimators=10,
        max_samples=1.0,
        max_features=1.0,
        bootstrap=True,
        bootstrap_features=False,
        oob_score=False,
        warm_start=False,
        n_jobs=None,
        random_state=None,
        verbose=0,
    ):
        self._hyperparams = {
            "base_estimator": make_sklearn_compat(base_estimator),
            "n_estimators": n_estimators,
            "max_samples": max_samples,
            "max_features": max_features,
            "bootstrap": bootstrap,
            "bootstrap_features": bootstrap_features,
            "oob_score": oob_score,
            "warm_start": warm_start,
            "n_jobs": n_jobs,
            "random_state": random_state,
            "verbose": verbose,
        }
        self._wrapped_model = SKLModel(**self._hyperparams)

    def fit(self, X, y=None):
        if y is not None:
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def predict(self, X):
        return self._wrapped_model.predict(X)

    def predict_proba(self, X):
        return self._wrapped_model.predict_proba(X)

    def decision_function(self, X):
        return self._wrapped_model.decision_function(X)
class BaggingProcessor(Processor):

    def __init__(self, name='bagging', c=1.0,
                 keys_correspondences=DEFAULT_KEYS_CORRESPONDENCES):
        super(BaggingProcessor, self).__init__(name)
        self._model = BaggingClassifier(LinearSVC(C=c),
                                        max_samples=0.5,
                                        max_features=0.8)
        self.keys_correspondences = keys_correspondences

    def to_dict(self):
        # Serialise the fitted model as a pickled byte string.
        output_dict = {
            'data': np.array(pickle.dumps(self._model)),
        }
        return output_dict

    def from_dict(self, dict):
        self._model = pickle.loads(dict['data'])

    def fit(self, x):
        labels_key = self.keys_correspondences["labels_key"]
        features_key = self.keys_correspondences["features_key"]
        # Binarise the labels: anything positive becomes class 1.
        labels = copy.deepcopy(x[labels_key])
        labels[labels > 0] = 1
        self._model.fit(x[features_key], labels)

    def run(self, x):
        features_key = self.keys_correspondences["features_key"]
        scores_key = self.keys_correspondences["scores_key"]
        output_type_key = self.keys_correspondences["output_type_key"]
        x[scores_key] = self._model.decision_function(x[features_key])
        x[output_type_key] = ProcessorOutputType.LIKELIHOOD
        return x

    def __str__(self):
        description = {'type': 'Bagging Processor', 'name': self.name}
        return str(description)
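# Sketch of the pickle round-trip used by to_dict/from_dict above, on a
# standalone model (toy data; numpy and pickle only — the Processor base
# class and DEFAULT_KEYS_CORRESPONDENCES are external to this snippet):
import pickle

import numpy as np
from sklearn.ensemble import BaggingClassifier
from sklearn.svm import LinearSVC

X = np.random.RandomState(0).randn(60, 4)
y = (X[:, 0] > 0).astype(int)
model = BaggingClassifier(LinearSVC(C=1.0), max_samples=0.5,
                          max_features=0.8).fit(X, y)
blob = np.array(pickle.dumps(model))   # what to_dict stores
restored = pickle.loads(blob)          # works: numpy arrays are bytes-like
scores = restored.decision_function(X[:5])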
    elif j == 3:
        clf = BaggingClassifier(base_estimator=MLPClassifier(
            hidden_layer_sizes=(100,), activation='relu', solver='adam',
            batch_size=128, alpha=1e-4, learning_rate_init=1e-3,
            learning_rate='adaptive', tol=1e-4, max_iter=200),
            n_estimators=10, max_samples=0.5, max_features=0.5)
    elif j == 4:
        clf = BaggingClassifier(base_estimator=LinearSVC(
            penalty='l2', random_state=0, tol=1e-4),
            n_estimators=10, max_samples=0.5, max_features=0.5)

    skf = StratifiedKFold(n_splits=10)
    skf_accuracy = []
    for train, test in skf.split(X, y):
        clf.fit(X[train], y[train])
        if n_classes.size < 3:
            # LinearSVC (j == 4) lacks predict_proba; use decision scores.
            skf_accuracy.append(
                roc_auc_score(y[test],
                              clf.predict_proba(X[test])[:, 1]
                              if j != 4 else clf.decision_function(X[test]),
                              average='micro'))
        else:
            ytest_one_hot = label_binarize(y[test], classes=n_classes)
            skf_accuracy.append(
                roc_auc_score(ytest_one_hot,
                              clf.predict_proba(X[test])
                              if j != 4 else clf.decision_function(X[test]),
                              average='micro'))
    accuracy = np.mean(skf_accuracy)
    print(cl[j], 'AUC: %.3f' % accuracy)
# model = joblib.load("%s/svm_model" % training_set_path)
for test_set_path in [
        "./our_dataset/testing_set/LH_Protein/structures/",
        "./our_dataset/testing_set/LH_NonProtein/structures/",
        "./our_dataset/validation_set/structures/",
        "./our_dataset/homology/LH_Protein/structures/",
        "./our_dataset/homology/LH_NonProtein/structures/"
]:
    print("Importing descriptors from the testing set %s." % test_set_path)
    X_test, y_test, labels_test = loadSamples(
        test_set_path, "*_ab_test_descriptors_N5.txt",
        len("_ab_test_descriptors_N5.txt"))
    print("Number of features: %d." % X_test.shape[-1])
    X_test_scale = scaler.transform(X_test.todense())

    print("Predicting the testing set %s." % test_set_path)
    y_score = model.decision_function(X_test_scale)

    def get_indexes(x, xs):
        # All positions of x in xs.
        return [i for (y, i) in zip(xs, range(len(xs))) if x == y]

    pdb_ids = sorted(set(labels_test))
    for file_id in pdb_ids:
        pdb_id_indices = get_indexes(file_id, labels_test)
        with open("%s/%s_ab_patch_score.txt" % (test_set_path, file_id),
                  "w") as out_scores:
            for p in y_score[pdb_id_indices]:
                out_scores.write("%f\n" % p)
def test_parallel():
    """Check parallel computations."""
    rng = check_random_state(0)

    # Classification
    X_train, X_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        random_state=rng)

    for n_jobs in [-1, 3]:
        ensemble = BaggingClassifier(DecisionTreeClassifier(),
                                     n_jobs=n_jobs,
                                     random_state=0).fit(X_train, y_train)

        # predict_proba
        ensemble.set_params(n_jobs=1)
        y1 = ensemble.predict_proba(X_test)
        ensemble.set_params(n_jobs=2)
        y2 = ensemble.predict_proba(X_test)
        assert_array_almost_equal(y1, y2)

        ensemble = BaggingClassifier(DecisionTreeClassifier(),
                                     n_jobs=1,
                                     random_state=0).fit(X_train, y_train)

        y3 = ensemble.predict_proba(X_test)
        assert_array_almost_equal(y1, y3)

        # decision_function
        ensemble = BaggingClassifier(SVC(),
                                     n_jobs=n_jobs,
                                     random_state=0).fit(X_train, y_train)

        ensemble.set_params(n_jobs=1)
        decisions1 = ensemble.decision_function(X_test)
        ensemble.set_params(n_jobs=2)
        decisions2 = ensemble.decision_function(X_test)
        assert_array_almost_equal(decisions1, decisions2)

        ensemble = BaggingClassifier(SVC(),
                                     n_jobs=1,
                                     random_state=0).fit(X_train, y_train)

        decisions3 = ensemble.decision_function(X_test)
        assert_array_almost_equal(decisions1, decisions3)

    # Regression
    X_train, X_test, y_train, y_test = train_test_split(boston.data,
                                                        boston.target,
                                                        random_state=rng)

    for n_jobs in [-1, 3]:
        ensemble = BaggingRegressor(DecisionTreeRegressor(),
                                    n_jobs=n_jobs,
                                    random_state=0).fit(X_train, y_train)

        ensemble.set_params(n_jobs=1)
        y1 = ensemble.predict(X_test)
        ensemble.set_params(n_jobs=2)
        y2 = ensemble.predict(X_test)
        assert_array_almost_equal(y1, y2)

        ensemble = BaggingRegressor(DecisionTreeRegressor(),
                                    n_jobs=1,
                                    random_state=0).fit(X_train, y_train)

        y3 = ensemble.predict(X_test)
        assert_array_almost_equal(y1, y3)
class HistRandomForestClassifier():

    def __init__(self, loss='auto', max_leaf_nodes=31, max_depth=None,
                 min_samples_leaf=20, l2_regularization=0, max_bins=255,
                 n_estimators=20, max_samples=1.0, bootstrap=True,
                 bootstrap_features=False, oob_score=False,
                 categorical_features=None, monotonic_cst=None,
                 warm_start=False, n_jobs=None, early_stopping='auto',
                 scoring='loss', validation_fraction=0.1, n_iter_no_change=10,
                 tol=1e-7, verbose=0, random_state=None):
        self.loss = loss
        self.max_leaf_nodes = max_leaf_nodes
        self.max_depth = max_depth
        self.min_samples_leaf = min_samples_leaf
        self.l2_regularization = l2_regularization
        self.max_bins = max_bins
        self.n_estimators = n_estimators
        self.max_samples = max_samples
        self.bootstrap = bootstrap
        self.bootstrap_features = bootstrap_features
        self.oob_score = oob_score
        self.categorical_features = categorical_features
        self.monotonic_cst = monotonic_cst
        self.warm_start = warm_start
        self.n_jobs = n_jobs
        self.early_stopping = early_stopping
        self.scoring = scoring
        self.validation_fraction = validation_fraction
        self.n_iter_no_change = n_iter_no_change
        self.tol = tol
        self.verbose = verbose
        self.random_state = random_state
        # A single boosting iteration at learning rate 1 behaves like one
        # histogram-based tree; bagging many of them mimics a random forest.
        self.tree = HistGradientBoostingClassifier(
            loss=loss, learning_rate=1, max_iter=1,
            max_leaf_nodes=max_leaf_nodes, max_depth=max_depth,
            min_samples_leaf=min_samples_leaf,
            l2_regularization=l2_regularization, max_bins=max_bins,
            categorical_features=categorical_features,
            monotonic_cst=monotonic_cst, early_stopping=early_stopping,
            scoring=scoring, validation_fraction=validation_fraction,
            n_iter_no_change=n_iter_no_change, tol=tol, verbose=verbose,
            random_state=random_state)
        self.HistRF = BaggingClassifier(base_estimator=self.tree,
                                        n_estimators=n_estimators,
                                        bootstrap=bootstrap,
                                        bootstrap_features=bootstrap_features,
                                        oob_score=oob_score,
                                        warm_start=warm_start,
                                        n_jobs=n_jobs,
                                        random_state=random_state,
                                        verbose=verbose)

    def decision_function(self, X):
        return self.HistRF.decision_function(X)

    def fit(self, X, y, sample_weight=None):
        self.HistRF.fit(X, y, sample_weight=sample_weight)
        return self.HistRF

    def get_params(self, deep=True):
        return self.HistRF.get_params(deep=deep)

    def predict(self, X):
        return self.HistRF.predict(X)

    def predict_log_proba(self, X):
        return self.HistRF.predict_log_proba(X)

    def predict_proba(self, X):
        return self.HistRF.predict_proba(X)

    def score(self, X, y, sample_weight=None):
        return self.HistRF.score(X, y, sample_weight=sample_weight)

    def set_params(self, **params):
        return self.HistRF.set_params(**params)
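# Smoke test for HistRandomForestClassifier above (toy data): each bagged
# estimator is a single-iteration HistGradientBoostingClassifier, which
# implements decision_function, so the ensemble exposes it too.
from sklearn.datasets import make_classification

X, y = make_classification(n_samples=500, random_state=0)
rf = HistRandomForestClassifier(n_estimators=10, random_state=0)
rf.fit(X, y)
print(rf.decision_function(X[:5]))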
        clf = BaggingClassifier(base_estimator=KNeighborsClassifier(n_neighbors=3),
                                n_estimators=10, max_samples=0.5,
                                max_features=0.5)
    elif j == 3:
        clf = BaggingClassifier(base_estimator=MLPClassifier(
            hidden_layer_sizes=(100,), activation='relu', solver='adam',
            batch_size=128, alpha=1e-4, learning_rate_init=1e-3,
            learning_rate='adaptive', tol=1e-4, max_iter=200),
            n_estimators=10, max_samples=0.5, max_features=0.5)
    elif j == 4:
        clf = BaggingClassifier(base_estimator=LinearSVC(
            penalty='l2', random_state=0, tol=1e-4),
            n_estimators=10, max_samples=0.5, max_features=0.5)

    skf = StratifiedKFold(n_splits=10)
    skf_accuracy = []
    for train, test in skf.split(X, y):
        clf.fit(X[train], y[train])
        if n_classes.size < 3:
            # LinearSVC (j == 4) lacks predict_proba; use decision scores.
            skf_accuracy.append(
                roc_auc_score(y[test],
                              clf.predict_proba(X[test])[:, 1]
                              if j != 4 else clf.decision_function(X[test]),
                              average='micro'))
        else:
            ytest_one_hot = label_binarize(y[test], classes=n_classes)
            skf_accuracy.append(
                roc_auc_score(ytest_one_hot,
                              clf.predict_proba(X[test])
                              if j != 4 else clf.decision_function(X[test]),
                              average='micro'))
    accuracy = np.mean(skf_accuracy)
    of.write(f'{accuracy:.6f}|')
    print(f'{time.time() - start_time:.3f}s')
of.write('\n')
class _BaggingClassifierImpl:
    def __init__(
        self,
        base_estimator=None,
        n_estimators=10,
        *,
        max_samples=1.0,
        max_features=1.0,
        bootstrap=True,
        bootstrap_features=False,
        oob_score=False,
        warm_start=False,
        n_jobs=None,
        random_state=None,
        verbose=0,
    ):
        estimator_impl = base_estimator

        self._hyperparams = {
            "base_estimator": estimator_impl,
            "n_estimators": n_estimators,
            "max_samples": max_samples,
            "max_features": max_features,
            "bootstrap": bootstrap,
            "bootstrap_features": bootstrap_features,
            "oob_score": oob_score,
            "warm_start": warm_start,
            "n_jobs": n_jobs,
            "random_state": random_state,
            "verbose": verbose,
        }
        self._wrapped_model = SKLModel(**self._hyperparams)
        self._hyperparams["base_estimator"] = base_estimator

    def get_params(self, deep=True):
        out = self._wrapped_model.get_params(deep=deep)
        # we want to return the lale operator, not the underlying impl
        out["base_estimator"] = self._hyperparams["base_estimator"]
        return out

    def fit(self, X, y, sample_weight=None):
        if isinstance(X, pd.DataFrame):
            feature_transformer = FunctionTransformer(
                func=lambda X_prime: pd.DataFrame(X_prime, columns=X.columns),
                inverse_func=None,
                check_inverse=False,
            )
            # Restore the column names before each base estimator sees its
            # bagged sample (lale pipeline composition via >>).
            self._hyperparams["base_estimator"] = (
                feature_transformer >> self._hyperparams["base_estimator"]
            )
            self._wrapped_model = SKLModel(**self._hyperparams)
        self._wrapped_model.fit(X, y, sample_weight)
        return self

    def predict(self, X, **predict_params):
        return self._wrapped_model.predict(X, **predict_params)

    def predict_proba(self, X):
        return self._wrapped_model.predict_proba(X)

    def predict_log_proba(self, X):
        return self._wrapped_model.predict_log_proba(X)

    def decision_function(self, X):
        return self._wrapped_model.decision_function(X)

    def score(self, X, y, sample_weight=None):
        return self._wrapped_model.score(X, y, sample_weight)