def test_extratreesregressor_decision_path(self):
    """Compare ONNX output with scikit-learn for an ExtraTreesRegressor.

    The model is converted with the ``decision_path`` option enabled, so
    the inference session returns two outputs: the predictions and the
    per-sample decision paths. Both are checked against the fitted
    scikit-learn estimator.
    """
    # Small synthetic dataset, reduced to its first two feature columns.
    features, target = make_classification(
        10, n_features=4, random_state=42)
    features = features[:, :2]

    model = ExtraTreesRegressor(max_depth=2, n_estimators=2)
    model.fit(features, target)

    input_spec = ('input', FloatTensorType((None, features.shape[1])))
    model_onnx = convert_sklearn(
        model,
        initial_types=[input_spec],
        options={id(model): {'decision_path': True}})

    sess = InferenceSession(model_onnx.SerializeToString())
    outputs = sess.run(None, {'input': features.astype(numpy.float32)})

    # First output: predictions must agree with scikit-learn.
    expected_pred = model.predict(features)
    assert_almost_equal(expected_pred, outputs[0].ravel())

    # Second output: each row is joined into a single string and compared
    # with the string form of scikit-learn's dense node-indicator matrix.
    indicator = model.decision_path(features)[0]
    exp = binary_array_to_string(indicator.todense())
    got = numpy.array([''.join(row) for row in outputs[1]])
    assert exp == got.ravel().tolist()
class ExtraTreesTransformer(object):
    """Encode samples by the tree nodes they pass through in an extra-trees forest.

    Parameters
    ----------
    task : str, default 'classification'
        ``'classification'`` builds an ``ExtraTreesClassifier``; any other
        value builds an ``ExtraTreesRegressor``.
    n_estimators : int, default 1000
        Number of trees, forwarded to the underlying estimator.

    Attributes
    ----------
    extratrees : the underlying estimator, created with ``random_state=42``.
    """

    def __init__(self, task='classification', n_estimators=1000):
        if task == 'classification':
            self.extratrees = ExtraTreesClassifier(
                n_estimators=n_estimators, random_state=42)
        else:
            self.extratrees = ExtraTreesRegressor(
                n_estimators=n_estimators, random_state=42)

    def fit(self, x, y=None):
        """Fit the forest on ``x``; returns ``self``.

        When ``y`` is None, random binary targets (one per row of ``x``)
        are generated so the forest can still be grown without labels.
        NOTE: these targets come from the global ``np.random`` state, so
        the unsupervised fit is only reproducible if the caller seeds it.
        """
        if y is None:
            y = np.random.randint(2, size=x.shape[0])
        self.extratrees.fit(x, y)
        return self

    def transform(self, x):
        """Return the first element of ``decision_path(x)``.

        Per the scikit-learn documentation this is the sparse node
        indicator matrix; the second element (node offsets per tree)
        is discarded.
        """
        indicator, _ = self.extratrees.decision_path(x)
        return indicator

    def fit_transform(self, x, y=None):
        """Fit on ``(x, y)`` and return the transform of ``x``.

        ``y`` is optional, matching ``fit``: omitting it uses the same
        random-target fallback.
        """
        return self.fit(x, y).transform(x)
class LCTF():
    """Feature space built from weighted, filtered and PCA-reduced tree paths.

    An ``ExtraTreesRegressor`` is fitted, each sample is described by the
    tree nodes it passes through, node columns are re-weighted and
    filtered by how many training samples reach them, and the result is
    projected with PCA.
    """

    def __init__(self, n_est=200, stop_crit=5, dw=0.9, dim=2):
        # number of trees
        self.n_est = n_est
        # tree stop criterion (passed as min_samples_leaf)
        self.stop_crit = stop_crit
        # factor used in node filtering
        self.dw = dw
        # number of components to be kept in dimensionality reduction
        self.dim = dim

    def fit_transform(self, X, Y):
        """Fitting and generating the LCTF space.

        Parameters
        ----------
        X : matrix of shape = [n_samples, n_features]
            (i.e., the feature matrix)
        Y : matrix of shape = [n_samples, n_outputs]
            (i.e., the label/output matrix)

        Returns
        -------
        self.treepath : The generated feature representation
        """
        forest = ExtraTreesRegressor(
            n_estimators=self.n_est,
            max_features='sqrt',
            max_depth=None,
            min_samples_leaf=self.stop_crit,
            random_state=0,
        )
        forest.fit(X, Y)
        self.clf = forest

        node_indicator = self.clf.decision_path(X)[0]

        # Column sums of the indicator: training samples reaching each node.
        node_counts = node_indicator.sum(0)
        log_counts = np.log(node_counts.astype(float)) + 0.00001
        # Per-node weight is the inverse of the (offset) log count.
        self.cw = np.power(log_counts, -1)
        weighted = node_indicator.multiply(self.cw).toarray().astype(float)

        # Drop nodes reached by more than dw * n_samples training samples.
        self.ind = np.where(node_counts > (X.shape[0] * self.dw))[1]
        filtered = np.delete(weighted, self.ind, axis=1)

        self.pca = PCA(self.dim)
        self.treepath = self.pca.fit_transform(filtered)
        return self.treepath

    def transform(self, Xtest):
        """Using the fitted model to generate features for new data.

        Reuses the node weights, the dropped-node indices and the PCA
        stored by ``fit_transform``.

        Parameters
        ----------
        Xtest : matrix of shape = [n_samples, n_features]
            (i.e., the feature matrix)

        Returns
        -------
        self.treepathtest : The generated feature representation
        """
        node_indicator = self.clf.decision_path(Xtest)[0]
        weighted = node_indicator.multiply(self.cw).toarray().astype(float)
        filtered = np.delete(weighted, self.ind, axis=1)
        self.treepathtest = self.pca.transform(filtered)
        return self.treepathtest