def test_voting_regression(self): model = VotingRegressor([('lr', LinearRegression()), ('dt', DecisionTreeRegressor())]) model, _ = fit_regression_model(model) names = list(enumerate_model_names(model)) assert len(names) == 3 assert [_[0] for _ in names] == ['', 'lr', 'dt'] assert all(map(lambda x: isinstance(x, tuple), names)) assert all(map(lambda x: len(x) == 2, names))
def test_pipeline(self): data = numpy.array( [[0.0, 0.0], [0.0, 0.0], [1.0, 1.0], [1.0, 1.0]], dtype=numpy.float32, ) model = Pipeline([ ("scaler1", StandardScaler()), ( "union", FeatureUnion([ ("scaler2", StandardScaler()), ("scaler3", MinMaxScaler()), ]), ), ]) model.fit(data) names = list(enumerate_model_names(model)) assert [_[0] for _ in names] == ['', 'scaler1', 'union', 'union__scaler2', 'union__scaler3']
def test_random_forest(self): model = RandomForestRegressor() model, _ = fit_regression_model(model) names = list(enumerate_model_names(model)) assert all(map(lambda x: isinstance(x, tuple), names)) assert all(map(lambda x: len(x) == 2, names))
def test_pipeline_column_transformer(self): iris = load_iris() X = iris.data[:, :3] y = iris.target X_train = pandas.DataFrame(X, columns=["vA", "vB", "vC"]) X_train["vcat"] = X_train["vA"].apply(lambda x: "cat1" if x > 0.5 else "cat2") X_train["vcat2"] = X_train["vB"].apply(lambda x: "cat3" if x > 0.5 else "cat4") y_train = y % 2 numeric_features = [0, 1, 2] # ["vA", "vB", "vC"] categorical_features = [3, 4] # ["vcat", "vcat2"] classifier = LogisticRegression(C=0.01, class_weight=dict( zip([False, True], [0.2, 0.8])), n_jobs=1, max_iter=10, solver="lbfgs", tol=1e-3) numeric_transformer = Pipeline( steps=[("imputer", SimpleImputer( strategy="median")), ("scaler", StandardScaler())]) categorical_transformer = Pipeline(steps=[( "onehot", OneHotEncoder(sparse=True, handle_unknown="ignore") ), ("tsvd", TruncatedSVD(n_components=1, algorithm="arpack", tol=1e-4))]) preprocessor = ColumnTransformer( transformers=[("num", numeric_transformer, numeric_features), ("cat", categorical_transformer, categorical_features)]) model = Pipeline(steps=[("precprocessor", preprocessor), ("classifier", classifier)]) model.fit(X_train, y_train) names = list(enumerate_model_names(model, short=False)) simple = [_[0] for _ in names] assert len(set(simple)) == len(simple) names = list(enumerate_model_names(model)) simple2 = [_[0] for _ in names] assert len(simple2) == len(simple) exp = [ '', 'precprocessor', 'precprocessor__num', 'precprocessor__num__imputer', 'precprocessor__num__scaler', 'precprocessor__cat', 'precprocessor__cat__onehot', 'precprocessor__cat__onehot__categories___0', 'precprocessor__cat__onehot__categories___1', 'precprocessor__cat__tsvd', 'classifier' ] self.assertEqual(simple2[:len(exp) - 2], exp[:-2]) initial_type = [("numfeat", FloatTensorType([None, 3])), ("strfeat", StringTensorType([None, 2]))] model_onnx = convert_sklearn(model, initial_types=initial_type, target_opset=TARGET_OPSET) dump_data_and_model( X_train, model, model_onnx, basename="SklearnPipelineColumnTransformerPipelinerOptions1") options = {'classifier': {'zipmap': False}} new_options = _process_options(model, options) assert len(new_options) == 2 model_onnx = convert_sklearn(model, initial_types=initial_type, options={'classifier': { 'zipmap': False }}, target_opset=TARGET_OPSET) assert 'zipmap' not in str(model_onnx).lower() dump_data_and_model( X_train, model, model_onnx, basename="SklearnPipelineColumnTransformerPipelinerOptions2") options = {'classifier__zipmap': False} new_options = _process_options(model, options) assert len(new_options) == 2 model_onnx = convert_sklearn(model, initial_types=initial_type, options=options, target_opset=TARGET_OPSET) assert 'zipmap' not in str(model_onnx).lower() dump_data_and_model( X_train, model, model_onnx, basename="SklearnPipelineColumnTransformerPipelinerOptions2") options = {id(model): {'zipmap': False}} new_options = _process_pipeline_options(model, options) model_onnx = convert_sklearn(model, initial_types=initial_type, options={id(model): { 'zipmap': False }}, target_opset=TARGET_OPSET) assert 'zipmap' not in str(model_onnx).lower() dump_data_and_model( X_train, model, model_onnx, basename="SklearnPipelineColumnTransformerPipelinerOptions2")