Ejemplo n.º 1
0
    def test_pipeline_lr(self):
        """Check that step-prefixed options reach the pipeline's classifier.

        A ``MinMaxScaler`` + ``LogisticRegression`` pipeline is fitted on a
        tiny dataset. The ``clr__*`` options are first resolved with
        ``_process_options`` and compared with the expected per-step
        dictionary, then the same options are used to convert the pipeline
        with ``to_onnx``.
        """
        data = numpy.array([[0.0, 0.0], [0.0, 0.0], [1.0, 1.0], [1.0, 1.0]],
                           dtype=numpy.float32)
        yd = numpy.array([0, 1, 0, 2], dtype=numpy.float32)
        pipe = Pipeline([('norm', MinMaxScaler()),
                         ('clr', LogisticRegression())])
        pipe.fit(data, yd)

        # Options use the "<step name>__<option name>" spelling.
        options = {'clr__raw_scores': True, 'clr__zipmap': False}
        new_options = _process_options(pipe, options)
        exp = {'raw_scores': True, 'zipmap': False}
        op = pipe.steps[1][1]
        # The resolved options must be keyed by id() of the classifier step
        # and carry the option names without the step prefix.
        self.assertIn(id(op), new_options)
        self.assertEqual(new_options[id(op)], exp)

        # Convert with the very same options that were checked above so the
        # two cannot drift apart.
        model_def = to_onnx(pipe, data, options=options)
        sonx = str(model_def)
        # unittest assertion instead of a bare ``assert`` so the check is
        # not stripped when Python runs with -O.
        self.assertNotIn("SOFTMAX", sonx)
Ejemplo n.º 2
0
    def test_pipeline_column_transformer(self):
        """Convert a ColumnTransformer pipeline with several option spellings.

        The model is a ``ColumnTransformer`` (numeric and categorical
        sub-pipelines) followed by a ``LogisticRegression``, fitted on the
        first three iris columns plus two derived string columns. The test
        checks the names produced by ``enumerate_model_names`` and then
        converts the model four times: without options, and with the
        ``zipmap`` option given as a nested dictionary, as a
        ``step__option`` key and as an ``id(model)`` key.
        """
        iris = load_iris()
        X = iris.data[:, :3]
        y = iris.target
        X_train = pandas.DataFrame(X, columns=["vA", "vB", "vC"])
        X_train["vcat"] = X_train["vA"].apply(lambda x: "cat1"
                                              if x > 0.5 else "cat2")
        X_train["vcat2"] = X_train["vB"].apply(lambda x: "cat3"
                                               if x > 0.5 else "cat4")
        y_train = y % 2
        numeric_features = [0, 1, 2]  # ["vA", "vB", "vC"]
        categorical_features = [3, 4]  # ["vcat", "vcat2"]

        classifier = LogisticRegression(C=0.01,
                                        class_weight=dict(
                                            zip([False, True], [0.2, 0.8])),
                                        n_jobs=1,
                                        max_iter=10,
                                        solver="lbfgs",
                                        tol=1e-3)

        numeric_transformer = Pipeline(
            steps=[("imputer", SimpleImputer(
                strategy="median")), ("scaler", StandardScaler())])

        # The ``sparse`` keyword is intentionally left out: sparse output is
        # the encoder's default, and the keyword was renamed to
        # ``sparse_output`` in scikit-learn 1.2 and removed in 1.4.
        categorical_transformer = Pipeline(steps=[(
            "onehot", OneHotEncoder(handle_unknown="ignore")
        ), ("tsvd",
            TruncatedSVD(n_components=1, algorithm="arpack", tol=1e-4))])

        preprocessor = ColumnTransformer(
            transformers=[("num", numeric_transformer, numeric_features),
                          ("cat", categorical_transformer,
                           categorical_features)])

        model = Pipeline(steps=[("precprocessor",
                                 preprocessor), ("classifier", classifier)])

        model.fit(X_train, y_train)

        # Names enumerated with short=False must all be distinct.
        names = list(enumerate_model_names(model, short=False))
        simple = [item[0] for item in names]
        self.assertEqual(len(set(simple)), len(simple))
        # The default enumeration must yield as many entries.
        names = list(enumerate_model_names(model))
        simple2 = [item[0] for item in names]
        self.assertEqual(len(simple2), len(simple))
        exp = [
            '', 'precprocessor', 'precprocessor__num',
            'precprocessor__num__imputer', 'precprocessor__num__scaler',
            'precprocessor__cat', 'precprocessor__cat__onehot',
            'precprocessor__cat__onehot__categories___0',
            'precprocessor__cat__onehot__categories___1',
            'precprocessor__cat__tsvd', 'classifier'
        ]
        # Only the leading names are compared; the last two expected
        # entries are deliberately excluded from the comparison.
        self.assertEqual(simple2[:len(exp) - 2], exp[:-2])

        # 1. Conversion without any option.
        initial_type = [("numfeat", FloatTensorType([None, 3])),
                        ("strfeat", StringTensorType([None, 2]))]
        model_onnx = convert_sklearn(model,
                                     initial_types=initial_type,
                                     target_opset=TARGET_OPSET)
        dump_data_and_model(
            X_train,
            model,
            model_onnx,
            basename="SklearnPipelineColumnTransformerPipelinerOptions1")

        # 2. Option given as a nested dictionary keyed by the step name.
        options = {'classifier': {'zipmap': False}}
        new_options = _process_options(model, options)
        self.assertEqual(len(new_options), 2)

        model_onnx = convert_sklearn(model,
                                     initial_types=initial_type,
                                     options={'classifier': {
                                         'zipmap': False
                                     }},
                                     target_opset=TARGET_OPSET)
        self.assertNotIn('zipmap', str(model_onnx).lower())
        dump_data_and_model(
            X_train,
            model,
            model_onnx,
            basename="SklearnPipelineColumnTransformerPipelinerOptions2")

        # 3. Option given with the flat "step__option" spelling.
        options = {'classifier__zipmap': False}
        new_options = _process_options(model, options)
        self.assertEqual(len(new_options), 2)

        model_onnx = convert_sklearn(model,
                                     initial_types=initial_type,
                                     options=options,
                                     target_opset=TARGET_OPSET)
        self.assertNotIn('zipmap', str(model_onnx).lower())
        # NOTE(review): same basename as the previous dump; presumably the
        # dumped artifacts overwrite each other - confirm this is intended.
        dump_data_and_model(
            X_train,
            model,
            model_onnx,
            basename="SklearnPipelineColumnTransformerPipelinerOptions2")

        # 4. Option keyed by id() of the whole pipeline. The return value
        # is not inspected; the call only has to succeed.
        options = {id(model): {'zipmap': False}}
        _process_pipeline_options(model, options)

        model_onnx = convert_sklearn(model,
                                     initial_types=initial_type,
                                     options={id(model): {
                                                  'zipmap': False
                                              }},
                                     target_opset=TARGET_OPSET)
        self.assertNotIn('zipmap', str(model_onnx).lower())
        # NOTE(review): basename reused a third time - see the note above.
        dump_data_and_model(
            X_train,
            model,
            model_onnx,
            basename="SklearnPipelineColumnTransformerPipelinerOptions2")