Esempio n. 1
0
 def test_register_converters_skl_op(self):
     """
     Checks the list returned by ``sklearn_operators(extended=True)``
     contains the LightGBM and XGBoost classifiers and regressors.
     """
     available = {entry['name']
                  for entry in sklearn_operators(extended=True)}
     # same order as before so that the first failure reported is unchanged
     for expected in ('LGBMClassifier', 'LGBMRegressor',
                      'XGBClassifier', 'XGBRegressor'):
         self.assertIn(expected, available)
Esempio n. 2
0
    def test_write_documentation_converters(self):
        """
        Goes through the sorted entries of ``sklearn__all__`` and checks
        every RST chunk produced for a subfolder contains ``digraph``.
        The loop stops as soon as three subfolders produced at least
        one chunk, the test fails if fewer than three did.
        """
        fLOG(__file__,
             self._testMethodName,
             OutputPrint=__name__ == "__main__")
        done = []
        for folder in sorted(sklearn__all__):
            operators = sklearn_operators(folder)
            if len(operators) == 0:
                # no model in this subfolder
                continue

            chunks = []
            for chunk in enumerate_visual_onnx_representation_into_rst(folder):
                self.assertIn("digraph", chunk)
                chunks.append(chunk)
            if len(chunks) == 0:
                # no visual representation for this subfolder
                continue

            # The page is assembled but not written anywhere in this test.
            underline = "=" * len(folder)
            page = [
                ".. _l-skl2onnx-%s:" % folder, "", underline, folder,
                underline, "", ".. contents::", "    :local:", ""
            ]
            page.extend(chunks)
            page.append('')

            done.append(folder)
            fLOG("subfolder '{}' - {} scenarios.".format(
                folder, len(operators)))
            if len(done) > 2:
                break

        self.assertGreater(len(done), 2)
Esempio n. 3
0
 def test_sklearn_operator_here(self):
     """
     Checks ``sklearn_operators`` returns at least one model for
     subfolders ``'ensemble'`` and ``'mlprodict.onnx_conv'`` and that
     the latter includes ``LGBMClassifier``.
     """
     for folder in sorted(['ensemble', 'mlprodict.onnx_conv']):
         found = sklearn_operators(folder)
         if len(found) == 0:
             raise AssertionError(
                 "models is empty for subfolder '{}'.".format(folder))
         if folder != "mlprodict.onnx_conv":
             continue
         self.assertIn("LGBMClassifier", {item['name'] for item in found})
Esempio n. 4
0
 def test_check_whole_model_list(self):
     """
     Builds one record per model returned by
     ``sklearn_operators(extended=True)``, checks model names are
     unique and that the problems found for ``XGBRegressor`` by
     ``main_find_suitable_problem`` match ``find_suitable_problem``.
     """
     summary = []
     for entry in sklearn_operators(extended=True):
         record = {'name': entry['name']}
         try:
             record['prob'] = main_find_suitable_problem(entry['cl'])
         except RuntimeError:
             # the record is kept, simply without a 'prob' key
             pass
         summary.append(record)

     # every name must appear only once
     all_names = [record['name'] for record in summary]
     self.assertEqual(len(set(all_names)), len(all_names))

     matches = [record for record in summary
                if record['name'] == 'XGBRegressor']
     self.assertEqual(len(matches), 1)
     expected = find_suitable_problem(XGBRegressor)
     self.assertEqual(sorted(expected), sorted(matches[0]['prob']))
    def test_sklearn_operators(self):
        """
        Checks the list returned by ``sklearn_operators()`` and the
        problems ``find_suitable_problem`` associates to every model:
        a non empty list for most of them, a ``RuntimeError`` for a
        fixed set of models.
        """
        res = sklearn_operators()
        self.assertGreater(len(res), 1)
        self.assertEqual(len(res[0]), 4)

        # expected problems for the models checked explicitly
        expected_probs = {
            'IsotonicRegression': ['~num+y-tr-1d', '~b-reg-1d'],
            'NearestCentroid': ['~b-cl-nop', '~b-cl-nop-64'],
        }

        # first pass, restricted to a short list of models
        short = ('IsotonicRegression', )
        for model in (m for m in res if m['name'] in short):
            prob = find_suitable_problem(model['cl'])
            self.assertNotEmpty(prob)
            if model['name'] == 'IsotonicRegression':
                self.assertEqual(prob, expected_probs['IsotonicRegression'])

        names = {m['name'] for m in res}
        for expected in ('Perceptron', 'TfidfVectorizer'):
            self.assertIn(expected, names)

        # models for which find_suitable_problem must raise
        ra = {
            'BaseEnsemble', 'NearestNeighbors', 'AgglomerativeClustering',
            'DBSCAN', 'OPTICS', 'SpectralClustering', 'SpectralBiclustering',
            'SpectralCoclustering'
        }
        # second pass, every model
        for model in res:
            name = model['name']
            if name in ra:
                # the model is bound as a default value of the lambda
                self.assertRaise(
                    lambda m=model: find_suitable_problem(m['cl']),
                    RuntimeError)
                continue

            prob = find_suitable_problem(model['cl'])
            self.assertNotEmpty(prob)
            if name in expected_probs:
                self.assertEqual(prob, expected_probs[name])
            self.assertIsInstance(prob, list)
Esempio n. 6
0
 def test_sklearn_operators(self):
     """
     Checks ``sklearn_operators(extended=True)`` returns more than one
     item and that the first one has four entries.
     """
     operators = sklearn_operators(extended=True)
     count = len(operators)
     self.assertGreater(count, 1)
     first = operators[0]
     self.assertEqual(len(first), 4)
Esempio n. 7
0
def generate_dot_converters(app):
    """
    Creates visual representation of each converters
    implemented in :epkg:`sklearn-onnx`.

    For every subfolder in ``sklearn__all__`` plus
    ``'mlprodict.onnx_conv'``, the function writes one page
    ``visual-<sub>-<index>.rst`` per text returned by
    ``enumerate_visual_onnx_representation_into_rst`` into folder
    ``<srcdir>/skl_converters``, then a page ``skl2onnx_<sub>.rst``
    whose toctree lists them. It finally writes ``index.rst`` with a
    toctree over all the subfolder pages. A subfolder with no model
    or with no visual representation is skipped: no page is written
    for it and it does not appear in the index.

    :param app: application object, only ``app.builder.srcdir`` is
        read (presumably the Sphinx application, to be confirmed
        against the caller)
    :raises AssertionError: if fewer than two subfolders produced pages
    """
    from mlprodict.onnxrt.validate.validate import sklearn_operators, sklearn__all__
    from mlprodict.onnxrt.doc.doc_write_helper import enumerate_visual_onnx_representation_into_rst
    logger = getLogger('mlprodict')
    srcdir = app.builder.srcdir
    whe = os.path.join(os.path.abspath(srcdir), "skl_converters")
    logger.info("[mlprodict] create visual representation in '%s'.", whe)
    print(
        "[mlprodict-sphinx] create visual representation in '{}'.".format(whe))
    # Every page is written into this folder, it must exist beforehand.
    os.makedirs(whe, exist_ok=True)

    index = os.path.join(whe, "index.rst")
    subfolders = sklearn__all__ + ['mlprodict.onnx_conv']
    subs = []
    for sub in sorted(subfolders):
        logger.info("[mlprodict] graph for subfolder '%s'.", sub)
        print("[mlprodict] graph for subfolder '{}'.".format(sub))
        models = sklearn_operators(sub)
        if len(models) == 0:
            continue

        # One page per visual representation; only the toctree entries
        # are collected here, the header is added once we know at least
        # one page exists.
        pages = []
        for irow, text in enumerate(
                enumerate_visual_onnx_representation_into_rst(sub)):
            subname = "visual-%s-%03d.rst" % (sub, irow)
            pagename = os.path.join(whe, subname)
            with open(pagename, 'w', encoding='utf-8') as f:
                f.write(text)
            pages.append("    " + subname)
        if not pages:
            # Nothing was generated: an empty toctree page would only
            # add a dangling entry to the index.
            continue

        rows = [
            ".. _l-skl2onnx-%s:" % sub, "", "=" * len(sub), sub,
            "=" * len(sub), "", ".. toctree::", ""
        ]
        rows.extend(pages)
        rows.append('')
        dest = os.path.join(whe, "skl2onnx_%s.rst" % sub)
        with open(dest, "w", encoding="utf-8") as f:
            f.write("\n".join(rows))
        subs.append(sub)
        logger.info("[mlprodict] wrote '%s' - %s scenarios.",
                    sub, len(models))

    print("[mlprodict-sphinx] done visual representation in '{}'.".format(whe))
    assert len(subs) >= 2, (
        "Only {} subfolder(s) produced a visual representation: {}.".format(
            len(subs), subs))

    logger.info("[mlprodict] write '%s'.", index)
    with open(index, "w", encoding="utf-8") as f:
        f.write(
            dedent("""
        Visual Representation of scikit-learn models
        ============================================

        :epkg:`sklearn-onnx` converts many models from
        :epkg:`scikit-learn` into :epkg:`ONNX`. Every of
        them is a graph made of :epkg:`ONNX` mathematical functions
        (see :ref:`l-onnx-runtime-operators`,
        :epkg:`ONNX Operators`, :epkg:`ONNX ML Operators`).
        The following sections display a visual representation
        of each converted model. Every graph
        represents one ONNX graphs obtained after a model
        is fitted. The structure may change is the model is trained
        again.

        .. toctree::
            :maxdepth: 1

        """))
        # one toctree entry per subfolder page written above
        f.writelines("    skl2onnx_%s\n" % sub for sub in subs)