コード例 #1
0
    def test_pandas_rst_right(self):
        """
        Compares the grid tables returned by df2rst for ``align="r"``
        and ``align="c"`` with the expected text.
        """
        complete_row = dict(A="x", AA="xx", AAA="xxx")
        # no "A" key here: the expected tables show an empty cell for it
        partial_row = dict(AA="xxxxxxx", AAA="xxx")
        df = pandas.DataFrame([complete_row, partial_row])

        # right alignment; replace() strips the indentation of the literal
        right_aligned = df2rst(df, align="r")
        expected_right = """+---+---------+-----+
                 | A |      AA | AAA |
                 +===+=========+=====+
                 | x |      xx | xxx |
                 +---+---------+-----+
                 |   | xxxxxxx | xxx |
                 +---+---------+-----+
                 """.replace("                 ", "")
        self.assertEqual(right_aligned, expected_right)

        # centered alignment
        centered = df2rst(df, align="c")
        expected_centered = """+---+---------+-----+
                 | A |   AA    | AAA |
                 +===+=========+=====+
                 | x |   xx    | xxx |
                 +---+---------+-----+
                 |   | xxxxxxx | xxx |
                 +---+---------+-----+
                 """.replace("                 ", "")
        self.assertEqual(centered, expected_centered)
コード例 #2
0
 def simple():
     """
     Converts a small dataframe with df2rst one hundred times
     and returns the result of the last conversion.
     """
     rows = [dict(A="x", AA="xx", AAA="xxx"),
             dict(AA="xxxxxxx", AAA="xxx")]
     df = pandas.DataFrame(rows)
     # 99 conversions whose result is discarded...
     for _ in range(99):
         df2rst(df)
     # ...and a last one which is returned
     return df2rst(df)
コード例 #3
0
    def test_pandas_rst_right_format_number(self):
        """
        Checks df2rst with ``number_format=4``: right-aligned and centered
        tables on text columns, then a table holding one float column.
        """
        text_only = pandas.DataFrame([
            dict(A="x", AA="xx", AAA="xxx"),
            dict(AA="xxxxxxx", AAA="xxx"),
        ])

        # right alignment
        right_aligned = df2rst(text_only, align="r", number_format=4)
        expected_right = """+---+---------+-----+
                 | A |      AA | AAA |
                 +===+=========+=====+
                 | x |      xx | xxx |
                 +---+---------+-----+
                 |   | xxxxxxx | xxx |
                 +---+---------+-----+
                 """.replace("                 ", "")
        self.assertEqual(right_aligned, expected_right)

        # centered alignment
        centered = df2rst(text_only, align="c", number_format=4)
        expected_centered = """+---+---------+-----+
                 | A |   AA    | AAA |
                 +===+=========+=====+
                 | x |   xx    | xxx |
                 +---+---------+-----+
                 |   | xxxxxxx | xxx |
                 +---+---------+-----+
                 """.replace("                 ", "")
        self.assertEqual(centered, expected_centered)

        # a float column: the expected table shows 0.5123456 as 0.5123
        with_number = pandas.DataFrame([
            dict(A="x", AA="xx", AAA="xxx", N=0.5123456),
            dict(AA="xxxxxxx", AAA="xxx"),
        ])
        default_aligned = df2rst(with_number, number_format=4)
        expected_number = """+---+---------+-----+--------+
                 | A | AA      | AAA | N      |
                 +===+=========+=====+========+
                 | x | xx      | xxx | 0.5123 |
                 +---+---------+-----+--------+
                 |   | xxxxxxx | xxx |        |
                 +---+---------+-----+--------+
                 """.replace("                 ", "")
        self.assertEqual(default_aligned, expected_number)
コード例 #4
0
 def test_df2rst_split_col(self):
     """
     Reads ``data/marathon.txt`` and checks df2rst with split columns.
     The first call is expected to raise ValueError (there ``"time"``
     appears both in the common columns and in a subset), the second
     one must contain a given row.
     """
     folder = os.path.join(os.path.abspath(os.path.dirname(__file__)), "data")
     df = pandas.read_csv(os.path.join(folder, "marathon.txt"),
                          sep="\t",
                          names=["city", "year", "time", "seconds"])

     def overlapping_split():
         return df2rst(df,
                       split_col_common=["city", "time"],
                       split_col_subsets=[['time'], ['seconds']])

     self.assertRaise(overlapping_split, ValueError)

     converted = df2rst(df,
                        split_col_subsets=[['time'], ['seconds']],
                        split_col_common=["city", "year"])
     self.assertIn("| CHICAGO   | 2005 | 7622    |", converted)
コード例 #5
0
    def test_df2rst_split_row_label(self):
        """
        Checks df2rst on ``data/marathon.txt`` when rows are split by city,
        then by year with a label pattern.
        """
        marathon = os.path.join(
            os.path.abspath(os.path.dirname(__file__)), "data", "marathon.txt")
        df = pandas.read_csv(marathon, sep="\t",
                             names=["city", "year", "time", "seconds"])
        header = "| city      | year | time     | seconds |"

        by_city = df2rst(df, split_row="city")
        for fragment in ("+++++++++",
                         header,
                         "| PARIS | 2011 | 02:06:29 | 7589    |"):
            self.assertIn(fragment, by_city)

        by_year = df2rst(df, label_pattern=".. _lpy-{section}:",
                         split_row="year")
        for fragment in ("++++",
                         header,
                         "| FUKUOKA   | 1976 | 02:12:35 | 7955    |",
                         ".. _lpy-1949:"):
            self.assertIn(fragment, by_year)
コード例 #6
0
    def test_rt_OneVsRestClassifier_python(self):
        """
        Runs the operator validation for OneVsRestClassifier with the
        python runtime (opsets 9 to 11, benchmark enabled), builds the
        summary report and converts it to RST with df2rst.
        """
        fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
        getLogger('skl2onnx').disabled = True
        verbose = 1 if __name__ == "__main__" else 0
        debug = False
        buffer = []

        def collect(*args, **kwargs):
            # stores the logged message instead of printing it
            buffer.append(" ".join(str(a) for a in args))

        validation = enumerate_validated_operator_opsets(
            verbose, models={"OneVsRestClassifier"}, opset_min=9,
            opset_max=11, fLOG=collect, benchmark=True,
            runtime='python', debug=debug,
            filter_exp=lambda m, p: True or 'm-cl' in p)
        rows = list(validation)
        self.assertGreater(len(rows), 1)
        self.assertIn('skl_nop', rows[0])
        self.assertIn('onx_size', rows[-1])

        piv = summary_report(DataFrame(rows))
        n_lines = piv.shape[0]
        self.assertGreater(n_lines, 1)
        self.assertGreater(n_lines, 2)

        common, subsets = split_columns_subsets(piv)
        # build_key_split and filter_rows are not defined in this method,
        # they are expected to be available in the enclosing scope
        rst = df2rst(
            piv,
            number_format=2,
            replacements={'nan': '', 'ERR: 4convert': ''},
            split_row=lambda index, dp=piv: build_key_split(
                dp.loc[index, "name"], index),
            split_col_common=common,
            split_col_subsets=subsets,
            filter_rows=filter_rows,
            column_size={'problem': 25},
            label_pattern=".. _lpy-{section}:")
        self.assertIn("opset9 | RT/SKL-N=1", rst)
コード例 #7
0
    def test_rt_KMeans_python(self):
        """
        Runs the operator validation for KMeans with the python runtime
        (opset 11), builds the summary report and converts it to RST
        with df2rst using split columns.
        """
        fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
        getLogger('skl2onnx').disabled = True
        verbose = 2 if __name__ == "__main__" else 0
        debug = False
        buffer = []

        def collect(*args, **kwargs):
            # stores the logged message instead of printing it
            buffer.append(" ".join(str(a) for a in args))

        validation = enumerate_validated_operator_opsets(
            verbose, models={"KMeans"}, opset_min=11,
            opset_max=11, fLOG=collect,
            runtime='python', debug=debug)
        rows = list(validation)
        self.assertGreater(len(rows), 1)
        self.assertIn('skl_nop', rows[-1])
        # 'onx_size' only has to appear in at least one row
        keys = {key for row in rows for key in row}
        self.assertIn('onx_size', keys)

        piv = summary_report(DataFrame(rows))
        opset = [col for col in piv.columns if 'opset' in col]
        self.assertTrue('opset11' in opset or 'opset10' in opset)
        self.assertGreater(len(buffer), 1 if debug else 0)

        common, subsets = split_columns_subsets(piv)
        try:
            conv = df2rst(piv, split_col_common=common,  # pylint: disable=E1123
                          split_col_subsets=subsets)
            self.assertIn('| KMeans |', conv)
        except TypeError as e:
            # a df2rst which does not know split_col_common is tolerated
            unsupported = "got an unexpected keyword argument 'split_col_common'"
            if unsupported not in str(e):
                raise e
            return
コード例 #8
0
def rst_table_modules(classifier=False):
    """
    Builds an RST table describing the modules returned by ``small_set``
    (some modules useful to do machine learning).

    @param      classifier  if True, keep the column *classifier*,
                            converted with ``classifiers2string``
    @return                 string, the table produced by ``df2rst``
    """
    try:
        from pymyinstall.packaged import small_set, classifiers2string
    except KeyError:
        # the import is attempted a second time after calling fix_pip_902
        from pyquickhelper.pycode.pip_helper import fix_pip_902
        fix_pip_902()
        from pymyinstall.packaged import small_set, classifiers2string

    modules = small_set()
    modules.sort()
    table = pandas.DataFrame(m.as_dict(rst_link=True) for m in modules)

    kept = ["usage", "rst_link", "kind", "version", "license", "purpose"]
    if classifier:
        kept.append("classifier")
    table = table[kept]
    if classifier:
        table["classifier"] = table.apply(
            lambda row: classifiers2string(row["classifier"]), axis=1)
    # same columns, except that "rst_link" is published as "name"
    table.columns = ["name" if col == "rst_link" else col for col in kept]

    # case-insensitive sort on the name through a temporary column
    table["lname"] = table["name"].apply(lambda s: s.lower())
    table = table.sort_values("lname").drop("lname", axis=1)
    # renumber the rows, then expose that numbering as a column
    table = table.reset_index(drop=True)
    table = table.reset_index(drop=False)
    return df2rst(table)
コード例 #9
0
    def test_df2rst_split_col_row_ref2_func2(self):
        """
        Checks df2rst when ``split_row`` is a function computing the
        section from the text of a ``:ref:`` role.
        """
        def build_key_split(key, index):
            # ':ref:`A <A2>`' -> 'A': text between the first backquote and '<'
            inside_role = str(key).split('`')[1]
            return inside_role.split('<')[0].strip()

        pairs = [
            (':ref:`A <A>`', 1),
            (':ref:`A <A2>`', 2),
            (':ref:`B <B>`', 3),
            (':ref:`B <B2>`', 4),
            (':ref:`A <A3>`', 5),
        ]
        df = pandas.DataFrame(
            [{'name': name, 'value': value} for name, value in pairs])

        conv = df2rst(
            df,
            split_row=lambda index: build_key_split(
                df.loc[index, "name"], index))
        self.assertIn("| :ref:`B <B>`  | 3     |", conv)
コード例 #10
0
    def test_all_module_summary(self):
        """
        Builds a dataframe describing the modules of ``ensae_fullset``,
        logs how many of them have a license and converts it with df2rst.
        """
        fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")

        modules = ensae_fullset()
        modules.sort()
        source_columns = ["usage", "rst_link", "kind", "version",
                          "installed", "license", "purpose", "classifier"]
        df = pandas.DataFrame(m.as_dict(rst_link=True) for m in modules)
        df = df[source_columns]
        # same columns, except that "rst_link" is renamed "name"
        df.columns = ["name" if col == "rst_link" else col
                      for col in source_columns]

        lic = df[~df.license.isnull()]
        fLOG("license", lic.shape)
        nolic = df[df.license.isnull()]
        fLOG("no license", nolic.shape)
        fLOG(nolic[["name", "license"]])
        assert lic.shape[0] > 0

        rst = df2rst(df)
        assert len(rst) > 1000
コード例 #11
0
def rst_table_modules(classifier=False):
    """
    Builds an RST table describing the modules returned by
    ``ensae_fullset`` (modules recommended to do machine learning).

    @param      classifier  if True, keep the column *classifier*,
                            converted with ``classifiers2string``
    @return                 string, the table produced by ``df2rst``
    """
    modules = ensae_fullset()
    modules.sort()
    table = pandas.DataFrame(m.as_dict(rst_link=True) for m in modules)

    kept = ["usage", "rst_link", "kind", "version", "license", "purpose"]
    if classifier:
        kept.append("classifier")
    table = table[kept]
    if classifier:
        table["classifier"] = table.apply(
            lambda row: classifiers2string(row["classifier"]), axis=1)
    # same columns, except that "rst_link" is published as "name"
    table.columns = ["name" if col == "rst_link" else col for col in kept]

    # case-insensitive sort on the name through a temporary column
    table["lname"] = table["name"].apply(lambda s: s.lower())
    table = table.sort_values("lname").drop("lname", axis=1)
    # renumber the rows, then expose that numbering as a column
    table = table.reset_index(drop=True)
    table = table.reset_index(drop=False)
    return df2rst(table)
コード例 #12
0
    def test_pandas_rst_size_table_title(self):
        """
        Checks the ``list-table`` directive returned by df2rst when
        column widths and a title are given.
        """
        df = pandas.DataFrame([
            dict(A="x", AA="xx", AAA="xxx"),
            dict(AA="xxxxxxx", AAA="xxx"),
        ])
        produced = df2rst(df, title="title__", list_table=True,
                          column_size=[1, 1, 2])
        expected = """
                    .. list-table:: title__
                        :widths: 1 1 2
                        :header-rows: 1

                        * - A
                          - AA
                          - AAA
                        * - x
                          - xx
                          - xxx
                        * -
                          - xxxxxxx
                          - xxx
                    """.replace("                    ", "")
        # surrounding blank lines and spaces are ignored on both sides
        self.assertEqual(produced.strip("\n "), expected.strip("\n "))
コード例 #13
0
ファイル: test_name_set.py プロジェクト: sdpython/pymyinstall
    def test_documentation(self):
        """
        Converts the output of ``name_sets_dataframe`` into RST and,
        on Python 3 only, checks every listed set of packages is not empty.
        """
        fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")

        r = name_sets_dataframe()
        for entry in r:
            fLOG("**", entry)
        assert len(r) >= 6

        df = pandas.DataFrame(r)[["name", "description"]]
        rst = df2rst(df)
        fLOG(rst)
        assert len(rst) > 0

        # less tests on Python 2.7
        if sys.version_info[0] == 2:
            return

        checked = 0
        for checked, mod in enumerate(r, start=1):
            lp = get_package_set(mod["name"])
            assert len(lp()) > 0
        assert checked > 0
コード例 #14
0
    def test_documentation(self):
        """
        Converts the output of ``name_sets_dataframe`` into RST and,
        on Python 3 only, checks every listed set of packages is not empty
        (an empty set is accepted for ``pywin32``).
        """
        fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")

        r = name_sets_dataframe()
        for entry in r:
            fLOG("**", entry)
        assert len(r) >= 6

        df = pandas.DataFrame(r)[["name", "description"]]
        rst = df2rst(df)
        fLOG(rst)
        assert len(rst) > 0

        # less tests on Python 2.7
        if sys.version_info[0] == 2:
            return

        checked = 0
        for checked, mod in enumerate(r, start=1):
            lp = get_package_set(mod["name"])
            if len(lp()) == 0 and mod["name"] != "pywin32":
                raise Exception("issue with module '{0}'".format(mod["name"]))
        assert checked > 0
コード例 #15
0
    def test_all_module_summary(self):
        """
        Builds a dataframe describing the modules of ``ensae_fullset``,
        logs how many of them have a license and converts it with df2rst.
        """
        fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")

        modules = ensae_fullset()
        modules.sort()
        source_columns = ["usage", "rst_link", "kind", "version",
                          "installed", "license", "purpose", "classifier"]
        df = pandas.DataFrame(m.as_dict(rst_link=True) for m in modules)
        df = df[source_columns]
        # same columns, except that "rst_link" is renamed "name"
        df.columns = ["name" if col == "rst_link" else col
                      for col in source_columns]

        lic = df[~df.license.isnull()]
        fLOG("license", lic.shape)
        nolic = df[df.license.isnull()]
        fLOG("no license", nolic.shape)
        fLOG(nolic[["name", "license"]])
        assert lic.shape[0] > 0

        rst = df2rst(df)
        assert len(rst) > 1000
コード例 #16
0
    def test_pandas_rst_size_table_auto_replace(self):
        """
        Checks the ``list-table`` directive returned by df2rst when
        a replacement is requested ('xxxxxxx' becomes 'gg').
        """
        df = pandas.DataFrame([
            dict(A="x", AA="xx", AAA="xxx"),
            dict(AA="xxxxxxx", AAA="xxx"),
        ])
        produced = df2rst(df, replacements={'xxxxxxx': 'gg'}, list_table=True)
        expected = """
                    .. list-table::
                        :widths: auto
                        :header-rows: 1

                        * - A
                          - AA
                          - AAA
                        * - x
                          - xx
                          - xxx
                        * -
                          - gg
                          - xxx
                    """.replace("                    ", "")
        # surrounding blank lines and spaces are ignored on both sides
        self.assertEqual(produced.strip("\n "), expected.strip("\n "))
コード例 #17
0
 def test_df2rst(self):
     """
     Converts ``data/marathon.txt`` with df2rst and looks for
     the header and one row in the result.
     """
     marathon = os.path.join(
         os.path.abspath(os.path.dirname(__file__)), "data", "marathon.txt")
     df = pandas.read_csv(marathon, sep="\t",
                          names=["city", "year", "time", "seconds"])
     converted = df2rst(df)
     for fragment in ("| city      | year | time     | seconds |",
                      "| PARIS     | 2011 | 02:06:29 | 7589    |"):
         self.assertIn(fragment, converted)
コード例 #18
0
 def simple2():
     """
     Converts a small dataframe (two rows, the second one
     without value for column A) with df2rst.
     """
     rows = [dict(A="x", AA="xx", AAA="xxx"),
             dict(AA="xxxxxxx", AAA="xxx")]
     return df2rst(pandas.DataFrame(rows))
コード例 #19
0
    def test_df2rst_split_row(self):
        """
        Checks df2rst on ``data/marathon.txt`` when rows are split
        by city, by year, then by both columns.
        """
        marathon = os.path.join(
            os.path.abspath(os.path.dirname(__file__)), "data", "marathon.txt")
        df = pandas.read_csv(marathon, sep="\t",
                             names=["city", "year", "time", "seconds"])
        header = "| city      | year | time     | seconds |"

        by_city = df2rst(df, split_row="city")
        for fragment in ("+++++++++",
                         header,
                         "| PARIS | 2011 | 02:06:29 | 7589    |"):
            self.assertIn(fragment, by_city)

        by_year = df2rst(df, split_row="year")
        for fragment in ("++++",
                         header,
                         "| FUKUOKA   | 1976 | 02:12:35 | 7955    |"):
            self.assertIn(fragment, by_year)

        by_both = df2rst(df, split_row=["city", "year"])
        for fragment in ("'AMSTERDAM', 1975", header):
            self.assertIn(fragment, by_both)
コード例 #20
0
 def test_df2rst_split_col_row(self):
     """
     Checks df2rst on ``data/marathon.txt`` when rows are split by city
     and columns are split into subsets sharing common columns.
     """
     marathon = os.path.join(
         os.path.abspath(os.path.dirname(__file__)), "data", "marathon.txt")
     df = pandas.read_csv(marathon, sep="\t",
                          names=["city", "year", "time", "seconds"])
     converted = df2rst(df,
                        split_col_subsets=[['time'], ['seconds']],
                        split_col_common=["city", "year"],
                        split_row="city")
     for fragment in ("+++++++++", "| STOCKOLM | 2007 | 8456    |"):
         self.assertIn(fragment, converted)
コード例 #21
0
 def test_df2rst_split_col_row_ref2(self):
     """
     Same kind of check as a split by city and by column subsets,
     with every city wrapped into a ``:ref:`` role beforehand.
     """
     marathon = os.path.join(
         os.path.abspath(os.path.dirname(__file__)), "data", "marathon.txt")
     df = pandas.read_csv(marathon, sep="\t",
                          names=["city", "year", "time", "seconds"])
     # AMSTERDAM -> :ref:`AMSTERDAM`
     df['city'] = df.city.apply(lambda v: ':ref:`{0}`'.format(v))  # pylint: disable=W0108
     converted = df2rst(df,
                        split_col_subsets=[['time'], ['seconds']],
                        split_col_common=["city", "year"],
                        split_row="city")
     for fragment in ("+++++++++", "| :ref:`AMSTERDAM` | 1982 | 02:12:15 |"):
         self.assertIn(fragment, converted)
コード例 #22
0
 def test_df2rst_column_size_i(self):
     """
     Checks df2rst on ``data/marathon.txt`` when ``column_size``
     maps column index 0 to 40: the first column of the expected
     rows is padded.
     """
     marathon = os.path.join(
         os.path.abspath(os.path.dirname(__file__)), "data", "marathon.txt")
     df = pandas.read_csv(marathon, sep="\t",
                          names=["city", "year", "time", "seconds"])
     converted = df2rst(df, column_size={0: 40})
     expected_rows = (
         "| city                                     | year | time     | seconds |",
         "| PARIS                                    | 2006 | 02:08:03 | 7683    |",
     )
     for row in expected_rows:
         self.assertIn(row, converted)
コード例 #23
0
 def test_pandas_rst_size(self):
     """
     Checks the grid table returned by df2rst when ``column_size``
     is given as a list.
     """
     df = pandas.DataFrame([{"A": "x", "AA": "xx", "AAA": "xxx"},
                            {"AA": "xxxxxxx", "AAA": "xxx"}])
     rst = df2rst(df, column_size=[1, 1, 2])
     # The expected text is assembled line by line so that it does not
     # depend on the indentation of this method: stripping a fixed run of
     # spaces from an indented literal silently does nothing as soon as
     # the indentation of the method differs from the stripped width.
     exp = "\n".join([
         "+-----+---------+--------+",
         "| A   | AA      | AAA    |",
         "+=====+=========+========+",
         "| x   | xx      | xxx    |",
         "+-----+---------+--------+",
         "|     | xxxxxxx | xxx    |",
         "+-----+---------+--------+",
         "",  # the table ends with a line break
     ])
     self.assertEqual(rst, exp)
コード例 #24
0
def df2rsthtml(df, format="html", fillna=""):
    """
    Writes a table into RST or HTML format.

    @param      df          dataframe
    @param      format      output format, ``"html"`` or ``"rst"``
    @param      fillna      value used to fill empty values before
                            the conversion
    @return                 string
    @raise      ValueError  if *format* is neither ``"html"`` nor ``"rst"``
    """
    # Fail loudly on an unsupported format instead of silently
    # returning None whereas a string is expected.
    if format not in ("html", "rst"):
        raise ValueError(
            "Unsupported format {0!r}, expected 'html' or 'rst'.".format(format))
    df = df.fillna(fillna)
    if format == "html":
        return df2html(df)
    return df2rst(df)
コード例 #25
0
    def test_pandas_rst_right(self):
        """
        Compares the grid tables returned by df2rst for ``align="r"``
        and ``align="c"`` with the expected text.
        """
        complete_row = dict(A="x", AA="xx", AAA="xxx")
        # no "A" key here: the expected tables show an empty cell for it
        partial_row = dict(AA="xxxxxxx", AAA="xxx")
        df = pandas.DataFrame([complete_row, partial_row])

        # right alignment; replace() strips the indentation of the literal
        right_aligned = df2rst(df, align="r")
        expected_right = """+-----+---------+-----+
                 |   A |      AA | AAA |
                 +=====+=========+=====+
                 |   x |      xx | xxx |
                 +-----+---------+-----+
                 |     | xxxxxxx | xxx |
                 +-----+---------+-----+
                 """.replace("                 ", "")
        self.assertEqual(right_aligned, expected_right)

        # centered alignment
        centered = df2rst(df, align="c")
        expected_centered = """+-----+---------+-----+
                 |  A  |   AA    | AAA |
                 +=====+=========+=====+
                 |  x  |   xx    | xxx |
                 +-----+---------+-----+
                 |     | xxxxxxx | xxx |
                 +-----+---------+-----+
                 """.replace("                 ", "")
        self.assertEqual(centered, expected_centered)
コード例 #26
0
    def test_documentation(self):
        """
        Builds a dataframe describing the modules of ``all_set``,
        converts the classifiers into strings and logs the RST table.
        """
        fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")

        modules = all_set()
        modules.sort()
        source_columns = ["usage", "rst_link", "kind", "version",
                          "license", "purpose", "classifier"]
        df = pandas.DataFrame(m.as_dict(rst_link=True) for m in modules)
        df = df[source_columns]
        df["classifier"] = df.apply(
            lambda row: classifiers2string(row["classifier"]), axis=1)
        # same columns, except that "rst_link" is renamed "name"
        df.columns = ["name" if col == "rst_link" else col
                      for col in source_columns]
        rst = df2rst(df)
        fLOG(rst)
コード例 #27
0
    def test_pandas_rst_size_table_noheader(self):
        """
        Checks the ``list-table`` directive returned by df2rst
        when the header is disabled.
        """
        df = pandas.DataFrame([
            dict(A="x", AA="xx", AAA="xxx"),
            dict(AA="xxxxxxx", AAA="xxx"),
        ])
        produced = df2rst(df, header=False, list_table=True)
        expected = """
                    .. list-table::
                        :widths: auto

                        * - x
                          - xx
                          - xxx
                        * -
                          - xxxxxxx
                          - xxx
                    """.replace("                    ", "")
        # surrounding blank lines and spaces are ignored on both sides
        self.assertEqual(produced.strip("\n "), expected.strip("\n "))
コード例 #28
0
    def test_pandas_rst_size_table_number_format(self):
        """
        Checks the ``list-table`` directive returned by df2rst for
        float values with ``number_format=3`` and no header; the missing
        value is expected to be written ``nan``.
        """
        first_row = dict(A=2.12345678, AA=3.12345678, AAA=4.12345678)
        # no value for column A in the second row
        second_row = dict(AA=2.12345678e10, AAA=2.12345678e-10)
        df = pandas.DataFrame([first_row, second_row])
        produced = df2rst(df, number_format=3, header=False, list_table=True)
        expected = """
                    .. list-table::
                        :widths: auto

                        * - 2.12
                          - 3.12
                          - 4.12
                        * - nan
                          - 2.12e+10
                          - 2.12e-10
                    """.replace("                    ", "")
        # surrounding blank lines and spaces are ignored on both sides
        self.assertEqual(produced.strip("\n "), expected.strip("\n "))
コード例 #29
0
def rst_table_modules():
    """
    Builds an RST table describing the modules returned by
    ``ensae_fullset`` (modules recommended to do machine learning).

    @return         string, the table produced by ``df2rst``
    """
    modules = ensae_fullset()
    modules.sort()
    kept = ["usage", "rst_link", "kind", "version",
            "license", "purpose", "classifier"]
    table = pandas.DataFrame(m.as_dict(rst_link=True) for m in modules)
    table = table[kept]
    table["classifier"] = table.apply(
        lambda row: classifiers2string(row["classifier"]), axis=1)
    # same columns, except that "rst_link" is published as "name"
    table.columns = ["name" if col == "rst_link" else col for col in kept]

    # case-insensitive sort on the name through a temporary column
    table["lname"] = table["name"].apply(lambda s: s.lower())
    table = table.sort_values("lname")
    table = table.drop("lname", axis=1)
    return df2rst(table)
コード例 #30
0
    def test_documentation(self):
        """
        Builds a dataframe describing the modules of ``all_set``,
        converts the classifiers into strings and logs the RST table.
        """
        fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")

        modules = all_set()
        modules.sort()
        source_columns = ["usage", "rst_link", "kind", "version",
                          "license", "purpose", "classifier"]
        df = pandas.DataFrame(m.as_dict(rst_link=True) for m in modules)
        df = df[source_columns]
        df["classifier"] = df.apply(
            lambda row: classifiers2string(row["classifier"]), axis=1)
        # same columns, except that "rst_link" is renamed "name"
        df.columns = ["name" if col == "rst_link" else col
                      for col in source_columns]
        rst = df2rst(df)
        fLOG(rst)
コード例 #31
0
 def test_pandas_rst_size_replace(self):
     """
     Checks the grid table returned by df2rst when ``column_size`` is
     given as a list and 'xxx' must be replaced by 'rrrr'.
     """
     df = pandas.DataFrame([{
         "A": "x",
         "AA": "xx",
         "AAA": "xxx"
     }, {
         "AA": "xxxxxxx",
         "AAA": "xxx"
     }])
     rst = df2rst(df, column_size=[1, 1, 2], replacements={'xxx': 'rrrr'})
     # The expected text is assembled line by line so that it does not
     # depend on the indentation of this method: stripping a fixed run of
     # spaces from an indented literal silently does nothing as soon as
     # the indentation of the method differs from the stripped width.
     exp = "\n".join([
         "+---+---------+--------+",
         "| A | AA      | AAA    |",
         "+===+=========+========+",
         "| x | xx      | rrrr   |",
         "+---+---------+--------+",
         "|   | xxxxxxx | rrrr   |",
         "+---+---------+--------+",
         "",  # the table ends with a line break
     ])
     self.assertEqual(rst, exp)
コード例 #32
0
    def test_pandas_rst_size_table(self):
        """
        Checks the ``list-table`` directive returned by df2rst
        when column widths are given.
        """
        df = pandas.DataFrame([
            dict(A="x", AA="xx", AAA="xxx"),
            dict(AA="xxxxxxx", AAA="xxx"),
        ])
        produced = df2rst(df, list_table=True, column_size=[1, 1, 2])
        expected = """
                    .. list-table::
                        :widths: 1 1 2
                        :header-rows: 1

                        * - A
                          - AA
                          - AAA
                        * - x
                          - xx
                          - xxx
                        * -
                          - xxxxxxx
                          - xxx
                    """.replace("                    ", "")
        # surrounding blank lines and spaces are ignored on both sides
        self.assertEqual(produced.strip("\n "), expected.strip("\n "))
コード例 #33
0
    def test_df2rst_split_col_complex(self):
        """
        Checks df2rst on ``data/unittst.csv`` with number formatting,
        replacements, rows split by a function, columns split into
        subsets and a row filter.
        """
        filename = os.path.join(
            os.path.abspath(os.path.dirname(__file__)), "data", "unittst.csv")
        df = pandas.read_csv(filename)

        common = ['name', 'problem', 'scenario']
        opset_columns = ['opset11', 'opset10', 'opset9']
        bench_columns = [
            'RT/SKL-N=1', 'N=10', 'N=100', 'N=1000', 'N=10000',
            'N=100000', 'RT/SKL-N=1-min', 'RT/SKL-N=1-max',
            'N=10-min', 'N=10-max', 'N=100-min', 'N=100-max',
            'N=1000-min', 'N=1000-max', 'N=10000-min',
            'N=10000-max', 'N=100000-min', 'N=100000-max'
        ]
        subsets = [opset_columns, ['ERROR-msg'], bench_columns]

        def build_key_split(key, index):
            # text between the first backquote and '<', without spaces around
            inside_role = str(key).split('`')[1]
            return inside_role.split('<')[0].strip()

        def filter_rows(df):
            # only the first of these columns found in the dataframe is used
            for col in ('ERROR-msg', 'RT/SKL-N=1'):
                if col not in df.columns:
                    continue
                keep = df[col].apply(
                    lambda x: pandas.notnull(x) and x not in (None, '', 'nan'))
                return df[keep]
            return df

        conv = df2rst(
            df,
            number_format=2,
            replacements={'nan': '', 'ERR: 4convert': ''},
            split_row=lambda index, dp=df: build_key_split(
                dp.loc[index, "name"], index),
            split_col_common=common,
            split_col_subsets=subsets,
            filter_rows=filter_rows)

        expected_row = (
            "| :ref:`ARDRegression <l-ARDRegression-b-reg-default>`     | b-reg     | default  "
            "|               | ?       | ?      |")
        self.assertIn(expected_row, conv)
        # six header separators split the text into seven pieces
        pieces = conv.split("+=============================")
        self.assertEqual(len(pieces), 7)
コード例 #34
0
    def test_pandas_rst_size_table_number_format(self):
        """
        Checks the ``list-table`` directive returned by df2rst for
        float values with ``number_format=3`` and no header; the missing
        value is expected to be left empty.
        """
        first_row = dict(A=2.12345678, AA=3.12345678, AAA=4.12345678)
        # no value for column A in the second row
        second_row = dict(AA=2.12345678e10, AAA=2.12345678e-10)
        df = pandas.DataFrame([first_row, second_row])
        produced = df2rst(df, number_format=3, header=False, list_table=True)
        expected = """
                    .. list-table::
                        :widths: auto

                        * - 2.12
                          - 3.12
                          - 4.12
                        * -
                          - 2.12e+10
                          - 2.12e-10
                    """.replace("                    ", "")
        # surrounding blank lines and spaces are ignored on both sides
        self.assertEqual(produced.strip("\n "), expected.strip("\n "))
コード例 #35
0
    def test_pandas_rst_size_table_noheader(self):
        """
        Checks the ``list-table`` directive returned by df2rst
        when the header is disabled.
        """
        rows = [{"A": "x", "AA": "xx", "AAA": "xxx"},
                {"AA": "xxxxxxx", "AAA": "xxx"}]
        df = pandas.DataFrame(rows)
        produced = df2rst(df, header=False, list_table=True)
        expected = """
                    .. list-table::
                        :widths: auto

                        * - x
                          - xx
                          - xxx
                        * -
                          - xxxxxxx
                          - xxx
                    """.replace("                    ", "")
        # surrounding blank lines and spaces are ignored on both sides
        self.assertEqual(produced.strip("\n "), expected.strip("\n "))
コード例 #36
0
 def simple():
     """
     Converts a small dataframe (two rows, the second one
     without value for column A) with df2rst.
     """
     complete_row = dict(A="x", AA="xx", AAA="xxx")
     partial_row = dict(AA="xxxxxxx", AAA="xxx")
     table = pandas.DataFrame([complete_row, partial_row])
     return df2rst(table)
コード例 #37
0
def write_page_onnxrt_benches(app, runtime, skip=None, white_list=None):
    """
    Run the benchmark for one runtime and write the matching RST page.

    The function runs ``run_benchmark``, loads the raw and summary CSV
    files it reports, dumps both as Excel files in the source directory
    and finally writes ``skl_converters/bench_<runtime>.rst`` with an
    introduction followed by the summary rendered by ``df2rst``.

    :param app: Sphinx application; when ``None`` the source directory
        defaults to ``".."``
    :param runtime: one of ``'python'``, ``'python_compiled'``,
        ``'onnxruntime1'``, ``'onnxruntime2'``
    :param skip: forwarded to ``run_benchmark``
    :param white_list: forwarded to ``run_benchmark``
    :raises RuntimeError: if *runtime* is not supported
    :raises ValueError: if none of the files reported by
        ``run_benchmark`` exists
    """
    # Unused in this function; kept because removing an import may change
    # import-time behaviour -- TODO confirm it can be dropped.
    from mlprodict.onnxrt.validate.validate import enumerate_validated_operator_opsets
    logger = getLogger('mlprodict')
    srcdir = app.builder.srcdir if app is not None else ".."

    if runtime in ('python', 'python_compiled'):
        page_name = "bench_python.rst"
    elif runtime == 'onnxruntime2':
        page_name = "bench_onnxrt2.rst"
    elif runtime == 'onnxruntime1':
        page_name = "bench_onnxrt1.rst"
    else:
        raise RuntimeError("Unsupported runtime '{}'.".format(runtime))
    whe = os.path.join(os.path.abspath(srcdir), "skl_converters", page_name)

    logger.info("[mlprodict] create page '%s'.", whe)
    print("[mlprodict-sphinx] create page runtime '{}' - '{}'.".format(runtime, whe))

    filenames = run_benchmark(runtime, srcdir, logger, skip,
                              white_list=white_list)
    # Each entry holds (raw csv, summary csv); missing files are ignored.
    dfs_raw = [read_csv(name[0])
               for name in filenames if os.path.exists(name[0])]
    dfs_sum = [read_csv(name[1])
               for name in filenames if os.path.exists(name[1])]
    if not dfs_raw or not dfs_sum:
        # pandas.concat would fail with "No objects to concatenate";
        # raise the same exception type with an actionable message.
        raise ValueError(
            "No benchmark output file was found for runtime '{}'.".format(
                runtime))
    df_raw = concat(dfs_raw, sort=False)
    piv = concat(dfs_sum, sort=False)

    # Opset columns, highest opset first.
    opset_cols = [name for _, name in sorted(
        ((int(oc.replace("opset", "")), oc)
         for oc in piv.columns if 'opset' in oc),
        reverse=True)]
    bench_cols = ["RT/SKL-N=1", "N=10", "N=100",
                  "N=1000", "N=10000", "N=100000"]

    # Column order: latest opset, identification, ratios, older opsets,
    # min/max of every ratio, then skl_* / onx_* columns.
    new_cols = opset_cols[:1]
    new_cols.extend(["ERROR-msg", "name", "problem", "scenario", 'optim'])
    new_cols.extend(bench_cols)
    new_cols.extend(opset_cols[1:])
    for c in bench_cols:
        new_cols.append(c + '-min')
        new_cols.append(c + '-max')
    new_cols.extend(c for c in piv.columns
                    if c.startswith(("skl_", "onx_")))
    # Only keep the columns the summary really has.
    new_cols = [c for c in new_cols if c in piv.columns]
    piv = piv[new_cols]

    summary_name = "bench_sum_%s.xlsx" % runtime
    out_sum = os.path.join(srcdir, summary_name)
    piv.to_excel(out_sum, index=False)
    logger.info("[mlprodict] wrote '%s'.", out_sum)
    print("[mlprodict-sphinx] wrote '{}'".format(out_sum))

    out_raw = os.path.join(srcdir, "bench_raw_%s.xlsx" % runtime)
    df_raw.to_excel(out_raw, index=False)
    logger.info("[mlprodict] wrote '%s'.", out_raw)
    print("[mlprodict-sphinx] wrote '{}'".format(out_raw))

    logger.info("[mlprodict] shape '%s'.", piv.shape)
    print("[mlprodict-sphinx] shape '{}'".format(piv.shape))

    def make_link(row):
        # Replaces the model name by a :ref: role built from the row.
        link = ":ref:`{name} <l-{name}-{problem}-{scenario}-{optim}-{opset}>`"
        name = row['name']
        problem = row['problem']
        scenario = row['scenario']
        # A missing optim value shows up as "nan" once converted to str.
        optim = _clean_values_optim(
            str(row.get('optim', '')).replace("nan", ""))
        opset = _make_opset(row)
        return link.format(name=name, problem=problem,
                           scenario=scenario, optim=optim,
                           opset=opset)

    piv['name'] = piv.apply(make_link, axis=1)
    piv.reset_index(drop=True, inplace=True)

    if "ERROR-msg" in piv.columns:
        def shorten(text):
            # Truncates long messages so the table stays readable.
            text = str(text)
            if len(text) > 75:
                text = text[:75] + "..."
            return text

        piv["ERROR-msg"] = piv["ERROR-msg"].apply(shorten)

    logger.info("[mlprodict] write '%s'.", whe)
    print("[mlprodict-sphinx] write '{}'".format(whe))

    with open(whe, 'w', encoding='utf-8') as f:
        title = "Available of scikit-learn model for runtime {0}".format(
            runtime)
        # Doubled braces are literal braces for str.format (LaTeX groups).
        f.write(dedent('''
        .. _l-onnx-bench-{0}:

        {1}
        {2}

        The following metrics measure the ratio between the prediction time
        for the runtime compare to :epkg:`scikit-learn`.
        It gives an order of magnitude. They are done by setting
        ``assume_finite=True`` (see `config_context
        <https://scikit-learn.org/stable/modules/generated/sklearn.config_context.html>`_).
        The computed ratio is:

        .. math::

            \\frac{{\\textit{{execution when predicting with a custom ONNX runtime}}}}
            {{\\textit{{execution when predicting with scikit-learn (assume\\_finite=True)}}}}

        Due to float32 conversion, it may happen than the highest difference
        is quite high. The proposition :math:`a < b \\Rightarrow [a] < [b]`
        is usually true and but not true all the time. It is the same after number
        where rounded to float32, that's why the result considers the
        fourth highest difference and not the first three.

        Some figures are missing when the number of observations is high.
        That means the prediction is slow for one of the runtime
        (ONNX, scikit-learn) and it would take too long to go further.
        The list of problems can be found in the documentation of
        function :func:`find_suitable_problem
        <mlprodict.onnxrt.validate.validate_problems.find_suitable_problem>`.
        Default values are usually used to create models but other
        scenarios are defined by :func:`build_custom_scenarios
        <mlprodict.onnxrt.validate.validate_scenarios.build_custom_scenarios>`
        and :func:`build_custom_scenarios (2)
        <mlprodict.onnxrt.validate.validate_scenarios.build_custom_scenarios>`.
        The benchmark can be generated with a command line:

        ::

            python -m mlprodict validate_runtime --verbose=1 --out_raw=data.csv --out_summary=summary.xlsx --benchmark=1 --dump_folder=. --runtime={0}

        The option ``-se 1`` may be used if the process crashes. The command line
        can also be extended to test only one model or to skip another one. The whole
        batch takes between 5 and 15 minutes depending on the machine.

        Full data: :download:`{3} <../{3}>`

        .. contents::
            :local:

        '''.format(runtime, title, "=" * len(title), summary_name)))
        common, subsets = split_columns_subsets(piv)
        f.write(df2rst(piv, number_format=2,
                       replacements={'nan': '', 'ERR: 4convert': ''},
                       split_row=lambda index, dp=piv: build_key_split(
                           dp.loc[index, "name"], index),
                       split_col_common=common,
                       split_col_subsets=subsets,
                       filter_rows=filter_rows,
                       column_size={'problem': 25},
                       label_pattern=".. _lpy-{section}:"))
    logger.info("[mlprodict] done page '%s'.", whe)
    print("[mlprodict-sphinx] done page runtime '{}' - '{}'.".format(runtime, whe))
コード例 #38
0
 def test_module_list(self):
     """Check that the RST rendering of ``modules_list()`` mentions
     the expected packages."""
     res = df2rst(DataFrame(modules_list()))
     for package in ('sklearn', 'numpy', 'skl2onnx'):
         self.assertIn(package, res)