Example #1
    def __init__(self, name, datasets, clog=None, fLOG=noLOG, path_to_images=".",
                 cache_file=None, progressbar=None, graphx=None, graphy=None, **params):
        """
        @param      name            name of the test
        @param      datasets        list of dictionaries of dataframes
        @param      clog            see @see cl CustomLog or string
        @param      fLOG            logging function
        @param      path_to_images  path to images and intermediate results
        @param      cache_file      cache file
        @param      progressbar     relies on *tqdm*, for example *tnrange*
        @param      graphx          list of variables to use as X axis
        @param      graphy          list of variables to use as Y axis
        @param      params          extra parameters

        If *cache_file* is specified, the class stores the results of the
        method :meth:`bench <pyquickhelper.benchhelper.benchmark.GridBenchMark.bench>`.
        On a second run, the function loads the cache
        and only runs the modified or new configurations (in *param_list*).

        *datasets* should be a dictionary with dataframes as values
        and the following keys:

        * ``'X'``: features
        * ``'Y'``: labels (optional)
        """
        GridBenchMark.__init__(self, name=name, datasets=datasets, clog=clog, fLOG=fLOG,
                               path_to_images=path_to_images, cache_file=cache_file,
                               progressbar=progressbar, **params)
        self._xaxis = graphx
        self._yaxis = graphy
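A minimal construction sketch for this constructor: the class name *MlGridBenchMark* is inferred from the log messages in the next example, its import path is an assumption, and the toy dataframes are invented for illustration.

import numpy
import pandas
# import path is an assumption; adjust to wherever MlGridBenchMark is defined
from pyquickhelper.benchhelper import MlGridBenchMark

# hypothetical dataset: features 'X' and labels 'Y' stored as dataframes
X = pandas.DataFrame(numpy.random.rand(50, 2), columns=["x1", "x2"])
Y = pandas.DataFrame(numpy.random.randint(0, 2, size=(50, 1)), columns=["label"])

bench = MlGridBenchMark("demo", [dict(X=X, Y=Y)],
                        cache_file="cache.pickle",
                        graphx=["x1"], graphy=["x2"])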
Example #2
    def preprocess_dataset(self, dsi, **params):
        """
        Splits the dataset into train and test.

        @param      params      additional parameters
        @return                 tuple *(train, test)* datasets, dictionary for metrics, remaining parameters
        """
        ds, appe, params = GridBenchMark.preprocess_dataset(
            self, dsi, **params)

        if "no_split" in ds:
            no_split = ds["no_split"]
        else:
            no_split = False

        if no_split:
            self.fLOG("[MlGridBenchMark.preprocess_dataset] no split")
            return (ds, ds), appe, params
        else:
            self.fLOG("[MlGridBenchMark.preprocess_dataset] split train test")
            spl = ["X", "Y", "weight", "group"]
            names = [_ for _ in spl if _ in ds]
            if len(names) == 0:
                raise Exception("No dataframe or matrix was found.")
            mats = [ds[_] for _ in names]

            # extract the split options and remove them from *params*
            pars = {"train_size", "test_size"}
            options = {k: params.pop(k) for k in pars if k in params}

            res = train_test_split(*mats, **options)

            # train_test_split returns the pairs interleaved:
            # [X_train, X_test, Y_train, Y_test, ...]
            train = {}
            for i, n in enumerate(names):
                train[n] = res[i * 2]
            test = {}
            for i, n in enumerate(names):
                test[n] = res[i * 2 + 1]

            self.fLOG("[MlGridBenchMark.preprocess_dataset] done")
            return (train, test), appe, params
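The interleaving logic above is easy to check in isolation. A self-contained sketch with toy *numpy* arrays, assuming *train_test_split* comes from *scikit-learn* as the code suggests:

import numpy
from sklearn.model_selection import train_test_split

ds = {"X": numpy.arange(20).reshape(10, 2), "Y": numpy.arange(10)}
names = [n for n in ["X", "Y", "weight", "group"] if n in ds]
res = train_test_split(*[ds[n] for n in names], test_size=0.3)

# results come back interleaved: [X_train, X_test, Y_train, Y_test]
train = {n: res[i * 2] for i, n in enumerate(names)}
test = {n: res[i * 2 + 1] for i, n in enumerate(names)}
print({k: v.shape for k, v in train.items()})   # X: (7, 2), Y: (7,)
print({k: v.shape for k, v in test.items()})    # X: (3, 2), Y: (3,)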