Python TransformerMixin Examples, sklearn.base.TransformerMixin Python Examples

Example #1

0

Show file

    def __init__(self,
                 columns=None,
                 remove=None,
                 skip_errors=False,
                 single=False,
                 fLOG=None):
        """
        Stores the settings of the transformer.

        @param      columns         columns selection, a value which is neither *None*
                                    nor a list is wrapped into a one-element list
        @param      remove          modalities to remove
        @param      skip_errors     skip when a new category appears (no 1)
        @param      single          use a single column per category, do not multiply them for each value
        @param      fLOG            logging function

        The logging function displays a message when a new dense and big matrix
        is created when it should be sparse. A sparse matrix should be allocated instead.
        """
        for base in (BaseEstimator, TransformerMixin):
            base.__init__(self)
        if columns is not None and not isinstance(columns, list):
            # a single column is accepted, it becomes a list of one column
            columns = [columns]
        self._p_columns = columns
        self._p_remove = remove
        self._p_skip_errors = skip_errors
        self._p_single = single
        self.fLOG = fLOG

Example #2

0

Show file

File: base.py Project: ogierpaul/suricate

 def __init__(self,
              on=None,
              ixname='ix',
              source_suffix='source',
              target_suffix='target',
              scoresuffix='score',
              **kwargs):
     """
     Args:
         on (str): name of the column on which to do the join, default None
         ixname (str): name of the index, default 'ix'
         source_suffix (str): suffix used for the source side, default 'source'
         target_suffix (str): suffix used for the target side, default 'target'
         scoresuffix (str): suffix to be attached to the on column name, default 'score'
         **kwargs: accepted, not used by this constructor
     """
     TransformerMixin.__init__(self)
     self.ixname = ixname
     self.source_suffix = source_suffix
     self.target_suffix = target_suffix
     # the three derived index names are computed once by concatixnames
     names = concatixnames(ixname=self.ixname,
                           source_suffix=self.source_suffix,
                           target_suffix=self.target_suffix)
     self.ixnamesource, self.ixnametarget, self.ixnamepairs = names
     self.on = on
     self.scoresuffix = scoresuffix
     # without a join column, the output column is the score suffix alone
     self.outcol = (self.scoresuffix if self.on is None
                    else self.on + '_' + self.scoresuffix)
     self.fitted = False

Example #3

0

Show file

File: explorer.py Project: ogierpaul/suricate

    def __init__(self,
                 clustermixin=None,
                 n_simple=10,
                 n_hard=10,
                 ixname='ix',
                 source_suffix='source',
                 target_suffix='target'):
        """

        Args:
            clustermixin (ClusterMixin): if None, will use KBinsCluster with 10 clusters
            n_simple (int): number of simple questions per cluster
            n_hard (int): number of hard questions per cluster
            ixname (str): default 'ix'
            source_suffix (str): default 'source'
            target_suffix (str): default 'target'
        """
        TransformerMixin.__init__(self)
        self.ixname = ixname
        self.source_suffix = source_suffix
        self.target_suffix = target_suffix
        index_names = concatixnames(ixname=self.ixname,
                                    source_suffix=self.source_suffix,
                                    target_suffix=self.target_suffix)
        self.ixnamesource, self.ixnametarget, self.ixnamepairs = index_names
        # default clustering is only built when the caller gives none
        self._clustermixin = (KBinsCluster(n_clusters=10)
                              if clustermixin is None else clustermixin)
        self._simplequestions = SimpleQuestions(n_questions=n_simple)
        self._hardquestions = HardQuestions(n_questions=n_hard)
        self._clusterclassifier = ClusterClassifier(
            ixname=self.ixname,
            source_suffix=self.source_suffix,
            target_suffix=self.target_suffix)

Example #4

0

Show file

 def __init__(self, transformer=None, estimator=None,
              normalize=True, keep_tsne_outputs=False, **kwargs):
     """
     :param transformer: `TSNE` by default, it must expose
         a method *fit_transform*
     :param estimator: `KNeighborsRegressor` by default, it must
         expose a method *predict*
     :param normalize: normalizes the outputs, centers and normalizes
         the output of the *t-SNE* and applies that same
         normalization to the prediction of the estimator
     :param keep_tsne_outputs: if True, keep raw outputs of
         :epkg:`TSNE` is stored in member *tsne_outputs_*
     :param kwargs: sent to :meth:`set_params <mlinsights.mlmodel.
         tsne_transformer.PredictableTSNE.set_params>`, see its
         documentation to understand how to specify parameters
     :raises AttributeError: if *transformer* has no method
         *fit_transform* or *estimator* has no method *predict*
     """
     TransformerMixin.__init__(self)
     BaseEstimator.__init__(self)
     estimator = KNeighborsRegressor() if estimator is None else estimator
     transformer = TSNE() if transformer is None else transformer
     self.estimator, self.transformer = estimator, transformer
     self.keep_tsne_outputs = keep_tsne_outputs
     if not hasattr(transformer, "fit_transform"):
         message = ("Transformer {} does not have a 'fit_transform' "
                    "method.")
         raise AttributeError(message.format(type(transformer)))
     if not hasattr(estimator, "predict"):
         message = "Estimator {} does not have a 'predict' method."
         raise AttributeError(message.format(type(estimator)))
     self.normalize = normalize
     if kwargs:
         # remaining keyword arguments are parameters handled by set_params
         self.set_params(**kwargs)

Example #5

0

Show file

 def __init__(self, estimator, method=None, copy_estimator=True):
     """
     @param      estimator           estimator to wrap in a transformer, it is cloned
                                     with the training data (deep copy) when fitted
     @param      method              if None, guess what method should be called,
                                     the first one found among *transform*,
                                     *predict_proba*, *decision_function*,
                                     *predict* (in that order)
     @param      copy_estimator      copy the model instead of taking a reference

     Raises *AttributeError* if no method can be guessed or if the
     requested method does not exist in *estimator*.
     """
     TransformerMixin.__init__(self)
     BaseEstimator.__init__(self)
     self.estimator = estimator
     self.copy_estimator = copy_estimator
     if method is None:
         # candidates are tried by order of preference
         candidates = ("transform", "predict_proba",
                       "decision_function", "predict")
         method = next(
             (name for name in candidates if hasattr(estimator, name)), None)
         if method is None:
             raise AttributeError(
                 "Cannot find a method transform, predict_proba, decision_function, predict in object {}"
                 .format(type(estimator)))
     if not hasattr(estimator, method):
         raise AttributeError("Cannot find method '{}' in object {}".format(
             method, type(estimator)))
     self.method = method

Example #6

0

Show file

File: test_custom_transformer.py Project: vvandriichuk/sklearn-onnx

 def __init__(self,
              transformer=None,
              estimator=None,
              normalize=True,
              keep_tsne_outputs=False,
              **kwargs):
     """
     :param transformer: ``TSNE()`` if None, it must expose
         a method *fit_transform*
     :param estimator: ``KNeighborsRegressor()`` if None, it must
         expose a method *predict*
     :param normalize: stored as attribute *normalize*
     :param keep_tsne_outputs: stored as attribute *keep_tsne_outputs*
     :param kwargs: if not empty, forwarded to *set_params*
     :raises AttributeError: if one of the required methods is missing
     """
     TransformerMixin.__init__(self)
     BaseEstimator.__init__(self)
     estimator = estimator if estimator is not None else KNeighborsRegressor()
     transformer = transformer if transformer is not None else TSNE()
     self.estimator = estimator
     self.transformer = transformer
     self.keep_tsne_outputs = keep_tsne_outputs
     can_fit_transform = hasattr(transformer, "fit_transform")
     if not can_fit_transform:
         raise AttributeError(
             "transformer {} does not have a 'fit_transform' "
             "method.".format(type(transformer)))
     can_predict = hasattr(estimator, "predict")
     if not can_predict:
         raise AttributeError("estimator {} does not have a 'predict' "
                              "method.".format(type(estimator)))
     self.normalize = normalize
     if kwargs:
         self.set_params(**kwargs)

Example #7

0

Show file

File: onnx_transformer.py Project: xadupre/scikit-onnxruntime

 def __init__(self, onnx_bytes, output_name=None, enforce_float32=True):
     """
     :param onnx_bytes: serialized model, must be of type *bytes*
     :param output_name: stored as attribute *output_name*
     :param enforce_float32: stored as attribute *enforce_float32*
     :raises TypeError: if *onnx_bytes* is not of type *bytes*
     """
     for base in (BaseEstimator, TransformerMixin):
         base.__init__(self)
     self.onnx_bytes = onnx_bytes
     self.output_name = output_name
     self.enforce_float32 = enforce_float32
     is_bytes = isinstance(onnx_bytes, bytes)
     if not is_bytes:
         raise TypeError("onnx_bytes must be bytes to be pickled.")

Example #8

0

Show file

File: extended_features.py Project: sdpython/mlinsights

 def __init__(self, kind='poly', poly_degree=2, poly_interaction_only=False,
              poly_include_bias=True):
     """
     Stores every parameter as an attribute of the same name.

     :param kind: ``'poly'`` by default
     :param poly_degree: 2 by default
     :param poly_interaction_only: False by default
     :param poly_include_bias: True by default

     NOTE(review): the *poly_* parameters presumably configure polynomial
     features when *kind* is ``'poly'`` -- confirm against the rest of the class.
     """
     for base in (BaseEstimator, TransformerMixin):
         base.__init__(self)
     self.kind = kind
     self.poly_degree = poly_degree
     self.poly_interaction_only = poly_interaction_only
     self.poly_include_bias = poly_include_bias

Example #9

0

Show file

File: onnx_tokenizer.py Project: sdpython/mlprodict

 def __init__(self, vocab, merges, padding_length=-1, opset=None):
     """
     Stores every parameter as an attribute of the same name.

     :param vocab: stored as attribute *vocab*
     :param merges: stored as attribute *merges*
     :param padding_length: -1 by default
     :param opset: None by default
     :raises ImportError: if the name *get_library_path* is None
     """
     for base in (BaseEstimator, TransformerMixin):
         base.__init__(self)
     self.vocab, self.merges = vocab, merges
     self.padding_length = padding_length
     self.opset = opset
     extension_missing = get_library_path is None
     if extension_missing:
         raise ImportError("onnxruntime_extensions is not installed.")

Example #10

0

Show file

File: base.py Project: ogierpaul/suricate

    def __init__(self, n_questions=10):
        """

        Args:
            n_questions (int): number of questions to be asked for each cluster, default 10
        """
        TransformerMixin.__init__(self)
        # both cluster attributes start empty, they are not set by the constructor
        self.clusters = None
        self.n_clusters = None
        self.n_questions = n_questions

Example #11

0

Show file

 def __init__(self, scaler_model, clf_model, hmm_model):
     """
     Stores the three models and a hand-configured ``KBinsDiscretizer``.

     :param scaler_model: stored as attribute *scaler_model_*
     :param clf_model: stored as attribute *clf_model_*
     :param hmm_model: stored as attribute *hmm_model_*
     """
     edges = np.array([-np.inf, 0.1, 0.3, 0.5, 0.7, 0.9, np.inf])
     discretizer = KBinsDiscretizer(encode='ordinal')
     # the attributes normally produced by fit are set by hand
     # NOTE(review): n_bins_ receives the number of edges (7) while these
     # edges delimit 6 intervals -- confirm this is intended
     discretizer.n_bins_ = np.array([edges.shape[0]])
     discretizer.bin_edges_ = edges.reshape(1, -1)
     TransformerMixin.__init__(self)
     BaseEstimator.__init__(self)
     self.scaler_model_ = scaler_model
     self.clf_model_ = clf_model
     self.hmm_model_ = hmm_model
     self.bins_discretizer_ = discretizer

Example #12

0

Show file

 def __init__(self, name, fct, kwargs):
     """
     @param      name        function name, stored as *name_fct*
     @param      fct         python function, stored as *_fct*
     @param      kwargs      parameters function, stored as *kwargs*
     """
     for base in (BaseEstimator, TransformerMixin):
         base.__init__(self)
     self.name_fct, self._fct = name, fct
     self.kwargs = kwargs

Example #13

0

Show file

 def __init__(self, onnx_bytes, output_name=None, enforce_float32=True,
              runtime='python', change_batch_size=None, reshape=False):
     """
     :param onnx_bytes: model, if it has a method *SerializeToString*,
         the result of that method is stored instead of the object itself
     :param output_name: stored as attribute *output_name*
     :param enforce_float32: stored as attribute *enforce_float32*
     :param runtime: ``'python'`` by default
     :param change_batch_size: stored as attribute *change_batch_size*
     :param reshape: stored as attribute *reshape*
     """
     BaseEstimator.__init__(self)
     TransformerMixin.__init__(self)
     if hasattr(onnx_bytes, 'SerializeToString'):
         # the object is kept in its serialized form
         onnx_bytes = onnx_bytes.SerializeToString()
     self.onnx_bytes = onnx_bytes
     self.output_name = output_name
     self.enforce_float32 = enforce_float32
     self.runtime = runtime
     self.change_batch_size = change_batch_size
     self.reshape = reshape

Example #14

0

Show file

File: onnx_transformer.py Project: sw6y15/onnxruntime

 def __init__(self, onnx_bytes, output_name=None):
     """
     :param onnx_bytes: bytes
     :param output_name: requested output name or None to request all and
         have method *transform* to store all of them in a dataframe
     :raises TypeError: if *onnx_bytes* is not of type *bytes*
     """
     BaseEstimator.__init__(self)
     TransformerMixin.__init__(self)
     self.onnx_bytes, self.output_name = onnx_bytes, output_name
     if isinstance(onnx_bytes, bytes):
         return
     raise TypeError("onnx_bytes must be bytes to be pickled.")

Example #15

0

Show file

File: soap.py Project: JasonGibsonUfl/quantumMLdev

 def __init__(self,
              species: list = None,
              rcut: float = 6,
              nmax: int = 6,
              lmax: int = 8,
              rbf: str = "gto",
              sigma: float = 0.125,
              average: str = "inner",
              periodic: bool = True,
              convert: bool = True):
     r"""
     Initialize class

     Parameters
     ----------
     species : List
         list of elements, a new empty list is created when omitted
     rcut : float
         A cutoff for local region in angstroms. Should be bigger than 1 angstrom
     nmax : int
         The number of radial basis functions.
     lmax : int
         The maximum degree of spherical harmonics.
     rbf : str
         The radial basis functions to use. The available options are:
             * "gto": Spherical gaussian type orbitals defined as :math:`g_{nl}(r) = \sum_{n'=1}^{n_\mathrm{max}}\,\beta_{nn'l} r^l e^{-\alpha_{n'l}r^2}`
             * "polynomial": Polynomial basis defined as :math:`g_{n}(r) = \sum_{n'=1}^{n_\mathrm{max}}\,\beta_{nn'} (r-r_\mathrm{cut})^{n'+2}`
     sigma : float
         The standard deviation of the gaussians used to expand the atomic density.
     average : str
         The averaging mode over the centers of interest.
         Valid options are:
             * "off": No averaging.
             * "inner": Averaging over sites before summing up the magnetic quantum numbers: :math:`p_{nn'l}^{Z_1,Z_2} \sim \sum_m (\frac{1}{n} \sum_i c_{nlm}^{i, Z_1})^{*} (\frac{1}{n} \sum_i c_{n'lm}^{i, Z_2})`
             * "outer": Averaging over the power spectrum of different sites: :math:`p_{nn'l}^{Z_1,Z_2} \sim \frac{1}{n} \sum_i \sum_m (c_{nlm}^{i, Z_1})^{*} (c_{n'lm}^{i, Z_2})`
     periodic : bool
         Set to true if you want the descriptor output to respect the periodicity of the atomic systems (see the
         pbc-parameter in the constructor of ase.Atoms).
     convert : bool
         If true convert pymatgen structures to ase.atoms
     """
     TransformerMixin.__init__(self)
     # None replaces the former ``[]`` default: a list written in the
     # signature is created once and would be shared by every instance
     self.species = [] if species is None else species
     self.rcut = rcut
     self.nmax = nmax
     self.lmax = lmax
     # not set by the constructor
     self.soap: Any = None
     self.rbf = rbf
     self.sigma = sigma
     self.average = average
     self.periodic = periodic
     self.convert = convert

Example #16

0

Show file

File: onnx_transformer.py Project: adrinjalali/mlprodict

 def __init__(self,
              onnx_bytes,
              output_name=None,
              enforce_float32=True,
              runtime='onnxruntime1'):
     """
     :param onnx_bytes: model, if it has a method *SerializeToString*,
         the result of that method is stored instead of the object itself
     :param output_name: stored as attribute *output_name*
     :param enforce_float32: stored as attribute *enforce_float32*
     :param runtime: ``'onnxruntime1'`` by default
     """
     for base in (BaseEstimator, TransformerMixin):
         base.__init__(self)
     serializable = hasattr(onnx_bytes, 'SerializeToString')
     self.onnx_bytes = (onnx_bytes.SerializeToString()
                        if serializable else onnx_bytes)
     self.output_name = output_name
     self.enforce_float32 = enforce_float32
     self.runtime = runtime

Example #17

0

Show file

 def __init__(self, on_source='source', on_target='target', compfunc=None, *args, **kwargs):
     """
     base class for all transformers
     Args:
         on_source (str): name of suffix, stored as attribute ``left``
         on_target (str): stored as attribute ``right``
         compfunc (callable): comparison function, mandatory
         *args: accepted, not used by this constructor
         **kwargs: accepted, not used by this constructor

     Raises:
         ValueError: if compfunc is None
         AssertionError: if compfunc is not callable
     """
     TransformerMixin.__init__(self)
     self.left = on_source
     self.right = on_target
     if compfunc is None:
         raise ValueError('comparison function not provided with function', compfunc)
     if not callable(compfunc):
         # raised explicitly (same exception type as the former ``assert``)
         # because assert statements are removed when Python runs with -O
         raise AssertionError(
             'compfunc must be callable, got {}'.format(type(compfunc)))
     self.compfunc = compfunc

Example #18

0

Show file

File: cartesian.py Project: ogierpaul/suricate

 def __init__(self,
              ixname='ix',
              source_suffix='source',
              target_suffix='target',
              usecols=None,
              **kwargs):
     """
     Args:
         ixname (str): default 'ix'
         source_suffix (str): default 'source'
         target_suffix (str): default 'target'
         usecols: stored as attribute ``usecols``, default None
         **kwargs: accepted, not used by this constructor
     """
     TransformerMixin.__init__(self)
     self.usecols = usecols
     self.ixname = ixname
     self.source_suffix = source_suffix
     self.target_suffix = target_suffix
     derived = concatixnames(ixname=self.ixname,
                             source_suffix=self.source_suffix,
                             target_suffix=self.target_suffix)
     self.ixnamesource, self.ixnametarget, self.ixnamepairs = derived

Example #19

0

Show file

File: base.py Project: ogierpaul/suricate

 def __init__(self,
              ixname='ix',
              source_suffix='source',
              target_suffix='target'):
     """
     Args:
         ixname (str): default 'ix'
         source_suffix (str): default 'source'
         target_suffix (str): default 'target'
     """
     TransformerMixin.__init__(self)
     self.ixname = ixname
     self.source_suffix = source_suffix
     self.target_suffix = target_suffix
     index_names = concatixnames(ixname=self.ixname,
                                 source_suffix=self.source_suffix,
                                 target_suffix=self.target_suffix)
     self.ixnamesource, self.ixnametarget, self.ixnamepairs = index_names
     # NOTE(review): this stores the class pd.Index itself, not an
     # instance -- confirm it is a deliberate placeholder
     self.index = pd.Index
     # empty placeholders
     self.dfnum, self.dfix = pd.DataFrame(), pd.DataFrame()
     self.num = None

Example #20

0

Show file

    def __init__(self, model, periods=1, freq='30min'):
        """Lags a dataset.

        Lags all features.
        Missing data is dropped for fitting, and replaced with the mean for predict.

        :model: scikit-learn model (instance of ``BaseEstimator``)
        :periods: Number of timesteps to lag by
        :freq: stored as attribute ``freq``, ``'30min'`` by default
        :raises AssertionError: if `model` is not a ``BaseEstimator``
        """
        if not isinstance(model, BaseEstimator):
            # raised explicitly (same exception type as the former ``assert``)
            # because assert statements are removed when Python runs with -O
            raise AssertionError("`model` isn't a scikit-learn model")

        BaseEstimator.__init__(self)
        TransformerMixin.__init__(self)

        self.periods = periods
        self.freq = freq

        self.model = model

Example #21

0

Show file

File: ptsne.py Project: muskanmahajan37/CS5173-ManifoldLearning

 def __init__(
     self,
     normalizer=None,
     transformer=None,
     estimator=None,
     normalize=True,
     keep_tsne_outputs=False,
 ):
     """
     @param      normalizer          None by default, otherwise it must
                                     expose a method *transform*
     @param      transformer         :epkg:`sklearn:manifold:TSNE`
                                     by default
     @param      estimator           :epkg:`sklearn:neural_network:MLPRegressor`
                                     by default
     @param      normalize           normalizes the outputs, centers and normalizes
                                     the output of the *t-SNE* and applies that same
                                     normalization to the prediction of the estimator
     @param      keep_tsne_outputs   if True, keep raw outputs of
                                     :epkg:`TSNE` is stored in member
                                     *tsne_outputs_*

     Raises *AttributeError* if the normalizer, the transformer or the
     estimator misses the method this class requires.
     """
     TransformerMixin.__init__(self)
     BaseEstimator.__init__(self)
     estimator = MLPRegressor() if estimator is None else estimator
     transformer = TSNE() if transformer is None else transformer
     self.estimator, self.transformer = estimator, transformer
     self.normalizer = normalizer
     self.keep_tsne_outputs = keep_tsne_outputs
     # (object, required method, error message), checked in this order
     requirements = [
         (transformer, "fit_transform",
          "transformer {} does not have a 'fit_transform' method."),
         (estimator, "predict",
          "estimator {} does not have a 'predict' method."),
     ]
     if normalizer is not None:
         requirements.insert(
             0, (normalizer, "transform",
                 "normalizer {} does not have a 'transform' method."))
     for obj, required, template in requirements:
         if not hasattr(obj, required):
             raise AttributeError(template.format(type(obj)))
     self.normalize = normalize
Example #22

0

Show file

File: preprocess_test.py Project: elifesciences/sciencebeam-trainer-delft

def _fit_transform_with_state_restore_check(transformer: TransformerMixin, X,
                                            **kwargs):
    """Fit and transform *X*, then check a restored copy transforms *X* identically.

    The transformer is passed through ``_get_state_and_restore`` and the
    restored object must produce the same values (compared with ``tolist()``).

    Returns the output of ``fit_transform``.
    """
    expected = transformer.fit_transform(X, **kwargs)
    LOGGER.debug('transformed: %s', expected)
    LOGGER.debug('transformed.shape: %s', expected.shape)
    actual = _get_state_and_restore(transformer).transform(X)
    LOGGER.debug('restored_transformed: %s', actual)
    assert actual.tolist() == expected.tolist()
    return expected

Example #23

0

Show file

File: connector.py Project: ogierpaul/suricate

    def __init__(self,
                 ixname='ix',
                 source_suffix='source',
                 target_suffix='target'):
        """

        Args:
            ixname: default 'ix'
            source_suffix: default 'source'
            target_suffix: default 'target'
        """
        TransformerMixin.__init__(self)
        self.ixname = ixname
        self.source_suffix = source_suffix
        self.target_suffix = target_suffix
        # the three derived names are computed by concatixnames
        (self.ixnamesource,
         self.ixnametarget,
         self.ixnamepairs) = concatixnames(ixname=self.ixname,
                                           source_suffix=self.source_suffix,
                                           target_suffix=self.target_suffix)

Example #24

0

Show file

def transform(dataframe: pd.DataFrame,
              scaler: TransformerMixin) -> pd.DataFrame:
    """Add a ``features`` column holding the scaled numeric fields of each row.

    The columns ``preco``, ``prazo``, ``frete``, ``latitude`` and
    ``longitude`` that exist in *dataframe* are extracted, passed to
    ``scaler.fit_transform`` and the result is stored row by row in a new
    column ``features``.

    The input dataframe is modified in place and returned.
    """
    numeric_fields = ['preco', 'prazo', 'frete', 'latitude', 'longitude']
    raw_values = dataframe.filter(items=numeric_fields).to_numpy()
    # one entry per row
    dataframe['features'] = list(scaler.fit_transform(raw_values))
    return dataframe

Example #25

0

Show file

 def __init__(
     self,
     elements: List,
     rcut: float = 10.1,
     stepSize: float = 0.1,
     sigma: float = 0.2,
 ):
     """
     Parameters
     ----------
     elements : list
         list of elements symbols
     rcut : float
         upper bound (excluded) of the radius bins, default 10.1
     stepSize : float
         step between two consecutive radius bins, default 0.1
     sigma : float
         stored as attribute ``sigma``, default 0.2
     """
     TransformerMixin.__init__(self)
     self.elements = elements
     self.rdf_tup = calc_rdf_tup(elements)
     self.rcut, self.stepSize, self.sigma = rcut, stepSize, sigma
     # radius bins start at 0.1
     radii = np.arange(0.1, self.rcut, self.stepSize)
     self.binRad = radii
     self.numBins = len(radii)
     self.numPairs = len(self.rdf_tup)

Example #26

0

Show file

    def __init__(self, columns=None, remove=None, skip_errors=False, single=False, fLOG=None):
        """
        Keeps the settings of the transformer.

        @param      columns         columns selection, *None* and lists are kept as they are,
                                    any other value becomes a list of one element
        @param      remove          modalities to remove
        @param      skip_errors     skip when a new category appears (no 1)
        @param      single          use a single column per category, do not multiply them for each value
        @param      fLOG            logging function

        The logging function displays a message when a new dense and big matrix
        is created when it should be sparse. A sparse matrix should be allocated instead.
        """
        BaseEstimator.__init__(self)
        TransformerMixin.__init__(self)
        keep_as_is = columns is None or isinstance(columns, list)
        self._p_columns = columns if keep_as_is else [columns]
        self._p_single = single
        self._p_remove = remove
        self._p_skip_errors = skip_errors
        self.fLOG = fLOG

Example #27

0

Show file

File: import_sklearn.py Project: dycw/dotfiles

 def apply_transform(scaler: TransformerMixin, x: ArrayLike) -> ArrayLike:
     """Apply ``scaler.transform`` to *x* and return a result of the same kind.

     An ``ndarray`` is passed straight to the scaler. A ``Series`` or a
     ``DataFrame`` is converted to numpy, transformed, then rebuilt with
     the original index (and name or columns).

     Raises ``TypeError`` for any other type.
     """
     if isinstance(x, ndarray):
         return scaler.transform(x)
     if not isinstance(x, (Series, DataFrame)):
         raise TypeError(f"Invalid type: {type(x).__name__}")
     # pandas containers: transform the raw values, then restore the labels
     scaled = apply_transform(scaler, x.to_numpy())
     if isinstance(x, Series):
         return Series(scaled, x.index, name=x.name)
     return DataFrame(scaled, x.index, x.columns)

Example #28

0

Show file

File: preprocess.py Project: elifesciences/sciencebeam-trainer-delft

def _fit_transformer_with_progress_logging(transformer: TransformerMixin,
                                           X,
                                           logger: logging.Logger,
                                           message_prefix: str,
                                           unit: str,
                                           message_suffx: str = ': '):
    """Fit *transformer* on *X* wrapped by ``logging_tqdm``.

    When *transformer* is a ``Pipeline`` made of a single ``FeatureUnion``
    step, every transformer of the union is fitted separately with its own
    description (``<message_prefix>.<name><message_suffx>``); the pipeline
    object itself is not fitted in that case. Otherwise *transformer* is
    fitted once with the description ``message_prefix + message_suffx``.

    Returns None.
    """
    if isinstance(transformer, Pipeline):
        steps = transformer.steps
        if len(steps) == 1 and isinstance(steps[0][1], FeatureUnion):
            feature_union = steps[0][1]
            for name, union_transformer in feature_union.transformer_list:
                # each transformer gets a wrapper around the original X:
                # re-binding X here would wrap the previous wrapper again
                # on every iteration
                wrapped_X = logging_tqdm(
                    iterable=X,
                    logger=logger,
                    desc=f'{message_prefix}.{name}{message_suffx}',
                    unit=unit)
                union_transformer.fit(wrapped_X)
            return
    wrapped_X = logging_tqdm(iterable=X,
                             logger=logger,
                             desc=message_prefix + message_suffx,
                             unit=unit)
    transformer.fit(wrapped_X)

Example #29

0

Show file

File: onnx_tokenizer.py Project: sdpython/mlprodict

 def __init__(self,
              model,
              nbest_size=1,
              alpha=0.5,
              reverse=False,
              add_bos=False,
              add_eos=False,
              opset=None):
     """
     :param model: *bytes* are stored as they are in attribute
         *model_b64*, any other object must expose a method *tolist*
         returning integers in ``[0, 255]``, they are converted
         into bytes and encoded in base64
     :param nbest_size: stored as attribute *nbest_size*
     :param alpha: stored as attribute *alpha*
     :param reverse: stored as attribute *reverse*
     :param add_bos: stored as attribute *add_bos*
     :param add_eos: stored as attribute *add_eos*
     :param opset: stored as attribute *opset*
     :raises ImportError: if the name *get_library_path* is None
     """
     BaseEstimator.__init__(self)
     TransformerMixin.__init__(self)
     if isinstance(model, bytes):
         self.model_b64 = model
     else:
         ints = model.tolist()
         # b64encode only accepts bytes-like objects, a list of
         # integers raises TypeError and must be converted first
         self.model_b64 = base64.b64encode(bytes(ints))
     self.nbest_size = nbest_size
     self.alpha = alpha
     self.reverse = reverse
     self.add_bos = add_bos
     self.add_eos = add_eos
     self.opset = opset
     if get_library_path is None:
         raise ImportError("onnxruntime_extensions is not installed.")

Example #30

0

Show file

def _numbered_feature_names(trans_name: str, inner: str, output_num_cols: int) -> list:
    """Build ``trans_name(inner)``, suffixed with ``[i]`` when there are several columns."""
    if output_num_cols == 1:
        return [f"{trans_name}({inner})"]
    return [f"{trans_name}({inner})[{i}]" for i in range(output_num_cols)]


def _infer_feature_names(
        input_feature_names: Sequence[str],
        output_num_cols: int,
        trans_name: str,
        transformer: TransformerMixin
) -> Sequence[str]:
    """Infer the names of the columns produced by *transformer*.

    Strategy, in order:

    1. ``transformer.get_feature_names(input_feature_names)`` when it works.
    2. If the number of columns is unchanged and the transformer is one of
       ``StandardScaler``, ``MinMaxScaler``, ``RobustScaler``: one
       ``trans_name(feature)`` per input feature.
    3. If there is a single input feature: ``trans_name(feature)``, suffixed
       with ``[i]`` when several columns are produced.
    4. Otherwise a warning is emitted and the names are built from the
       ``repr`` of all the input names, following the pattern of step 3.

    Step 4 used to call this function again with the ``repr`` string as
    *input_feature_names*; that call could not reach a terminating branch
    and ended in ``RecursionError``.

    :param input_feature_names: names of the columns given to the transformer
    :param output_num_cols: number of columns produced by the transformer
    :param trans_name: name of the transformer used in the generated names
    :param transformer: transformer the names are inferred for
    :return: the inferred feature names
    """
    feature_names = False
    try:
        # polynomial features will add ^1, ^2, etc.
        # one_hot_encoder will add labels
        # TODO: this breaks the mapping between names in get/set_params and elsewhere feature-names. I think that's ok
        feature_names = transformer.get_feature_names(input_feature_names)
    except (TypeError, AttributeError, NotImplementedError):
        if output_num_cols == len(input_feature_names):
            if isinstance(transformer, (StandardScaler, MinMaxScaler, RobustScaler)):
                feature_names = [f"{trans_name}({fname})" for fname in input_feature_names]
            # otherwise: can't assume 1-1 mapping. gotta wait for sklearn to support get_feature_names on everything
            # https://github.com/scikit-learn/scikit-learn/pull/12627
        elif len(input_feature_names) == 1:
            feature_names = _numbered_feature_names(
                trans_name, input_feature_names[0], output_num_cols)

    if feature_names is False:
        # TODO: maybe somehow support passing aliases?
        warn(f"Unable to infer feature-names for {trans_name}, forced to concatenate.")
        # all input names are concatenated into a single label
        feature_names = _numbered_feature_names(
            trans_name, repr(input_feature_names), output_num_cols)

    return feature_names

Example #31

0

Show file

File: taggerTrainKeras.py Project: chixma/TranskribusDU

 def __init__(self):
     """Calls the constructors of ``BaseEstimator`` and ``TransformerMixin``, in that order."""
     for base in (BaseEstimator, TransformerMixin):
         base.__init__(self)