Example 1
    def __init__(self,
                 columns=None,
                 remove=None,
                 skip_errors=False,
                 single=False,
                 fLOG=None):
        """
        constructor

        @param      columns         specify a column selection
        @param      remove          modalities to remove
        @param      skip_errors     skip when a new category appears instead of raising an error
        @param      single          use a single column per category instead of one column per value
        @param      fLOG            logging function

        The logging function displays a message when a big dense matrix
        is created although a sparse matrix should have been allocated instead.
        """
        BaseEstimator.__init__(self)
        TransformerMixin.__init__(self)
        self._p_columns = columns if isinstance(
            columns, list) or columns is None else [columns]
        self._p_skip_errors = skip_errors
        self._p_remove = remove
        self._p_single = single
        self.fLOG = fLOG
Example 2
 def __init__(self, estimator=None, clus=None, **kwargs):
     """
      @param  estimator   :epkg:`sklearn:linear_model:LogisticRegression`
                         by default
     @param  clus        clustering applied on each class,
                         by default k-means with two classes
     @param  kwargs      sent to :meth:`set_params
                         <mlinsights.mlmodel.classification_kmeans.
                         ClassifierAfterKMeans.set_params>`,
                         see its documentation to understand how to
                         specify parameters
     """
     ClassifierMixin.__init__(self)
     BaseEstimator.__init__(self)
     if estimator is None:
         estimator = LogisticRegression()
     if clus is None:
         clus = KMeans(n_clusters=2)
     self.estimator = estimator
     self.clus = clus
     if not hasattr(clus, "transform"):
         raise AttributeError(  # pragma: no cover
             "clus does not have a transform method.")
     if kwargs:
         self.set_params(**kwargs)
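The docstring above points at mlinsights's ClassifierAfterKMeans. A minimal usage sketch, assuming this constructor belongs to that class and that mlinsights and scikit-learn are installed:

    # Hedged usage sketch for the constructor above, assuming it is
    # mlinsights.mlmodel.classification_kmeans.ClassifierAfterKMeans.
    from mlinsights.mlmodel import ClassifierAfterKMeans
    from sklearn.datasets import load_iris
    from sklearn.linear_model import LogisticRegression

    X, y = load_iris(return_X_y=True)
    clf = ClassifierAfterKMeans(estimator=LogisticRegression(max_iter=1000))
    clf.fit(X, y)                 # clusters each class, then classifies
    print(clf.predict(X[:5]))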
Example 3
    def __init__(self,
                 estimator=None,
                 max_depth=20,
                 min_samples_split=2,
                 min_samples_leaf=2,
                 min_weight_fraction_leaf=0.0,
                 fit_improve_algo='auto',
                 p1p2=0.09,
                 gamma=1.,
                 verbose=0):
        "constructor"
        ClassifierMixin.__init__(self)
        BaseEstimator.__init__(self)
        # logistic regression
        if estimator is None:
            self.estimator = LogisticRegression()
        else:
            self.estimator = estimator
        if max_depth is None:
            raise ValueError("'max_depth' cannot be None.")
        if max_depth > 1024:
            raise ValueError("'max_depth' must be <= 1024.")
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.min_samples_leaf = min_samples_leaf
        self.min_weight_fraction_leaf = min_weight_fraction_leaf
        self.fit_improve_algo = fit_improve_algo
        self.p1p2 = p1p2
        self.gamma = gamma
        self.verbose = verbose

        if self.fit_improve_algo not in DecisionTreeLogisticRegression._fit_improve_algo_values:
            raise ValueError("fit_improve_algo='{}' not in {}".format(
                self.fit_improve_algo,
                DecisionTreeLogisticRegression._fit_improve_algo_values))
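Since the last check above raises for unknown values of fit_improve_algo, here is a short sketch of both paths, assuming this is mlinsights's DecisionTreeLogisticRegression:

    # Hedged sketch: the constructor validates fit_improve_algo eagerly.
    from mlinsights.mlmodel import DecisionTreeLogisticRegression

    model = DecisionTreeLogisticRegression(max_depth=5)  # 'auto' is accepted
    try:
        DecisionTreeLogisticRegression(fit_improve_algo="nope")
    except ValueError as e:
        print(e)  # fit_improve_algo='nope' not in [...]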
Example 4
    def __init__(self,  n_estimators=20, 
                        max_depth=5, min_samples_split=10, min_samples_leaf=10,
                        random_state=0,
                        em_itrs=5,
                        regularization=0.05,
                        passive_dyn_func=None,
                        passive_dyn_ctrl=None,
                        passive_dyn_noise=None,
                        verbose=False):
        '''
        n_estimators        - number of models in the ensemble
        ...                 - a batch of parameters forwarded to RandomTreesEmbedding, see its documentation
        em_itrs             - maximum number of EM iterations to take
        regularization      - small positive scalar to prevent singularity in matrix inversion
        passive_dyn_func    - function to evaluate passive dynamics; None for a MaxEnt model
        passive_dyn_ctrl    - function returning the control matrix, which might depend on the state...
        passive_dyn_noise   - covariance of a Gaussian noise; only applicable when the passive dynamics
                                are Gaussian; None for a MaxEnt model. Note this implies a dynamical
                                system with constant input gain; extending it to a state-dependent
                                input gain would require a covariance for each data point.
        verbose             - output training information
        '''
        BaseEstimator.__init__(self)

        self.n_estimators=n_estimators
        self.max_depth=max_depth
        self.min_samples_split=min_samples_split
        self.min_samples_leaf=min_samples_leaf
        self.random_state=random_state
        self.em_itrs=em_itrs
        self.reg=regularization
        self.passive_dyn_func=passive_dyn_func
        self.passive_dyn_ctrl=passive_dyn_ctrl
        self.passive_dyn_noise=passive_dyn_noise
        self.verbose=verbose
        return
Example 5
 def __init__(self,
              transformer=None,
              estimator=None,
              normalize=True,
              keep_tsne_outputs=False,
              **kwargs):
     TransformerMixin.__init__(self)
     BaseEstimator.__init__(self)
     if estimator is None:
         estimator = KNeighborsRegressor()
     if transformer is None:
         transformer = TSNE()
     self.estimator = estimator
     self.transformer = transformer
     self.keep_tsne_outputs = keep_tsne_outputs
     if not hasattr(transformer, "fit_transform"):
         raise AttributeError(
             "transformer {} does not have a 'fit_transform' "
             "method.".format(type(transformer)))
     if not hasattr(estimator, "predict"):
         raise AttributeError("estimator {} does not have a 'predict' "
                              "method.".format(type(estimator)))
     self.normalize = normalize
     if kwargs:
         self.set_params(**kwargs)
Example 6
 def __init__(self, transformer=None, estimator=None,
              normalize=True, keep_tsne_outputs=False, **kwargs):
     """
     :param transformer: `TSNE` by default
     :param estimator: `MLPRegressor` by default
      :param normalize: normalizes the outputs, centers and normalizes
          the output of the *t-SNE* and applies that same
          normalization to the prediction of the estimator
      :param keep_tsne_outputs: if True, the raw outputs of
          :epkg:`TSNE` are stored in member *tsne_outputs_*
     :param kwargs: sent to :meth:`set_params <mlinsights.mlmodel.
         tsne_transformer.PredictableTSNE.set_params>`, see its
         documentation to understand how to specify parameters
     """
     TransformerMixin.__init__(self)
     BaseEstimator.__init__(self)
     if estimator is None:
         estimator = KNeighborsRegressor()
     if transformer is None:
         transformer = TSNE()
     self.estimator = estimator
     self.transformer = transformer
     self.keep_tsne_outputs = keep_tsne_outputs
     if not hasattr(transformer, "fit_transform"):
         raise AttributeError(
             "Transformer {} does not have a 'fit_transform' "
             "method.".format(type(transformer)))
     if not hasattr(estimator, "predict"):
         raise AttributeError(
             "Estimator {} does not have a 'predict' method.".format(
                 type(estimator)))
     self.normalize = normalize
     if kwargs:
         self.set_params(**kwargs)
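A minimal usage sketch, assuming this is mlinsights's PredictableTSNE; t-SNE makes fitting slow, so only a small slice of data is used:

    # Hedged usage sketch for the constructor above, assuming it is
    # mlinsights.mlmodel.PredictableTSNE and mlinsights is installed.
    from mlinsights.mlmodel import PredictableTSNE
    from sklearn.datasets import load_digits

    X, y = load_digits(return_X_y=True)
    ptsne = PredictableTSNE()           # TSNE + KNeighborsRegressor defaults
    ptsne.fit(X[:200], y[:200])
    print(ptsne.transform(X[200:205]))  # 2D coordinates from the estimator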
Example 7
    def __init__(self, binner=None, estimator=None, n_jobs=None, verbose=False):
        """
        @param      binner              transformer or predictor which creates the buckets
        @param      estimator           predictor trained on every bucket
        @param      n_jobs              number of parallel jobs (for training and predicting)
        @param      verbose             boolean or use ``'tqdm'`` to use :epkg:`tqdm`
                                        to fit the estimators

        *binner* must be provided and may be:

        - ``'bins'``: the model :epkg:`sklearn:preprocessing:KBinsDiscretizer`
        - any instantiated model

        *estimator* must be an instantiated model as well; passing ``None``
        raises an error.
        """
        BaseEstimator.__init__(self)
        if estimator is None:
            raise ValueError(  # pragma: no cover
                "estimator cannot be null.")
        if binner is None:
            raise TypeError(  # pragma: no cover
                "Unsupported options for binner=='tree' and model {}.".format(
                    type(estimator)))
        elif binner == "bins":
            binner = KBinsDiscretizer()
        self.binner = binner
        self.estimator = estimator
        self.n_jobs = n_jobs
        self.verbose = verbose
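The two documented binner options translate to calls like the following sketch; the class name is an assumption, since the snippet only shows the constructor (its signature matches mlinsights's PiecewiseRegressor):

    # Hedged usage sketch; PiecewiseRegressor is assumed, not shown above.
    from mlinsights.mlmodel import PiecewiseRegressor
    from sklearn.linear_model import LinearRegression
    from sklearn.preprocessing import KBinsDiscretizer

    # 'bins' asks the constructor to build a KBinsDiscretizer itself
    model = PiecewiseRegressor(binner="bins", estimator=LinearRegression())
    # or pass any instantiated transformer explicitly
    model = PiecewiseRegressor(binner=KBinsDiscretizer(n_bins=4),
                               estimator=LinearRegression())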
Example 8
 def __init__(self, estimator, method=None, copy_estimator=True):
     """
     @param      estimator           estimator to wrap in a transformer, it is cloned
                                     with the training data (deep copy) when fitted
     @param      method              if None, guess what method should be called,
                                     *transform* for a transformer,
                                     *predict_proba* for a classifier,
                                     *decision_function* if found,
                                      *predict* otherwise
     @param      copy_estimator      copy the model instead of taking a reference
     """
     TransformerMixin.__init__(self)
     BaseEstimator.__init__(self)
     self.estimator = estimator
     self.copy_estimator = copy_estimator
     if method is None:
         if hasattr(estimator, "transform"):
             method = "transform"
         elif hasattr(estimator, "predict_proba"):
             method = "predict_proba"
         elif hasattr(estimator, "decision_function"):
             method = "decision_function"
         elif hasattr(estimator, "predict"):
             method = "predict"
         else:
             raise AttributeError(
                 "Cannot find a method transform, predict_proba, decision_function, predict in object {}"
                 .format(type(estimator)))
     if not hasattr(estimator, method):
         raise AttributeError("Cannot find method '{}' in object {}".format(
             method, type(estimator)))
     self.method = method
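The guessing order above is easy to reproduce standalone; this sketch shows which method would be picked for two common scikit-learn objects:

    # Standalone illustration of the method-guessing order used above.
    from sklearn.linear_model import LogisticRegression
    from sklearn.preprocessing import StandardScaler

    def guess_method(est):
        for name in ("transform", "predict_proba",
                     "decision_function", "predict"):
            if hasattr(est, name):
                return name
        raise AttributeError(f"No suitable method found on {type(est)}.")

    print(guess_method(StandardScaler()))      # transform
    print(guess_method(LogisticRegression()))  # predict_proba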
Example 9
 def __init__(self,
              embedding,
              m=10,
              analyzer=None,
              eqe=1,
              verbose=0,
              a=1,
              c=0,
              n_jobs=1):
     """
     Initializes the embedding based query language model query expansion
     technique
     """
     BaseEstimator.__init__(self)
     self._embedding = embedding
     self._analyzer = analyzer
     if eqe not in [1, 2]:
          raise ValueError("eqe must be 1 or 2.")
     self._eqe = eqe
     self.verbose = verbose
     self._a = a
     self._c = c
     self.m = m
     self.n_jobs = n_jobs
     self.vocabulary = None
Example 10
 def __init__(self, estimator=None, threshold=0.75):
     ClassifierMixin.__init__(self)
     BaseEstimator.__init__(self)
     if estimator is None:
         estimator = LogisticRegression(solver='liblinear')
     self.estimator = estimator
     self.threshold = threshold
Example 11
 def __init__(self, kind='poly', poly_degree=2, poly_interaction_only=False,
              poly_include_bias=True):
     BaseEstimator.__init__(self)
     TransformerMixin.__init__(self)
     self.kind = kind
     self.poly_degree = poly_degree
     self.poly_include_bias = poly_include_bias
     self.poly_interaction_only = poly_interaction_only
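The poly_* parameters mirror scikit-learn's PolynomialFeatures; presumably fit builds one from them when kind='poly', as in this sketch (an assumption, since fit is not shown):

    # Assumption: the poly_* parameters map onto PolynomialFeatures.
    import numpy
    from sklearn.preprocessing import PolynomialFeatures

    poly = PolynomialFeatures(degree=2, interaction_only=False,
                              include_bias=True)
    print(poly.fit_transform(numpy.array([[1., 2.]])))
    # [[1. 1. 2. 1. 2. 4.]]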
Example 12
 def __init__(self, p=1, n_jobs=-1, warmup=1000, samples_per_chain=1000,
              n_chains=4, normalize=True):
     BaseEstimator.__init__(self)
      BaseAR.__init__(self, n_jobs=n_jobs, warmup=warmup,
                      samples_per_chain=samples_per_chain,
                      n_chains=n_chains, normalize=normalize)
     self.p = p
     return
Example 13
 def __init__(self, onnx_bytes, output_name=None, enforce_float32=True):
     BaseEstimator.__init__(self)
     TransformerMixin.__init__(self)
     self.onnx_bytes = onnx_bytes
     self.output_name = output_name
     self.enforce_float32 = enforce_float32
     if not isinstance(onnx_bytes, bytes):
         raise TypeError("onnx_bytes must be bytes to be pickled.")
Example 14
 def __init__(self, vocab, merges, padding_length=-1, opset=None):
     BaseEstimator.__init__(self)
     TransformerMixin.__init__(self)
     self.vocab = vocab
     self.merges = merges
     self.padding_length = padding_length
     self.opset = opset
     if get_library_path is None:
         raise ImportError("onnxruntime_extensions is not installed.")
Example 15
 def __init__(self, num_inputs, mxseed=0, epochs=5000, net_type=1):
     BaseEstimator.__init__(self)
     RegressorMixin.__init__(self)
     self.net = None
     self.num_inputs = num_inputs
     self.mxseed = mxseed
     self.epochs = epochs
     self.net_type = net_type
     return
Example 16
 def __init__(self, embedding, analyzer='word', m=10, verbose=0,
              use_idf=True, **ev_params):
     """Expand a query by the nearest known tokens to its centroid
     """
     self.embedding = embedding
     self.m = m
     self.vect = EmbeddedVectorizer(embedding,
                                    analyzer=analyzer,
                                    use_idf=use_idf,
                                    **ev_params)
     BaseEstimator.__init__(self)
Example 17
 def __init__(self, name, fct, kwargs):
     """
     @param      name        function name
     @param      fct         python function
      @param      kwargs      function parameters
     """
     BaseEstimator.__init__(self)
     TransformerMixin.__init__(self)
     self.name_fct = name
     self._fct = fct
     self.kwargs = kwargs
Example 18
 def __init__(self, scaler_model, clf_model, hmm_model):
     prob_bins = np.array([-np.inf, 0.1, 0.3, 0.5, 0.7, 0.9, np.inf])
     bins_discretizer = KBinsDiscretizer(encode='ordinal')
     bins_discretizer.n_bins_ = np.array([prob_bins.shape[0]])
     bins_discretizer.bin_edges_ = prob_bins.reshape(1, -1)
     TransformerMixin.__init__(self)
     BaseEstimator.__init__(self)
     self.scaler_model_ = scaler_model
     self.clf_model_ = clf_model
     self.hmm_model_ = hmm_model
     self.bins_discretizer_ = bins_discretizer
Example 19
 def __init__(self, onnx_bytes, output_name=None, enforce_float32=True,
              runtime='python', change_batch_size=None, reshape=False):
     BaseEstimator.__init__(self)
     TransformerMixin.__init__(self)
     self.onnx_bytes = (onnx_bytes
                        if not hasattr(onnx_bytes, 'SerializeToString')
                        else onnx_bytes.SerializeToString())
     self.output_name = output_name
     self.enforce_float32 = enforce_float32
     self.runtime = runtime
     self.change_batch_size = change_batch_size
     self.reshape = reshape
Example 20
 def __init__(self, onnx_bytes, output_name=None):
     """
      :param onnx_bytes: bytes of a serialized ONNX model
     :param output_name: requested output name or None to request all and
         have method *transform* to store all of them in a dataframe
     """
     BaseEstimator.__init__(self)
     TransformerMixin.__init__(self)
     self.onnx_bytes = onnx_bytes
     self.output_name = output_name
     if not isinstance(onnx_bytes, bytes):
         raise TypeError("onnx_bytes must be bytes to be pickled.")
Example 21
 def __init__(self, model='SIR', t=0, max_iter=100,
              learning_rate_init=0.1, lr_schedule='constant',
              momentum=0.9, power_t=0.5, early_th=None,
              min_threshold='auto', max_threshold='auto',
              verbose=False, init=None):
     if init is not None:
         if isinstance(init, EpidemicRegressor):
             if hasattr(init, 'coef_'):
                 init = init.coef_.copy()
             else:
                 init = None  # pragma: no cover
         elif not isinstance(init, dict):
             raise TypeError(
                 f"init must be a dictionary not {type(init)}.")
     BaseEstimator.__init__(self)
     RegressorMixin.__init__(self)
     self.t = t
     self.model = model
     self.max_iter = max_iter
     self.learning_rate_init = learning_rate_init
     self.lr_schedule = lr_schedule
     self.momentum = momentum
     self.power_t = power_t
     self.early_th = early_th
     self.verbose = verbose
     if min_threshold == 'auto':
         if model.upper() in ('SIR', 'SIRD'):
             min_threshold = 0.0001
         elif model.upper() in ('SIRC', ):
             pmin = dict(beta=0.001, nu=0.0001, mu=0.0001,
                         a=-1., b=0., c=0.)
             min_threshold = numpy.array(
                 [pmin[k[0]] for k in CovidSIRDc.P0])
          elif model.upper() in ('SIRDC', ):
             pmin = dict(beta=0.001, nu=0.001, mu=0.001,
                         a=-1., b=0., c=0.)
             min_threshold = numpy.array(
                 [pmin[k[0]] for k in CovidSIRDc.P0])
     if max_threshold == 'auto':
         if model.upper() in ('SIR', 'SIRD'):
             max_threshold = 1.
         elif model.upper() in ('SIRC', 'SIRDC'):
             pmax = dict(beta=1., nu=0.5, mu=0.5,
                         a=0., b=4., c=2.)
             max_threshold = numpy.array(
                 [pmax[k[0]] for k in CovidSIRDc.P0])
     self.min_threshold = min_threshold
     self.max_threshold = max_threshold
     self._get_model()
     self.init = init
     if init is not None:
         self.coef_ = init
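Note the fix on the SIRDC branch above: ('SIRDC') is just a parenthesized string, so the in operator performs a substring test; only ('SIRDC', ) is a one-element tuple. A minimal illustration:

    # Why the trailing comma matters in the membership tests above.
    print('SIR' in ('SIRDC'))    # True: substring test against a string
    print('SIR' in ('SIRDC', ))  # False: membership in a one-element tuple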
Example 22
 def __init__(self, force_positive=False, **kwargs):
     """
      *kwargs* should contain parameters
      for :epkg:`sklearn:decomposition:NMF`.
      The parameter *force_positive* removes all
      negative predictions and replaces them with zero.
     """
     BaseEstimator.__init__(self)
     RegressorMixin.__init__(self)
     MultiOutputMixin.__init__(self)
     for k, v in kwargs.items():
         setattr(self, k, v)
     self.force_positive = force_positive
Example 23
    def __init__(self, embedding, analyzer, m=10):
        """Initializes Embedding Based Query Expansion

        :embedding: TODO
        :analyzer: TODO
        :m: TODO

        """
        BaseEstimator.__init__(self)

        self._embedding = embedding
        self._m = m
        self._cv = CountVectorizer(analyzer=analyzer)
Example 25
 def __init__(self,
              onnx_bytes,
              output_name=None,
              enforce_float32=True,
              runtime='onnxruntime1'):
     BaseEstimator.__init__(self)
     TransformerMixin.__init__(self)
     self.onnx_bytes = (onnx_bytes
                        if not hasattr(onnx_bytes, 'SerializeToString') else
                        onnx_bytes.SerializeToString())
     self.output_name = output_name
     self.enforce_float32 = enforce_float32
     self.runtime = runtime
Example 26
    def __init__(self,
                 n_jobs=-1,
                 warmup=1000,
                 samples_per_chain=1000,
                 n_chains=4,
                 normalize=True,
                 max_samples_mem=500):
        """
        An interface to the following Stan model:

        y0 ~ cauchy(0, 1);
        nu ~ cauchy(0, 1);
        sigma ~ normal(0, 1);  // half-normal
        lam ~ exponential(1);
        theta ~ normal(0, lam);
        y ~ student_t(nu, y0 + Q * theta, sigma);

        params:
          n_jobs: Number of cores to use
          warmup: Number of warmup iterations for HMC, roughly analogous
              to a burn-in period.
          samples_per_chain: Number of samples to draw per chain
          n_chains: Number of chains (should run at least 2)
          normalize: Whether to normalize the data before feeding it
              to Stan.  This is necessary as the priors in the model
              are fixed.
          max_samples_mem: A parameter to prevent blowing up all the
              memory when sampling the posterior predictive.
        """
        BaseEstimator.__init__(self)
        StanCacheMixin.__init__(self, MODEL_DIR)

        self.stan_model, self.predict_model = self._load_compiled_models()

        self.stan_fitting_kwargs = {
            "chains": n_chains,
            "iter_sampling": samples_per_chain,
            "iter_warmup": warmup,
            "inits": 1,
            "metric": "diag_e",
            "adapt_delta": 0.8
        }

        self._fit_results = None
        self.normalize = normalize
        self.max_samples_mem = max_samples_mem

        if normalize:
            self._y_ss = StandardScaler()
            self._X_ss = StandardScaler()
        return
Example 27
 def __init__(self,
              estimator,
              runtime='python',
              enforce_float32=True,
              target_opset=None,
              conv_options=None,
              nopython=True):
     BaseEstimator.__init__(self)
     self.estimator = estimator
     self.runtime = runtime
     self.enforce_float32 = enforce_float32
     self.target_opset = target_opset
     self.conv_options = conv_options
     self.nopython = nopython
Example 29
    def __init__(self, model, periods=1, freq='30min'):
        """Lags a dataset.

        Lags all features.
        Missing data is dropped for fitting and replaced with the mean for prediction.

        :periods: Number of timesteps to lag by
        """
        assert isinstance(model, BaseEstimator), "`model` isn't a scikit-learn model"

        BaseEstimator.__init__(self)
        TransformerMixin.__init__(self)

        self.periods = periods
        self.freq = freq

        self.model = model
Example 30
 def __init__(self, rf_estimator=None, lasso_estimator=None):
     """
     @param  rf_estimator    random forest estimator,
                             :epkg:`sklearn:ensemble:RandomForestRegressor`
                             by default
      @param  lasso_estimator Lasso estimator,
                              :epkg:`sklearn:linear_model:Lasso`
                              by default
     """
     BaseEstimator.__init__(self)
     RegressorMixin.__init__(self)
     if rf_estimator is None:
         rf_estimator = RandomForestRegressor()
     if lasso_estimator is None:
         lasso_estimator = Lasso()
     self.rf_estimator = rf_estimator
     self.lasso_estimator = lasso_estimator
Example 31
    def __init__(self,
                 n_jobs=-1,
                 warmup=1000,
                 samples_per_chain=1000,
                 n_chains=4,
                 normalize=True,
                 max_samples_mem=500):
        BaseEstimator.__init__(self)
        StanCacheMixin.__init__(self, MODEL_DIR)

        self.stan_model, self.predict_model = self._load_compiled_models()

        # The control parameters for NUTS, most are left as default
        control = {
            "metric": "diag_e",  # Type of mass matrix (diag_e default)
            "stepsize_jitter": 0.05,  # Slight randomization of stepsizes
            "adapt_engaged": True,
            "adapt_gamma": 0.05,  # Regularization scale
            "adapt_delta": 0.8,  # Target acceptance probability (.8 default)
            "adapt_kappa": 0.75,  # Relaxation exponent
            "adapt_t0": 10,  # Adaptation iteration offset
            "adapt_init_buffer": 75,  # First fast adapt period
            "adapt_term_buffer": 50,  # Last fast adapt period
            "adapt_window": 25,  # First slow adapt period
            "max_treedepth": 10,  # N_leapfrog ~ 2**max_treedepth
        }

        self.stan_fitting_kwargs = {
            "chains": n_chains,
            "iter": samples_per_chain + warmup,
            "warmup": warmup,
            "init": "random",
            "init_r": 1.0,
            "n_jobs": n_jobs,
            "control": control
        }

        self._fit_results = None
        self._fit_X = None
        self.normalize = normalize
        self.max_samples_mem = max_samples_mem

        if normalize:
            self._y_ss = StandardScaler(with_mean=True)
            self._X_ss = StandardScaler()
        return
Example 32
 def __init__(self, wv, m=10, analyzer=str.split, eqe=1, verbose=0, a=1,
              c=0, n_jobs=1):
     """
     Initializes the embedding based query language model query expansion
     technique
     """
     BaseEstimator.__init__(self)
     self._wv = wv
     self._analyzer = analyzer
     if eqe not in [1, 2]:
          raise ValueError("eqe must be 1 or 2.")
     self._eqe = eqe
     self.verbose = verbose
     self._a = a
     self._c = c
     self.m = m
     self.n_jobs = n_jobs
     self.vocabulary = None
Example 33
 def __init__(
     self,
     normalizer=None,
     transformer=None,
     estimator=None,
     normalize=True,
     keep_tsne_outputs=False,
 ):
     """
     @param      normalizer          None by default
     @param      transformer         :epkg:`sklearn:manifold:TSNE`
                                     by default
     @param      estimator           :epkg:`sklearn:neural_network:MLPRegressor`
                                     by default
      @param      normalize           normalizes the outputs, centers and normalizes
                                      the output of the *t-SNE* and applies that same
                                      normalization to the prediction of the estimator
      @param      keep_tsne_outputs   if True, raw outputs of
                                      :epkg:`TSNE` are stored in member
                                      *tsne_outputs_*
     """
     TransformerMixin.__init__(self)
     BaseEstimator.__init__(self)
     if estimator is None:
         estimator = MLPRegressor()
     if transformer is None:
         transformer = TSNE()
     self.estimator = estimator
     self.transformer = transformer
     self.normalizer = normalizer
     self.keep_tsne_outputs = keep_tsne_outputs
     if normalizer is not None and not hasattr(normalizer, "transform"):
         raise AttributeError(
             "normalizer {} does not have a 'transform' method.".format(
                 type(normalizer)))
     if not hasattr(transformer, "fit_transform"):
         raise AttributeError(
             "transformer {} does not have a 'fit_transform' method.".
             format(type(transformer)))
     if not hasattr(estimator, "predict"):
         raise AttributeError(
             "estimator {} does not have a 'predict' method.".format(
                 type(estimator)))
     self.normalize = normalize
Example 34
 def __init__(self, estimator=None, n_estimators=10, n_jobs=None,
              alpha=1., verbose=False):
     """
     @param      estimator           predictor trained on every bucket
     @param      n_estimators        number of estimators to train
     @param      n_jobs              number of parallel jobs (for training and predicting)
     @param      alpha               proportion of samples resampled for each training
     @param      verbose             boolean or use ``'tqdm'`` to use :epkg:`tqdm`
                                     to fit the estimators
     """
     BaseEstimator.__init__(self)
     RegressorMixin.__init__(self)
     if estimator is None:
         raise ValueError("estimator cannot be null.")
     self.estimator = estimator
     self.n_jobs = n_jobs
     self.alpha = alpha
     self.verbose = verbose
     self.n_estimators = n_estimators
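The signature matches mlinsights's IntervalRegressor; assuming that, a usage sketch:

    # Hedged usage sketch for the constructor above, assuming it is
    # mlinsights.mlmodel.IntervalRegressor.
    import numpy
    from mlinsights.mlmodel import IntervalRegressor
    from sklearn.linear_model import LinearRegression

    X = numpy.random.randn(100, 1)
    y = 2 * X.ravel() + numpy.random.randn(100) * 0.1
    model = IntervalRegressor(estimator=LinearRegression(), n_estimators=10)
    model.fit(X, y)
    print(model.predict(X[:3]))  # averaged over the resampled estimators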
Example 35
    def __init__(self, retrieval_model, matching=None,
                 query_expansion=None, name='RM',
                 labels=None):
        """TODO: to be defined1.

        :retrieval_model: A retrieval model satisfying fit and query.
        :vectorizer: A vectorizer satisfying fit and transform (and fit_transform).
        :matching: A matching operation satisfying fit and predict.
        :query_expansion: A query operation satisfying fit and transform
        :labels: Pre-defined mapping of indices to identifiers, will be inferred during fit, if not given.

        """
        BaseEstimator.__init__(self)

        self._retrieval_model = retrieval_model
        self._matching = matching
        self._query_expansion = query_expansion
        self.name = name
        self.labels_ = np.asarray(labels) if labels is not None else None
Example 37
 def __init__(self, cost_func, n_class=2):
     BaseEstimator.__init__(self)
     self.n_class = n_class
     self.cost_func = cost_func