def test_function_transformer_raise_error_with_mixed_dtype(X_type):
    """Check that `FunctionTransformer.check_inverse` raises error on mixed dtype."""
    mapping = {"one": 1, "two": 2, "three": 3, 5: "five", 6: "six"}
    inverse_mapping = {value: key for key, value in mapping.items()}
    dtype = "object"

    data = ["one", "two", "three", "one", "one", 5, 6]
    data = _convert_container(data, X_type, columns_name=["value"], dtype=dtype)

    def func(X):
        return np.array(
            [mapping[_safe_indexing(X, i)] for i in range(X.size)], dtype=object
        )

    def inverse_func(X):
        return _convert_container(
            [inverse_mapping[x] for x in X],
            X_type,
            columns_name=["value"],
            dtype=dtype,
        )

    transformer = FunctionTransformer(
        func=func, inverse_func=inverse_func, validate=False, check_inverse=True
    )

    msg = "'check_inverse' is only supported when all the elements in `X` is numerical."
    with pytest.raises(ValueError, match=msg):
        transformer.fit(data)
class LogLGBM(LGBMRegressor):
    def __init__(self, target=None, **kwargs):
        super().__init__(**kwargs)
        self.target = target  # stored so sklearn clone()/get_params() keep it
        if target == "Oil_norm":
            self.target_scaler = PowerTransformer(method='box-cox',
                                                  standardize=False)
        elif target in ('Gas_norm', 'Water_norm'):
            self.target_scaler = FunctionTransformer(func=np.log1p,
                                                     inverse_func=np.expm1)
        else:
            # identity fallback so fit()/predict() never hit an unset scaler
            self.target_scaler = FunctionTransformer()

    def fit(self, X, Y, **kwargs):
        # y_train = np.log1p(Y)
        self.target_scaler.fit(Y.values.reshape(-1, 1) + 1)
        y_train = pd.Series(
            self.target_scaler.transform(Y.values.reshape(-1, 1) + 1).reshape(
                -1, ))
        super(LogLGBM, self).fit(X, y_train, **kwargs)

        return self

    def predict(self, X):
        preds = super(LogLGBM, self).predict(X).reshape(-1, 1)
        preds = self.target_scaler.inverse_transform(preds) - 1
        return preds[:, 0]
Example #3
File: scale.py Project: jambo6/batteries
class TensorScaler(TransformerMixin):
    """Scaling for 3D tensors.

    Assumes the size is (..., length, input_channels), reshapes to (..., input_channels), performs the method
    operation and then reshapes back.

    Arguments:
        method (str): Scaling method, one of ('stdsc', 'ma', 'mms').
        scaling_function (transformer): An sklearn transformer that performs the scaling operation.
            Only one of this or ``method`` may be specified.
    """
    def __init__(self, method="stdsc", scaling_function=None):
        self.scaling = method

        if all([method is None, scaling_function is None]):
            self.scaler = FunctionTransformer(func=None)
        elif isinstance(method, str):
            scaler_cls = SCALERS.get(method)
            assert scaler_cls is not None, (
                "Scalings allowed are {}, received {}.".format(
                    SCALERS.keys(), method))
            self.scaler = scaler_cls()
        else:
            self.scaler = scaling_function

    @apply_fit_to_channels
    def fit(self, data, labels=None):
        self.scaler.fit(data)
        return self

    @apply_transform_to_channels
    def transform(self, data):
        output_data = torch.Tensor(self.scaler.transform(data))
        return output_data
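
The `apply_fit_to_channels` / `apply_transform_to_channels` decorators are not shown in this listing; below is a minimal sketch of the reshape round-trip the docstring describes, assuming a plain torch tensor and a StandardScaler:

import torch
from sklearn.preprocessing import StandardScaler

x = torch.randn(8, 50, 3)  # (batch, length, input_channels)

# flatten everything except the channel axis, scale, then reshape back
flat = x.reshape(-1, x.size(-1)).numpy()
scaler = StandardScaler().fit(flat)
scaled = torch.Tensor(scaler.transform(flat)).reshape(x.shape)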
Example #4
class FunctionTransformerPrim(primitive):
    def __init__(self, random_state=0):
        super(FunctionTransformerPrim,
              self).__init__(name='FunctionTransformer')
        self.id = 11
        self.hyperparams = []
        self.type = 'feature preprocess'
        self.description = "Constructs a transformer from an arbitrary callable. A FunctionTransformer forwards its X (and optionally y) arguments to a user-defined function or function object and returns the result of this function. This is useful for stateless transformations such as taking the log of frequencies, doing custom scaling, etc."
        self.hyperparams_run = {'default': True}
        self.scaler = FunctionTransformer()
        self.accept_type = 'c_t'

    def can_accept(self, data):
        return self.can_accept_c(data)

    def is_needed(self, data):
        # data = handle_data(data)
        # Update
        return True

    def fit(self, data):
        data = handle_data(data)
        self.scaler.fit(data['X'])

    def produce(self, data):
        output = handle_data(data)
        cols = list(output['X'].columns)
        cols = ["{}_qntl".format(x) for x in cols]
        output['X'] = pd.DataFrame(self.scaler.transform(output['X']),
                                   columns=cols)
        final_output = {0: output}
        return final_output
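
The description above singles out stateless transformations such as taking the log of frequencies; a minimal sketch of that case with illustrative data (not part of the primitive itself):

import numpy as np
import pandas as pd
from sklearn.preprocessing import FunctionTransformer

counts = pd.DataFrame({"clicks": [0, 3, 10, 120]})
log_tf = FunctionTransformer(func=np.log1p, inverse_func=np.expm1)
log_counts = log_tf.fit_transform(counts)  # log1p keeps zero counts finite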
    def test_function_transformer(self):
        x = numpy.array([[6.1, -5], [3.5, -7.8]], dtype=numpy.float32)
        tr = FunctionTransformer(custom_fct)
        tr.fit(x)
        y_exp = tr.transform(x)
        self.assertEqualArray(
            numpy.array([[6.1, 0.], [3.5, 0.]], dtype=numpy.float32), y_exp)

        onnx_model = to_onnx(tr, x)
        oinf = OnnxInference(onnx_model)
        y_onx = oinf.run({'X': x})
        self.assertEqualArray(y_exp, y_onx['variable'])
Example #6
 def test_function_transformer_fft_abs(self):
     for rt, fct in [('py', custom_fft_abs),
                     ('ort', custom_fft_abs_ort)]:
         with self.subTest(runtime=rt):
             x = numpy.array([[6.1, -5], [3.5, -7.8]],
                             dtype=numpy.float32)
             tr = FunctionTransformer(fct)
             tr.fit(x)
             y_exp = tr.transform(x)
             onnx_model = to_onnx(tr, x)
             oinf = OnnxInference(onnx_model)
             y_onx = oinf.run({'X': x})
             self.assertEqualArray(y_exp, y_onx['variable'], decimal=5)
 def test_function_transformer_pickle(self):
     x = numpy.array([[6.1, -5], [3.5, -7.8]], dtype=numpy.float32)
     tr = FunctionTransformer(custom_fct)
     tr.fit(x)
     y_exp = tr.transform(x)
     st = BytesIO()
     # import cloudpickle as pkl
     pkl = pickle
     pkl.dump(tr, st)
     cp = BytesIO(st.getvalue())
     tr2 = pkl.load(cp)
     y_exp2 = tr2.transform(x)
     self.assertEqualArray(y_exp, y_exp2)
Example #8
class _FunctionTransformerImpl:
    def __init__(self, **hyperparams):
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        if y is not None:
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def transform(self, X):
        return self._wrapped_model.transform(X)
Example #9
def test_functiontransformer_vs_sklearn():
    # Compare msmbuilder.preprocessing.FunctionTransformer
    # with sklearn.preprocessing.FunctionTransformer

    functiontransformerr = FunctionTransformerR()
    functiontransformerr.fit(np.concatenate(trajs))

    functiontransformer = FunctionTransformer()
    functiontransformer.fit(trajs)

    y_ref1 = functiontransformerr.transform(trajs[0])
    y1 = functiontransformer.transform(trajs)[0]

    np.testing.assert_array_almost_equal(y_ref1, y1)
Example #10
File: noise.py Project: yushu-liu/whatlies
class Noise:
    """
    This transformer adds gaussian noise to an embeddingset.

    Arguments:
        sigma: the amount of gaussian noise to add
        seed: seed value for random number generator

    Usage:

    ```python
    from whatlies.language import SpacyLanguage
    from whatlies.transformers import Noise

    words = ["prince", "princess", "nurse", "doctor", "banker", "man", "woman",
             "cousin", "neice", "king", "queen", "dude", "guy", "gal", "fire",
             "dog", "cat", "mouse", "red", "bluee", "green", "yellow", "water",
             "person", "family", "brother", "sister"]

    lang = SpacyLanguage("en_core_web_md")
    emb = lang[words]

    emb.transform(Noise(3))
    ```
    """
    def __init__(self, sigma=0.1, seed=42):
        self.is_fitted = False
        self.seed = seed
        self.tfm = FunctionTransformer(
            lambda X: X + np.random.normal(0, sigma, X.shape))

    def __call__(self, embset):
        if not self.is_fitted:
            self.fit(embset)
        return self.transform(embset)

    def fit(self, embset):
        names, X = embset_to_X(embset=embset)
        self.tfm.fit(X)
        self.is_fitted = True

    def transform(self, embset):
        names, X = embset_to_X(embset=embset)
        np.random.seed(self.seed)
        new_vecs = self.tfm.transform(X)
        new_dict = new_embedding_dict(names, new_vecs, embset)
        return EmbeddingSet(
            new_dict,
            name=f"{embset.name}",
        )
class PasstroughEncoder(BaseEstimator, TransformerMixin):
    def __init__(self, passthrough=True):
        self.passthrough = passthrough

    def fit(self, X, y=None):
        self.encoder = FunctionTransformer(None, validate=True)
        self.encoder.fit(X)
        # self.columns = np.array(X.columns)
        return self

    # def get_feature_names(self):
    #     return self.columns

    def transform(self, X):
        return self.encoder.transform(X)
Example #13
 def get_target_variable_scaler(self, y, target_column, group_type=None):
     assert target_column in self.targetScaling
     scaler_type = self.targetScaling[target_column]
     if scaler_type == "identity":
         scaler = FunctionTransformer()
     elif scaler_type == "log":
         scaler = FunctionTransformer(func=np.log, inverse_func=np.exp)
     # Power Transform
     else:
         scaler = PowerTransformer()
     # Fit the scaler
     scaler.fit(y[:, np.newaxis])
     # Be sure that the inverse transform works as expected
     # _y_transformed = pd.Series(scaler.transform(self.dataset.mainDataFrame[target_column][:, np.newaxis])[:, 0])
     # _y_back = scaler.inverse_transform(_y_transformed[:, np.newaxis])[:, 0]
     # assert np.allclose(self.dataset.mainDataFrame[target_column], _y_back)
     self.targetScalers[target_column] = scaler
     return scaler
Example #14
def FncTran(df, target):
  # split into X and y datasets
  X_init = df.drop(target, axis=1)
  y_init = df[target]
  dum = FunctionTransformer()
  scaled = RobScale(X_init)
  print('Function transformer fitting...')
  fit = dum.fit(scaled)
  print('Function transforming...')
  dfit = pd.DataFrame(fit.transform(scaled))
  # drop any NaNs that may have been made (there were few in the landslides vectorization)
  # join_axes was removed from pandas; reindex gives the same alignment
  dfity = pd.concat([dfit, y_init], axis=1).reindex(y_init.index).dropna()
  print('The encoded data has shape:',dfity.shape,'\n\n')
  return dfity
Example #15
def test_vectorize_sklearn(constraint, axis):
    # get dataset
    from sklearn.datasets import load_iris
    iris = load_iris()

    # build transform
    ineq = vectorize(constraint, axis)
    from sklearn.preprocessing import FunctionTransformer
    t = FunctionTransformer(func=ineq, validate=False)  #XXX: inverse?

    # test transform
    import numpy as np
    iris_ = t.fit(iris.data).transform(iris.data)
    assert np.all(t._transform(iris_) == iris_)
    def fit(self, X: pd.DataFrame, y=None):

        # Categorial encoders
        for feat in self._cat_feat:
            if feat in X:
                ohe = OneHotEncoder(handle_unknown='ignore', sparse=False)
                self._transf[feat] = ohe.fit(X.loc[~X[feat].isna(), [feat]])

        # Labels and confidence
        for feat, encoder in zip([self._label_feat, self._conf_feat],
                                 [self.encode_labels, self.encode_confidence]):
            if feat in X:
                le = FunctionTransformer(func=encoder, validate=True)
                valid_mask = ~X[feat].isna()
                self._transf[feat] = le.fit(X.loc[valid_mask, [feat]])

        return self
Example #17
def test_check_inverse():
    X_dense = np.array([1, 4, 9, 16], dtype=np.float64).reshape((2, 2))

    X_list = [X_dense, sparse.csr_matrix(X_dense), sparse.csc_matrix(X_dense)]

    for X in X_list:
        if sparse.issparse(X):
            accept_sparse = True
        else:
            accept_sparse = False
        trans = FunctionTransformer(
            func=np.sqrt,
            inverse_func=np.around,
            accept_sparse=accept_sparse,
            check_inverse=True,
            validate=True,
        )
        warning_message = (
            "The provided functions are not strictly"
            " inverse of each other. If you are sure you"
            " want to proceed regardless, set"
            " 'check_inverse=False'."
        )
        with pytest.warns(UserWarning, match=warning_message):
            trans.fit(X)

        trans = FunctionTransformer(
            func=np.expm1,
            inverse_func=np.log1p,
            accept_sparse=accept_sparse,
            check_inverse=True,
            validate=True,
        )
        with warnings.catch_warnings():
            warnings.simplefilter("error", UserWarning)
            Xt = trans.fit_transform(X)

        assert_allclose_dense_sparse(X, trans.inverse_transform(Xt))

    # check that we don't check inverse when one of the func or inverse is not
    # provided.
    trans = FunctionTransformer(
        func=np.expm1, inverse_func=None, check_inverse=True, validate=True
    )
    with warnings.catch_warnings():
        warnings.simplefilter("error", UserWarning)
        trans.fit(X_dense)
    trans = FunctionTransformer(
        func=None, inverse_func=np.expm1, check_inverse=True, validate=True
    )
    with warnings.catch_warnings():
        warnings.simplefilter("error", UserWarning)
        trans.fit(X_dense)
Example #18
class TransformedTargetRegressor(RegressorMixin, BaseEstimator):
    """Meta-estimator to regress on a transformed target.

    Useful for applying a non-linear transformation to the target ``y`` in
    regression problems. This transformation can be given as a Transformer
    such as the QuantileTransformer or as a function and its inverse such as
    ``log`` and ``exp``.

    The computation during ``fit`` is::

        regressor.fit(X, func(y))

    or::

        regressor.fit(X, transformer.transform(y))

    The computation during ``predict`` is::

        inverse_func(regressor.predict(X))

    or::

        transformer.inverse_transform(regressor.predict(X))

    Read more in the :ref:`User Guide <transformed_target_regressor>`.

    .. versionadded:: 0.20

    Parameters
    ----------
    regressor : object, default=None
        Regressor object such as derived from ``RegressorMixin``. This
        regressor will automatically be cloned each time prior to fitting.
        If regressor is ``None``, ``LinearRegression()`` is created and used.

    transformer : object, default=None
        Estimator object such as derived from ``TransformerMixin``. Cannot be
        set at the same time as ``func`` and ``inverse_func``. If
        ``transformer`` is ``None`` as well as ``func`` and ``inverse_func``,
        the transformer will be an identity transformer. Note that the
        transformer will be cloned during fitting. Also, the transformer is
        restricting ``y`` to be a numpy array.

    func : function, default=None
        Function to apply to ``y`` before passing to ``fit``. Cannot be set at
        the same time as ``transformer``. The function needs to return a
        2-dimensional array. If ``func`` is ``None``, the function used will be
        the identity function.

    inverse_func : function, default=None
        Function to apply to the prediction of the regressor. Cannot be set at
        the same time as ``transformer`` as well. The function needs to return
        a 2-dimensional array. The inverse function is used to return
        predictions to the same space of the original training labels.

    check_inverse : bool, default=True
        Whether to check that ``transform`` followed by ``inverse_transform``
        or ``func`` followed by ``inverse_func`` leads to the original targets.

    Attributes
    ----------
    regressor_ : object
        Fitted regressor.

    transformer_ : object
        Transformer used in ``fit`` and ``predict``.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.linear_model import LinearRegression
    >>> from sklearn.compose import TransformedTargetRegressor
    >>> tt = TransformedTargetRegressor(regressor=LinearRegression(),
    ...                                 func=np.log, inverse_func=np.exp)
    >>> X = np.arange(4).reshape(-1, 1)
    >>> y = np.exp(2 * X).ravel()
    >>> tt.fit(X, y)
    TransformedTargetRegressor(...)
    >>> tt.score(X, y)
    1.0
    >>> tt.regressor_.coef_
    array([2.])

    Notes
    -----
    Internally, the target ``y`` is always converted into a 2-dimensional array
    to be used by scikit-learn transformers. At the time of prediction, the
    output will be reshaped to have the same number of dimensions as ``y``.

    See :ref:`examples/compose/plot_transformed_target.py
    <sphx_glr_auto_examples_compose_plot_transformed_target.py>`.

    """
    @_deprecate_positional_args
    def __init__(self, regressor=None, *, transformer=None,
                 func=None, inverse_func=None, check_inverse=True):
        self.regressor = regressor
        self.transformer = transformer
        self.func = func
        self.inverse_func = inverse_func
        self.check_inverse = check_inverse

    def _fit_transformer(self, y):
        """Check transformer and fit transformer.

        Create the default transformer, fit it and make additional inverse
        check on a subset (optional).

        """
        if (self.transformer is not None and
                (self.func is not None or self.inverse_func is not None)):
            raise ValueError("'transformer' and functions 'func'/"
                             "'inverse_func' cannot both be set.")
        elif self.transformer is not None:
            self.transformer_ = clone(self.transformer)
        else:
            if self.func is not None and self.inverse_func is None:
                raise ValueError("When 'func' is provided, 'inverse_func' must"
                                 " also be provided")
            self.transformer_ = FunctionTransformer(
                func=self.func, inverse_func=self.inverse_func, validate=True,
                check_inverse=self.check_inverse)
        # XXX: sample_weight is not currently passed to the
        # transformer. However, if transformer starts using sample_weight, the
        # code should be modified accordingly. At the time to consider the
        # sample_prop feature, it is also a good use case to be considered.
        self.transformer_.fit(y)
        if self.check_inverse:
            idx_selected = slice(None, None, max(1, y.shape[0] // 10))
            y_sel = _safe_indexing(y, idx_selected)
            y_sel_t = self.transformer_.transform(y_sel)
            if not np.allclose(y_sel,
                               self.transformer_.inverse_transform(y_sel_t)):
                warnings.warn("The provided functions or transformer are"
                              " not strictly inverse of each other. If"
                              " you are sure you want to proceed regardless"
                              ", set 'check_inverse=False'", UserWarning)

    def fit(self, X, y, **fit_params):
        """Fit the model according to the given training data.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training vector, where n_samples is the number of samples and
            n_features is the number of features.

        y : array-like of shape (n_samples,)
            Target values.

        **fit_params : dict
            Parameters passed to the ``fit`` method of the underlying
            regressor.


        Returns
        -------
        self : object
        """
        y = check_array(y, accept_sparse=False, force_all_finite=True,
                        allow_nd=True, ensure_2d=False, dtype='numeric')

        # store the number of dimension of the target to predict an array of
        # similar shape at predict
        self._training_dim = y.ndim

        # transformers are designed to modify X which is 2d dimensional, we
        # need to modify y accordingly.
        if y.ndim == 1:
            y_2d = y.reshape(-1, 1)
        else:
            y_2d = y
        self._fit_transformer(y_2d)

        # transform y and convert back to 1d array if needed
        y_trans = self.transformer_.transform(y_2d)
        # FIXME: a FunctionTransformer can return a 1D array even when validate
        # is set to True. Therefore, we need to check the number of dimension
        # first.
        if y_trans.ndim == 2 and y_trans.shape[1] == 1:
            y_trans = y_trans.squeeze(axis=1)

        if self.regressor is None:
            from ..linear_model import LinearRegression
            self.regressor_ = LinearRegression()
        else:
            self.regressor_ = clone(self.regressor)

        self.regressor_.fit(X, y_trans, **fit_params)

        return self

    def predict(self, X):
        """Predict using the base regressor, applying inverse.

        The regressor is used to predict and the ``inverse_func`` or
        ``inverse_transform`` is applied before returning the prediction.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Samples.

        Returns
        -------
        y_hat : ndarray of shape (n_samples,)
            Predicted values.

        """
        check_is_fitted(self)
        pred = self.regressor_.predict(X)
        if pred.ndim == 1:
            pred_trans = self.transformer_.inverse_transform(
                pred.reshape(-1, 1))
        else:
            pred_trans = self.transformer_.inverse_transform(pred)
        if (self._training_dim == 1 and
                pred_trans.ndim == 2 and pred_trans.shape[1] == 1):
            pred_trans = pred_trans.squeeze(axis=1)

        return pred_trans

    def _more_tags(self):
        return {'poor_score': True, 'no_validation': True}

    @property
    def n_features_in_(self):
        # For consistency with other estimators we raise an AttributeError so
        # that hasattr() returns False when the estimator isn't fitted.
        try:
            check_is_fitted(self)
        except NotFittedError as nfe:
            raise AttributeError(
                "{} object has no n_features_in_ attribute."
                .format(self.__class__.__name__)
            ) from nfe

        return self.regressor_.n_features_in_
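
The docstring notes that the transformation can also be given as a full transformer such as the QuantileTransformer; a minimal sketch with synthetic data (check_inverse may warn, since quantile mapping is only approximately invertible):

import numpy as np
from sklearn.compose import TransformedTargetRegressor
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import QuantileTransformer

rng = np.random.RandomState(0)
X = rng.uniform(size=(100, 1))
y = np.exp(3 * X.ravel() + rng.normal(scale=0.1, size=100))

tt = TransformedTargetRegressor(
    regressor=LinearRegression(),
    transformer=QuantileTransformer(n_quantiles=50,
                                    output_distribution="normal"),
)
tt.fit(X, y)
print(tt.score(X, y))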
ax7.plot(interp_data[:, 6])
plt.show()

# normalise the data: use either this or the transformation below
scaler = preprocessing.MinMaxScaler()
scaler.fit(interp_data)
norm = scaler.transform(interp_data)

# transformation technique: keep only one of the two,
# either normalisation or transformation
from sklearn.preprocessing import FunctionTransformer
from sklearn import preprocessing
from scipy import stats

transformer = FunctionTransformer(stats.zscore)
transformer.fit(interp_data)
transf = transformer.transform(interp_data)
#transf = transf.reshape(-1)
#norm = norm.reshape(-1)

#supervised t-1 w/o unnecessary columns
#if scaled, use norm, if transformed, use transf
temp = shift(transf, 1, cval=np.NaN)
temp = temp[:, 1:]
Last_column = shift(transf[:, 6], 1, cval=np.NaN)  #reform to supervised
data_t1 = np.column_stack([temp, Last_column])

#train and test data + reformulation (without NAN)
#if scaled, use norm, if transformed, use transf
data_t1 = data_t1[1:15000]
Y = transf[1:15000, 0]
Example #20
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.preprocessing import FunctionTransformer
from sklearn.model_selection import train_test_split
# -------------------------------------------------------------------
data = load_breast_cancer()
X = data.data
y = data.target


# -------------------------------------------------------------------
def function1(z):
    return np.sqrt(z)
    # return np.log1p(z)
    # return np.power(z,4)


f = FunctionTransformer(func=function1, validate=True)
f.fit(X)
x_f = f.transform(X)
# -------------------------------------------------------------------
x_train, x_test, y_train, y_test = train_test_split(x_f, y, test_size=0.2)
# -------------------------------------------------------------------
logreg = LogisticRegression(max_iter=1000)
logreg.fit(x_train, y_train)
result = logreg.predict(x_test)
print(accuracy_score(y_test, result))
conf = confusion_matrix(y_test, result)
print('confusion matrix \n', conf)
Example #21
class AdvancedTransformedTargetRegressor(TransformedTargetRegressor):
    """Expand :class:`sklearn.compose.TransformedTargetRegressor`."""
    @property
    def coef_(self):
        """numpy.ndarray: Model coefficients."""
        return self.regressor_.coef_

    @property
    def feature_importances_(self):
        """numpy.ndarray: Feature importances."""
        return self.regressor_.feature_importances_

    def fit(self, x_data, y_data, **fit_kwargs):
        """Expand :meth:`fit` to accept kwargs."""
        (y_2d,
         regressor_kwargs) = self.fit_transformer_only(y_data, **fit_kwargs)

        # Transform y and convert back to 1d array if necessary
        y_trans = self.transformer_.transform(y_2d)
        if y_trans.ndim == 2 and y_trans.shape[1] == 1:
            y_trans = y_trans.squeeze(axis=1)

        # Perform linear regression if regressor is not given
        if self.regressor is None:
            self.regressor_ = LinearRegression()
        else:
            self.regressor_ = clone(self.regressor)

        # Fit regressor with kwargs
        self.regressor_.fit(x_data, y_trans, **regressor_kwargs)
        return self

    def fit_transformer_only(self, y_data, **fit_kwargs):
        """Fit only ``transformer`` step."""
        y_data = check_array(y_data,
                             accept_sparse=False,
                             force_all_finite=True,
                             ensure_2d=False,
                             dtype='numeric')
        self._training_dim = y_data.ndim

        # Process kwargs
        (_, regressor_kwargs) = self._get_fit_params(fit_kwargs)

        # Transformers are designed to modify X which is 2D, modify y_data
        # FIXME: Transformer does NOT use transformer_kwargs
        if y_data.ndim == 1:
            y_2d = y_data.reshape(-1, 1)
        else:
            y_2d = y_data
        self._fit_transformer(y_2d)
        return (y_2d, regressor_kwargs)

    def predict(self, x_data, always_return_1d=True, **predict_kwargs):
        """Expand :meth:`predict()` to accept kwargs."""
        check_is_fitted(self)
        if not hasattr(self, 'regressor_'):
            raise NotFittedError(
                f"Regressor of {self.__class__} is not fitted yet, call fit() "
                f"first")

        # Kwargs for returning variance or covariance
        if ('return_std' in predict_kwargs and 'return_std' in getfullargspec(
                self.regressor_.predict).args):
            raise NotImplementedError(
                f"Using keyword argument 'return_std' for final regressor "
                f"{self.regressor_.__class__} is not supported yet, only "
                f"'return_var' is allowed. Expand the regressor to accept "
                f"'return_var' instead (see 'esmvaltool/diag_scripts/mlr"
                f"/models/gpr_sklearn.py' for an example)")
        mlr.check_predict_kwargs(predict_kwargs)
        return_var = predict_kwargs.get('return_var', False)
        return_cov = predict_kwargs.get('return_cov', False)

        # Prediction
        prediction = self.regressor_.predict(x_data, **predict_kwargs)
        if return_var or return_cov:
            pred = prediction[0]
        else:
            pred = prediction
        if pred.ndim == 1:
            pred_trans = self.transformer_.inverse_transform(
                pred.reshape(-1, 1))
        else:
            pred_trans = self.transformer_.inverse_transform(pred)
        if self._to_be_squeezed(pred_trans, always_return_1d=always_return_1d):
            pred_trans = pred_trans.squeeze(axis=1)
        if not (return_var or return_cov):
            return pred_trans

        # Return scaled variance or covariance if desired
        err = prediction[1]
        if not hasattr(self.transformer_, 'scale_'):
            raise NotImplementedError(
                f"Transforming of additional prediction output (e.g. by "
                f"'return_var' or 'return_cov') is not supported for "
                f"transformer {self.transformer_.__class__} yet, the "
                f"necessary attribute 'scale_' is missing")
        scale = self.transformer_.scale_
        if scale is not None:
            err *= scale**2
        if self._to_be_squeezed(err, always_return_1d=always_return_1d):
            err = err.squeeze(axis=1)
        return (pred_trans, err)

    def _get_fit_params(self, fit_kwargs):
        """Separate ``transformer`` and ``regressor`` kwargs."""
        steps = [
            ('transformer', self.transformer),
            ('regressor', self.regressor),
        ]
        fit_params = _get_fit_parameters(fit_kwargs, steps, self.__class__)
        fit_params.setdefault('transformer', {})
        fit_params.setdefault('regressor', {})

        # FIXME
        if fit_params['transformer']:
            raise NotImplementedError(
                f"Fit parameters {fit_params['transformer']} for transformer "
                f"{self.transformer.__class__} of {self.__class__} are not "
                f"supported at the moment")

        return (fit_params['transformer'], fit_params['regressor'])

    def _fit_transformer(self, y_data):
        """Check transformer and fit transformer."""
        if (self.transformer is not None
                and (self.func is not None or self.inverse_func is not None)):
            raise ValueError("'transformer' and functions 'func'/"
                             "'inverse_func' cannot both be set.")
        if self.transformer is not None:
            self.transformer_ = clone(self.transformer)
        else:
            if self.func is not None and self.inverse_func is None:
                raise ValueError(
                    "When 'func' is provided, 'inverse_func' must also be "
                    "provided")
            self.transformer_ = FunctionTransformer(
                func=self.func,
                inverse_func=self.inverse_func,
                validate=True,
                check_inverse=self.check_inverse)
        self.transformer_.fit(y_data)
        if self.check_inverse:
            idx_selected = slice(None, None, max(1, y_data.shape[0] // 10))
            y_sel = _safe_indexing(y_data, idx_selected)
            y_sel_t = self.transformer_.transform(y_sel)
            if not np.allclose(y_sel,
                               self.transformer_.inverse_transform(y_sel_t)):
                warnings.warn(
                    "The provided functions or transformer are "
                    "not strictly inverse of each other. If "
                    "you are sure you want to proceed regardless, "
                    "set 'check_inverse=False'", UserWarning)

    def _to_be_squeezed(self, array, always_return_1d=True):
        """Check if ``array`` should be squeezed or not."""
        squeeze = array.ndim == 2 and array.shape[1] == 1
        if not always_return_1d:
            squeeze = squeeze and self._training_dim == 1
        return squeeze
Example #22
class KCenterGreedy(QueryStrategy):

    """K-Center-Greedy

    This class implements K-Center-Greedy active learning algorithm [1]_.

    Parameters
    ----------
    transformer : object
        An sklearn estimator supporting ``transform`` and/or ``fit_transform``;
        the base model used for training.


    References
    ----------

    .. [1] Core-Set... 
    """

    def __init__(self, *args, **kwargs):
        super(KCenterGreedy, self).__init__(*args, **kwargs)

        self.transformer = kwargs.pop('transformer', None)
        if self.transformer is None:
            self.transformer = FunctionTransformer()
        if not hasattr(self.transformer, "transform"):
            raise TypeError(
                "transformer must have a .transform() method"
            )
        
        # initialize the transformer on labeled pool
        # Poy: We don't need it.
        # self.transformer.fit(self.dataset.X)

    def make_query(self, n=1):
        """Return the index of the sample to be queried and labeled and
        selection score of each sample. Read-only.

        No modification to the internal states.

        Returns
        -------
        ask_ids : list
            The batch of indexes of the next unlabeled samples to be queried and labeled.

        """
        dataset = self.dataset
        # Train CNNs (models) from scratch (retrain) after each iteration [1]_.
        X_lbl_curr, y_lbl_curr = dataset.get_labeled_entries()
        idx_lbl_mask = dataset.get_labeled_mask()
        X = dataset._X
        self.transformer.fit(X_lbl_curr, y_lbl_curr)
        embed = self.transformer.transform(X)
        # Reference. KH Huang
        # https://github.com/ariapoy/deep-active-learning/blob/master/query_strategies/kcenter_greedy.py#L15
        # embed_label = embed[idx_lbl_mask]
        # embed_unlabel = embed[~idx_lbl_mask]
        # dist_mat = cdist(embed_unlabel, embed_label, metric="euclidean")
        dist_mat = cdist(embed, embed, metric="euclidean")
        dist_mat_ublxlbl = dist_mat[~idx_lbl_mask, :][:, idx_lbl_mask]

        # scores: min_{j \in s}, (s: label pool)
        res = []
        for b in range(n):
            scores = np.min(dist_mat_ublxlbl, axis=1)
            ask_id_pos = np.argmax(scores)
            unlabeled_entry_ids, _ = dataset.get_unlabeled_entries()
            ask_id = unlabeled_entry_ids[ask_id_pos]
            res.append(ask_id)
            # update dist_mat_ublxlbl
            # solve ckp2
            if not idx_lbl_mask[ask_id]:
                idx_lbl_mask[ask_id] = True
            else:
                print("index {0} is already selected".format(ask_id))
                continue

            dist_mat_ublxlbl = np.delete(dist_mat_ublxlbl, ask_id_pos, 0)
            dist_mat_ublxlbl = np.append(dist_mat_ublxlbl, dist_mat[~idx_lbl_mask, ask_id][:, None], axis=1)

        return res
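
For reference, the farthest-first selection of [1] reduces to the loop below; a minimal numpy sketch, independent of the QueryStrategy machinery above (all names are illustrative):

import numpy as np
from scipy.spatial.distance import cdist

def k_center_greedy(embed, labeled_mask, n):
    """Greedily pick n unlabeled points, each farthest from the current centers."""
    mask = labeled_mask.copy()
    # distance of every point to its nearest labeled point (assumes >= 1 labeled)
    min_dist = cdist(embed, embed[mask]).min(axis=1)
    picked = []
    for _ in range(n):
        min_dist[mask] = -np.inf  # never re-pick a center
        idx = int(np.argmax(min_dist))
        picked.append(idx)
        mask[idx] = True
        # a new center can only shrink nearest-center distances
        min_dist = np.minimum(min_dist, cdist(embed, embed[[idx]]).ravel())
    return picked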
Example #23
def empty_transformer():
    transformer_ = FunctionTransformer(validate=True)
    X = np.random.uniform(20, 30, (1000, 10))
    transformer_.fit(X)
    return transformer_
Example #24
import warnings

import numpy as np
import pandas as pd
import tensorflow as tf
# Sequential/Dense assumed from standalone Keras; use tensorflow.keras if preferred
from keras.layers import Dense
from keras.models import Sequential
from sklearn.preprocessing import FunctionTransformer

warnings.filterwarnings("ignore", category=DeprecationWarning)

seed = 444
np.random.seed(seed)
tf.set_random_seed(seed)

df = pd.read_csv("forestfires.csv", index_col=None)

features = ['temp', 'RH', 'wind', 'rain']

Y = df['area'].values
transformer = FunctionTransformer(np.log1p, inverse_func=np.expm1)
transformer.fit(Y)
unscaled_Y = Y
Y = transformer.transform(Y)
X = df[features]

input_size = len(features)


def nn1_model():
    model = Sequential()
    model.add(Dense(units=8, input_dim=input_size, activation='relu'))
    model.add(Dense(units=4, activation='relu'))
    model.add(Dense(units=1))
    model.compile(loss='mean_absolute_error', optimizer='adam')
    return model
Example #25
def log_transformer():
    transformer_ = FunctionTransformer(np.log, np.exp, validate=True)
    X = np.random.uniform(20, 30, (1000, 10))
    transformer_.fit(X)
    return transformer_
Example #26
def custom_transformer():
    transformer_ = FunctionTransformer(np.square, np.sqrt, validate=True)
    transformer_.func_inC = 'pow({x}, 2)'
    X = np.random.uniform(20, 30, (1000, 10))
    transformer_.fit(X)
    return transformer_
Example #27
class NNPredictorNumerical:
    def __init__(self, numerical_features, data, **kwargs):
        self.numerical_features = numerical_features
        self.scaler = StandardScaler()
        self.target_scaler = FunctionTransformer(func=np.log1p,
                                                 inverse_func=np.expm1)

        self.model = None
        self.data = data
        self.inputs = []
        self.build_full_network(**kwargs)

    def build_full_network(self, optimizer=None):
        if optimizer is None:  # avoid sharing one SGD instance across models
            optimizer = SGD(lr=0.001)
        # numerical input branch
        input_num = Input(shape=(len(self.numerical_features),))

        dense_num = Dense(256, activation="relu")(input_num)
        m = Dropout(rate=0.2)(dense_num)
        dense_num = Dense(128, activation="relu")(m)
        m = Dropout(rate=0.2)(dense_num)
        dense_num = Dense(64, activation="relu")(m)
        m = Dense(16, activation="relu")(dense_num)
        m = Dropout(rate=0.2)(m)
        m = Dense(8, activation="relu")(m)
        m = Dropout(rate=0.2)(m)

        m = Dense(4, activation="relu")(m)
        output = Dense(1, activation="linear")(m)

        model = Model(input_num, output)
        model.compile(loss="mae", optimizer=optimizer)
        self.model = model

    def fit(self, x, y, **kwargs):
        y = y.reshape(-1, 1)
        self.target_scaler.fit(y)
        y = self.target_scaler.transform(y)

        self.model.fit(x, y, **kwargs)

    def predict(self, x, **kwargs):
        y = self.model.predict(x, **kwargs)
        y = self.target_scaler.inverse_transform(y)
        return y

    def preprocess_data(self, X_train, X_val, X_test):
        input_list_train = []
        input_list_val = []
        input_list_test = []

        for c in self.numerical_features:
            mu = np.nanmean(X_train[c])
            X_train[c] = X_train[c].fillna(mu)
            X_test[c] = X_test[c].fillna(mu)
            X_val[c] = X_val[c].fillna(mu)

        # Fit scaler
        self.scaler.fit(X_train[self.numerical_features])
        X_train[self.numerical_features] = self.scaler.transform(
            X_train[self.numerical_features]
        )
        X_test[self.numerical_features] = self.scaler.transform(
            X_test[self.numerical_features]
        )
        X_val[self.numerical_features] = self.scaler.transform(
            X_val[self.numerical_features]
        )

        input_list_train.append(X_train[self.numerical_features].values)
        input_list_val.append(X_val[self.numerical_features].values)
        input_list_test.append(X_test[self.numerical_features].values)

        return input_list_train, input_list_val, input_list_test
Example #28
import numpy as np
from sklearn.preprocessing import FunctionTransformer

X = [[4, 1, 2, 2], [1, 3, 9, 3], [5, 7, 5, 1]]


def function1(z):
    return np.sqrt(z)


FT = FunctionTransformer(func=function1)
FT.fit(X)
newdata = FT.transform(X)
newdata