Example #1
0
    def wrapped(cells):
        """
        Turn the given cell(s) into tensors, call the enclosed ``func`` with them and return its result.

        :param cells: a single cell when the enclosing ``nr_args`` is 1, otherwise an indexable holding one cell
            per argument (read through ``.iloc`` when available, else through plain indexing)
        :return: the result of ``func``; when the enclosing ``return_numpy`` is truthy it is converted via
            ``.numpy()`` if the sum of its shape exceeds 1, else via ``.item()``
        """
        args = []
        for pos in range(nr_args):
            # pick the cell belonging to this argument position
            if nr_args > 1:
                cell = cells.iloc[pos] if hasattr(cells, 'iloc') else cells[pos]
            else:
                cell = cells

            # anything which is not array-, number- or collection-like is passed through untouched
            if not isinstance(cell, (np.ndarray, pd.Series, pd.DataFrame, Number,
                                     list, tuple, set)):
                args.append(cell)
                continue

            if hasattr(cell, 'item') and sum(cell.shape) <= 1:
                # at most one element: extract it, object cells may hide nested arrays
                if cell.dtype == 'object':
                    payload = unpack_nested_arrays(cell.item())
                else:
                    payload = [cell.item()]
            elif hasattr(cell, 'values'):
                # pandas-like container: use the raw values, object cells may hide nested arrays
                if cell.dtype == 'object':
                    payload = unpack_nested_arrays(cell.values)
                else:
                    payload = cell.values
            elif isinstance(cell, Iterable):
                payload = cell
            else:
                # plain scalar: wrap it so that a one element tensor is created
                payload = [cell]

            args.append(t.Tensor(payload))

        result = func(*args)
        if not return_numpy:
            return result

        return result.numpy() if sum(result.shape) > 1 else result.item()
    def calculate_loss(self, fold, x, y_true, weight) -> float:
        """
        Calculate the (optionally sample weighted) mean squared error of the model predictions for ``x``.

        :param fold: not used by this implementation
        :param x: the features, unpacked and reshaped before being passed to ``sk_model.predict``
        :param y_true: the true labels, reshaped to the shape of the predictions
        :param weight: optional sample weights (its ``.values`` get flattened), or None
        :return: the result of ``metrics.mean_squared_error``
        """
        model = self.sk_model
        features = _AbstractSkModel.reshape_rnn_as_ar(unpack_nested_arrays(x))
        predicted = model.predict(features)

        # the labels need to have exactly the shape of the predictions
        expected = unpack_nested_arrays(y_true, split_multi_index_rows=False)
        expected = expected.reshape(predicted.shape)

        if weight is None:
            sample_weight = None
        else:
            sample_weight = weight.values.reshape(-1)

        return metrics.mean_squared_error(expected,
                                          predicted,
                                          sample_weight=sample_weight)
Example #3
0
    def get_values(self,
                   split_multi_index_rows=True,
                   squeeze=False,
                   dtype=None):
        """
        Unpack the values of ``self.df`` and try to arrange them in the shape of a column MultiIndex.

        :param split_multi_index_rows: forwarded to ``unpack_nested_arrays``
        :param squeeze: if True, arrays with more than two dimensions whose third dimension has size 1 are
            reshaped to drop their last dimension
        :param dtype: forwarded to ``unpack_nested_arrays``
        :return: the reshaped array, or a list of reshaped arrays if the unpacking returned a list
        """

        def _shape_like_columns(arr):
            # only data frames with multi level columns are candidates for the extra dimensions
            if has_indexed_columns(self.df) and isinstance(
                    self.df.columns, pd.MultiIndex):
                levels = multi_index_shape(self.df.columns)
                try:
                    # try to fit the nested arrays into the shape of the multi index
                    arr = arr.reshape((arr.shape[0], ) + levels +
                                      arr.shape[len(levels):])
                except ValueError as ve:
                    # shapes which simply do not match are tolerated, everything else is an error
                    if "cannot reshape array" not in str(ve):
                        raise ve

            if squeeze and arr.ndim > 2 and arr.shape[2] == 1:
                arr = arr.reshape(arr.shape[:-1])

            return arr

        # get raw values
        raw = unpack_nested_arrays(self.df, split_multi_index_rows, dtype)

        # a list of arrays gets reshaped element by element
        if isinstance(raw, List):
            return [_shape_like_columns(part) for part in raw]

        return _shape_like_columns(raw)
Example #4
0
 def test_nested_values_invalid_shape(self):
     """given a non-symmetrical nested array"""
     ragged_columns = {
         "a": [[1, 2] for _ in range(5)],
         "b": [[1, 2, 3] for _ in range(5)],
     }
     frame = pd.DataFrame(ragged_columns)

     # when extracted then shape can not be derived
     with self.assertRaises(ValueError):
         unpack_nested_arrays(frame)
Example #5
0
 def test_nested_values(self):
     """given a symmetrical nested array"""
     even_columns = {
         "a": [[1, 2] for _ in range(5)],
         "b": [[1, 2] for _ in range(5)],
     }
     frame = pd.DataFrame(even_columns)

     # when extracted then shape is 5,2,2
     unpacked = unpack_nested_arrays(frame)
     self.assertEqual((5, 2, 2), unpacked.shape)
Example #6
0
def ecdf(v):
    """
    Rank every element of ``v`` within the sorted, flattened values and scale the ranks by the element count.

    :param v: a numpy array, or a pandas Series/DataFrame which gets unpacked by ``unpack_nested_arrays`` first
    :return: an array in the shape of ``v`` holding ``(count of elements <= value, plus 1) / number of elements``
    """
    if isinstance(v, (_pd.DataFrame, _pd.Series)):
        v = unpack_nested_arrays(v)

    ordered = _np.sort(v.flatten())

    # NOTE(review): the "+ 1" on top of side='right' lets the largest value map to (n + 1) / n instead of 1;
    # kept as is because existing callers get exactly these numbers - confirm whether this is intended
    ranks = _np.searchsorted(ordered, v, side='right') + 1

    # the ranks count over all flattened elements, so scale by their number and not by the length of the
    # first axis (identical for 1-D and (n, 1) shaped input, correct for every other shape)
    return (ranks / ordered.size).reshape(v.shape)
    def fit_batch(self, x: pd.DataFrame, y: pd.DataFrame, weight: pd.DataFrame,
                  fold: int, **kwargs):
        """
        Fit ``self.sk_model`` on one batch, using ``partial_fit`` when epochs or batches are configured.

        :param x: the features of the batch
        :param y: the labels of the batch
        :param weight: the sample weights of the batch (converted, but currently not passed to the model)
        :param fold: not used by this implementation
        :param kwargs: an optional ``classes`` entry is forwarded to ``partial_fit``
        :raises ValueError: if a partial fit is needed but the model has no ``partial_fit``, or if
            ``partial_fit`` failed while no ``classes`` were passed (the original error is chained as cause)
        """
        # convert data frames to numpy arrays
        _x = _AbstractSkModel.reshape_rnn_as_ar(
            unpack_nested_arrays(x, split_multi_index_rows=False))
        _y = unpack_nested_arrays(y, split_multi_index_rows=False)
        # NOTE(review): the weights are unpacked but never handed to fit/partial_fit - confirm whether the
        # conversion is only kept for its validation effect
        _w = unpack_nested_arrays(weight, split_multi_index_rows=False)

        # drop a singleton second axis, then flatten labels of shape (n, 1) to (n, )
        if _y.ndim > 1 and _y.shape[1] == 1:
            _y = _y.reshape((len(_x), -1))
        if _y.ndim == 2 and _y.shape[1] == 1:
            _y = _y.reshape(len(_x))

        # remember the label shape of the very first batch
        if self._label_shape is None:
            self._label_shape = _y.shape

        par = self._fit_meta_data
        partial_fit = any(
            size > 1
            for size in (par.epochs, par.batch_size, par.fold_epochs)
            if size is not None)

        if not partial_fit:
            self.sk_model = self.sk_model.fit(_x, _y)
            return

        # use partial fit whenever possible
        if not hasattr(self.sk_model, "partial_fit"):
            raise ValueError(
                f"This of model does not support `partial_fit` {type(self.sk_model)} - "
                f"and therefore does not support epochs or batches.")

        kw_classes = {"classes": kwargs["classes"]} if "classes" in kwargs else {}
        try:
            self.sk_model = self.sk_model.partial_fit(_x, _y, **kw_classes)
        except Exception as e:
            if "classes" in kwargs:
                # classes were provided, so the failure has a different reason: keep the original error
                raise

            # keep the original error as second argument and as explicit cause
            raise ValueError(
                "You might need to pass 'classes' argument for partial fitting",
                e) from e
    def calculate_loss(self, fold, x, y_true, weight):
        """
        Calculate the (optionally sample weighted) loss of the model predictions for ``x``.

        :param fold: forwarded to ``self._predict``
        :param x: the features to predict from
        :param y_true: the true labels, reshaped to the shape of the predictions
        :param weight: optional sample weights (its ``.values`` get flattened), or None
        :return: ``metrics.log_loss`` if ``sk_model`` is a ``ClassifierMixin``, else
            ``metrics.mean_squared_error``
        """
        model = self.sk_model
        predicted = self._predict(model, x, fold=fold)

        # the labels need to have exactly the shape of the predictions
        expected = unpack_nested_arrays(y_true, split_multi_index_rows=False)
        expected = expected.reshape(predicted.shape)

        sample_weight = None
        if weight is not None:
            sample_weight = weight.values.reshape(-1)

        # classifiers are scored by log loss, everything else by the mean squared error
        if isinstance(self.sk_model, ClassifierMixin):
            loss_function = metrics.log_loss
        else:
            loss_function = metrics.mean_squared_error

        return loss_function(expected, predicted, sample_weight=sample_weight)
Example #9
0
 def test_nested_values_row_multiindex(self):
     """given a row-MultiIndex DataFrame"""
     group_a = [("A", i) for i in range(7)]
     group_b = [("B", i) for i in range(7, 10)]
     row_index = pd.MultiIndex.from_tuples([*group_a, *group_b])
     frame = pd.DataFrame(np.ones((10, 3)), index=row_index)

     # when extracting values
     parts = unpack_nested_arrays(frame)

     # then we have a list of numpy arrays, one per top level group
     self.assertEqual(2, len(parts))
     self.assertEqual((7, 3), parts[0].shape)
     self.assertEqual((3, 3), parts[1].shape)
Example #10
0
    def test_nested_values_column_multiindex(self):
        """given a symmetrical nested array below a two level column index"""
        # four columns with five cells each, every cell holding a two element array
        nested_columns = [[np.array([1, 2]) for _ in range(5)]
                          for _ in range(4)]
        cells = pd.DataFrame(nested_columns).T.values
        header = pd.MultiIndex.from_tuples([("A", 0), ("A", 1), ("B", 0),
                                            ("B", 1)])
        df = pd.DataFrame(cells, columns=header)

        print(df)

        # when extracted then shape is 5,4,2
        unpacked = unpack_nested_arrays(df)
        self.assertEqual((5, 4, 2), unpacked.shape)
Example #11
0
    def values(self) -> np.ndarray:
        """
        Unlike pandas.values, ml.values returns an n-dimensional array which reflects a column MultiIndex and/or
        numpy arrays nested inside of the cells

        :return: numpy array with shape of MultiIndex and/or nested arrays from cells
        """
        # get raw values
        raw = unpack_nested_arrays(self.df)

        # without multi level columns there is nothing to reshape
        multi_level_columns = hasattr(self.df, 'columns') and isinstance(self.df.columns, pd.MultiIndex)
        if not multi_level_columns:
            return raw

        # rows first, then the shape of the column index, then whatever dimensions are left
        levels = multi_index_shape(self.df.columns)
        target_shape = (raw.shape[0],) + levels + raw.shape[len(levels):]
        return raw.reshape(target_shape)
Example #12
0
    def _decode(self, latent_features: pd.DataFrame, samples,
                **kwargs) -> Typing.PatchedDataFrame:
        """
        Decode latent features with a regressor assembled from the trailing layers of the fitted model.

        :param latent_features: the encoded features, its index is reused for the result
        :param samples: not used by this implementation
        :param kwargs: not used by this implementation
        :return: the decoded values as data frame with ``self._feature_columns`` as columns
        :raises ValueError: if the model has no ``coefs_`` attribute, i.e. was not fitted yet
        """
        skm = self.sk_model
        if not hasattr(skm, 'coefs_'):
            raise ValueError("Model needs to be 'fit' first!")

        decoder_kwargs = {
            "hidden_layer_sizes": self.decoder_layers,
            **self.kwargs
        }
        decoder = call_callable_dynamic_args(MLPRegressor, **decoder_kwargs)

        # everything behind the encoder layers belongs to the decoder
        decoder_start = len(self.encoder_layers)
        decoder.coefs_ = skm.coefs_[decoder_start:].copy()
        decoder.intercepts_ = skm.intercepts_[decoder_start:].copy()
        decoder.n_layers_ = len(decoder.coefs_) + 1
        decoder.n_outputs_ = self.layers[-1]
        decoder.out_activation_ = skm.out_activation_

        latent = unpack_nested_arrays(latent_features,
                                      split_multi_index_rows=False)
        decoded = decoder.predict(_AbstractSkModel.reshape_rnn_as_ar(latent))
        return to_pandas(decoded, latent_features.index, self._feature_columns)
Example #13
0
    def _predict(self,
                 skm,
                 features: pd.DataFrame,
                 samples=1,
                 **kwargs) -> np.ndarray:
        """
        Predict from the given features, using ``predict_proba`` whenever the model offers it.

        :param skm: the model to predict with
        :param features: the features, unpacked and reshaped before being passed to the model
        :param samples: number of repeated predictions; if greater than 1 they are stacked and the first
            two axes are swapped
        :param kwargs: not used by this implementation
        :return: a single prediction, or the stacked predictions if ``samples`` is greater than 1
        """
        inputs = _AbstractSkModel.reshape_rnn_as_ar(
            unpack_nested_arrays(features, split_multi_index_rows=False))
        has_proba = callable(getattr(skm, 'predict_proba', None))

        def draw():
            if not has_proba:
                return skm.predict(inputs)

            proba = skm.predict_proba(inputs)
            label_shape = self._label_shape

            # one dimensional labels or a single label column are treated as binary classification
            if len(label_shape) == 1 or label_shape[1] == 1:
                return 1 - proba[:, 0]

            return proba.reshape(-1, *label_shape[1:])

        if samples > 1:
            stacked = np.array([draw() for _ in range(samples)])
            return stacked.swapaxes(0, 1)

        return draw()
Example #14
0
        def scaler(row):
            """
            Rescale the unpacked values of one row with the strategy selected by the enclosing ``normalizer``.

            :param row: the row whose (possibly nested) cell values get rescaled
            :return: the rescaled values in the shape of the unpacked row, or whatever a callable ``normalizer``
                returns for ``row``
            :raises ValueError: if ``normalizer`` is neither one of the known names nor a callable
            """
            values = unpack_nested_arrays(row, split_multi_index_rows=False)
            # a single column holding every value, so that the whole row is scaled as one unit
            values_2d = values.reshape(-1, 1)

            if normalizer == 'minmax01':
                return MinMaxScaler().fit(values_2d).transform(
                    values_2d).reshape(values.shape)
            elif normalizer == 'minmax-11':
                return MinMaxScaler(feature_range=(
                    -1, 1)).fit(values_2d).transform(values_2d).reshape(
                        values.shape)
            elif normalizer == 'standard':
                # (value - mean) / std
                # the parentheses matter: without them only the mean gets divided by the std
                return (values - values.mean()) / np.std(values)
            elif normalizer == 'uniform':
                return ecdf(values_2d).reshape(values.shape)
            elif callable(normalizer):
                # a custom normalizer receives the untouched row
                return normalizer(row)
            else:
                raise ValueError(
                    'unknown normalizer need to one of: [minmax01, minmax-11, uniform, standard, callable(r)]'
                )
Example #15
0
 def _auto_encode(self, features: pd.DataFrame, samples,
                  **kwargs) -> Typing.PatchedDataFrame:
     """
     Run the features through ``sk_model.predict`` and return the result as data frame.

     :param features: the features, its index is reused for the result
     :param samples: not used by this implementation
     :param kwargs: not used by this implementation
     :return: the predictions as data frame with ``self._labels_columns`` as columns
     """
     unpacked = unpack_nested_arrays(features, split_multi_index_rows=False)
     inputs = _AbstractSkModel.reshape_rnn_as_ar(unpacked)
     predicted = self.sk_model.predict(inputs)

     return to_pandas(predicted, features.index, self._labels_columns)