Esempio n. 1
0
def ta_macd(df: Typing.PatchedPandas,
            fast_period=12,
            slow_period=26,
            signal_period=9,
            relative=True) -> _PANDAS:
    """
    Compute the MACD line, its signal line and the histogram.

    For objects with indexed columns the indicator is computed per column and
    the results are joined under a two-level column index
    ``(original column, indicator name)``.

    :param df: series, or frame whose columns are processed one by one
    :param fast_period: period handed to ``_ema`` for the fast average
    :param slow_period: period handed to ``_ema`` for the slow average
    :param signal_period: period handed to ``_ema`` to smooth the MACD line
    :param relative: if True the MACD is ``fast / slow - 1``, otherwise
        ``fast - slow``
    :return: frame with the columns ``macd_<suffix>``, ``signal_<suffix>`` and
        ``histogram_<suffix>`` where ``<suffix>`` is
        ``"<fast_period>,<slow_period>,<signal_period>"``
    """
    if has_indexed_columns(df):
        res = _pd.DataFrame({}, index=df.index)
        for col in df.columns.to_list():
            # signal_period has to be forwarded unchanged, otherwise the
            # per-column result differs from the single-series computation
            d = ta_macd(df[col], fast_period, slow_period, signal_period,
                        relative)
            d.columns = _pd.MultiIndex.from_product([[col], d.columns])
            res = res.join(d)

        # joining onto the empty frame leaves tuple labels; rebuild a proper
        # MultiIndex from them
        res.columns = _pd.MultiIndex.from_tuples(res.columns.to_list())
        return res
    else:
        fast = _ema(df, fast_period)
        slow = _ema(df, slow_period)

        macd = (fast / slow - 1) if relative else (fast - slow)
        signal = _ema(macd, signal_period)
        hist = macd - signal
        suffix = f'{fast_period},{slow_period},{signal_period}'

        # name each result so that the join below yields labelled columns
        for label, frame in {
                f"macd_{suffix}": macd,
                f"signal_{suffix}": signal,
                f"histogram_{suffix}": hist
        }.items():
            frame.name = label

        macd = macd.to_frame() if isinstance(macd, _pd.Series) else macd
        return macd.join(signal).join(hist)
Esempio n. 2
0
def plot_bar(df,
             fields,
             figsize=None,
             ax=None,
             colors=None,
             color_map: str = 'afmhot',
             **kwargs):
    """
    Draw the selected field(s) of ``df`` as bars on a time series axis.

    :param df: source pandas object
    :param fields: selector passed to ``get_pandas_object`` to pick the data
    :param figsize: passed to ``new_fig_ts_axis`` when a new axis is needed
    :param ax: axis to draw on; a new one is created when None
    :param colors: optional selector (resolved via ``get_pandas_object``) of
        the values used to color the individual bars
    :param color_map: name of the matplotlib color map used for ``colors``
    :param kwargs: forwarded to ``ax.bar``
    :return: the axis that was drawn on
    """
    data = get_pandas_object(df, fields)

    if has_indexed_columns(data):
        # Create the axis before recursing so that all columns share one axis
        # and the caller gets back the axis that was actually used.
        if ax is None:
            _, ax = new_fig_ts_axis(figsize)

        for col in data.columns:
            plot_bar(data, col, figsize, ax, colors, color_map, **kwargs)
        return ax

    colors = get_pandas_object(df, colors)

    if ax is None:
        _, ax = new_fig_ts_axis(figsize)

    bars = ax.bar(df.index, height=data.values, label=str(data.name), **kwargs)
    if colors is not None:
        color_function = plt.get_cmap(color_map)
        # scale the color values into the [0, 1] domain of the color map
        domain = (colors.values.min(), colors.values.max()) if isinstance(
            colors, PandasObject) else (colors.min(), colors.max())
        r = ReScaler(domain, (0, 1))

        for i, c in enumerate(colors):
            color = color_function(r(c))
            # TODO if alpha is provided then color = (*color[:-1], alpha)
            bars[i].set_color(color)

    return ax
Esempio n. 3
0
def ta_one_hot_encode_discrete(po: Union[pd.Series, pd.DataFrame],
                               drop_na=True,
                               nr_of_classes=None,
                               offset=None,
                               expand=False) -> Union[pd.Series, pd.DataFrame]:
    """
    One-hot encode discrete (integer like) values.

    :param po: series, or frame whose columns are encoded one by one
    :param drop_na: drop missing values before encoding
    :param nr_of_classes: number of classes; defaults to the largest shifted
        value plus one
    :param offset: value subtracted from the data so that classes start at 0;
        defaults to the minimum of the data
    :param expand: single series only. If truthy a frame with one column per
        class is returned; if it is an iterable it is also used as the column
        labels. Not applied in the multi-column case, where every cell holds
        the encoded list.
    :return: series of one-hot lists (default), a frame of such series for
        multi-column input, or an expanded frame when ``expand`` is set
    """
    if has_indexed_columns(po):
        # forward the encoding options so that every column is encoded the
        # way the caller asked for
        return pd.DataFrame([
            ta_one_hot_encode_discrete(po[col],
                                       drop_na=drop_na,
                                       nr_of_classes=nr_of_classes,
                                       offset=offset)
            for col in po.columns
        ]).T
    else:
        if drop_na:
            po = po.dropna()

        if offset is None:
            offset = po.min()

        # shift the values so that the smallest class maps to 0
        values = po.values.astype(int)
        values = values - offset

        if nr_of_classes is None:
            nr_of_classes = values.max() + 1

        label_binarizer = LabelBinarizer()
        label_binarizer.fit(range(int(nr_of_classes)))

        if expand:
            columns = expand if isinstance(expand, Iterable) else None
            return pd.DataFrame(label_binarizer.transform(values),
                                index=po.index,
                                columns=columns)
        else:
            return pd.Series(label_binarizer.transform(values).tolist(),
                             index=po.index,
                             name=po.name)
Esempio n. 4
0
def ta_backtest(signal: Typing.PatchedDataFrame,
                prices: Typing.PatchedPandas,
                action: Callable[[pd.Series], Tuple[int, float]],
                slippage: Callable[[float], float] = lambda _: 0):
    """
    Replay a signal through a transaction log and evaluate it against prices.

    :param signal: signal series, or a frame with indexed columns in which
        case column ``i`` of ``signal`` is back-tested against column ``i`` of
        ``prices``
    :param prices: price series (or frame, matching a multi-column signal)
    :param action: called with every signal row (passed as a raw array). A
        tuple result is unpacked into ``perform_action`` of the transaction
        log, any other result is converted to float and passed to
        ``rebalance``
    :param slippage: handed to the evaluation of the transaction log
    :return: result of ``StreamingTransactionLog.evaluate``; for multi-column
        input the per-column results joined under a top level named after the
        price column
    """
    if not has_indexed_columns(signal):
        assert isinstance(prices, pd.Series), "prices need to be a series!"
        trades = StreamingTransactionLog()

        def trade_log_action(row):
            outcome = action(row)
            if not isinstance(outcome, tuple):
                trades.rebalance(float(outcome))
            else:
                trades.perform_action(*outcome)

        signal.to_frame().apply(trade_log_action, axis=1, raw=True)
        return trades.evaluate(prices.rename("price"), slippage)

    assert len(signal.columns) == len(
        prices.columns), "Signal and Prices need the same shape!"

    combined = pd.DataFrame({},
                            index=signal.index,
                            columns=pd.MultiIndex.from_product([[], []]))

    for position, signal_column in enumerate(signal.columns):
        price_column = prices.columns[position]
        single = ta_backtest(signal[signal_column], prices[price_column],
                             action, slippage)

        # tuple labels (multi index prices) are flattened to one string
        if isinstance(price_column, tuple):
            top_level_name = ",".join(price_column)
        else:
            top_level_name = price_column

        single.columns = pd.MultiIndex.from_product(
            [[top_level_name], single.columns.to_list()])
        combined = combined.join(single)

    return combined
Esempio n. 5
0
def plot_line(df, fields, figsize=None, ax=None, **kwargs):
    """
    Draw the selected field(s) of ``df`` as line(s) on a time series axis.

    :param df: source pandas object
    :param fields: selector passed to ``get_pandas_object`` to pick the data
    :param figsize: passed to ``new_fig_ts_axis`` when a new axis is needed
    :param ax: axis to draw on; a new one is created when None
    :param kwargs: forwarded to ``ax.plot``
    :return: the axis that was drawn on
    """
    selection = get_pandas_object(df, fields)

    if ax is None:
        _, ax = new_fig_ts_axis(figsize)

    # single column: draw it and finish
    if not has_indexed_columns(selection):
        ax.plot(df.index,
                selection.values,
                label=str(selection.name),
                **kwargs)
        return ax

    # several columns: draw each one onto the shared axis
    for column in selection.columns:
        plot_line(selection, column, figsize, ax, **kwargs)

    return ax
Esempio n. 6
0
def ta_wilders(df: _PANDAS, period=12) -> _PANDAS:
    """
    Apply the ``_ws`` smoothing helper to every column and label the result.

    :param df: series or frame; a series is processed as a one-column frame
    :param period: period handed to ``_ws``
    :return: smoothed object passed through ``_wcs`` with the label
        ``wilders_<period>``
    """
    label = f"wilders_{period}"

    if not has_indexed_columns(df):
        # reuse the frame implementation and take the only column back out
        smoothed = ta_wilders(df.to_frame(), period).iloc[:, 0]
        return _wcs(label, smoothed)

    joined = _pd.DataFrame({}, index=df.index)
    for column in df.columns:
        series = df[column].dropna()
        # the return value of _ws is not used, so it presumably writes its
        # result into the buffer -- TODO confirm
        buffer = _np.zeros(series.shape)
        _ws(series.values, period, buffer)
        joined = joined.join(
            _pd.DataFrame({column: buffer}, index=series.index))

    return _wcs(label, joined)
Esempio n. 7
0
def ta_multi_bbands(s: _pd.Series, period=5, stddevs=(0.5, 1.0, 1.5, 2.0), ddof=1, include_mean=True) -> _PANDAS:
    """
    Compute several Bollinger style bands around a rolling mean.

    :param s: a single series (objects with indexed columns are rejected)
    :param period: window of the rolling mean and standard deviation
    :param stddevs: multiples of the rolling standard deviation, one lower and
        one upper band is produced for each; any reversible sequence works
    :param ddof: delta degrees of freedom of the rolling standard deviation
    :param include_mean: whether to add the rolling mean as column ``mean``
    :return: frame with the columns ``lower-<x>`` (widest band first),
        optionally ``mean``, and ``upper-<x>`` (narrowest band first)
    """
    assert not has_indexed_columns(s)
    mean = s.rolling(period).mean().rename("mean")
    std = s.rolling(period).std(ddof=ddof)
    df = _pd.DataFrame({}, index=mean.index)

    # lower bands go in reversed order so the columns run from the lowest
    # band up to the highest one
    for stddev in reversed(stddevs):
        df[f'lower-{stddev}'] = mean - (std * stddev)

    if include_mean:
        df["mean"] = mean

    for stddev in stddevs:
        df[f'upper-{stddev}'] = mean + (std * stddev)

    return df
Esempio n. 8
0
def ta_multi_ma(df: _t.PatchedDataFrame, average_function='sma', period=12, factors=_np.linspace(1 - 0.2, 1 + 0.2, 5)) -> _t.PatchedDataFrame:
    """
    Build a set of scaled moving averages (one column per factor).

    :param df: series, or frame whose columns are processed one by one
    :param average_function: one of ``'sma'``, ``'ema'`` or ``'wilder'``
    :param period: period handed to the selected moving average function
    :param factors: multipliers applied to the moving average
    :return: frame with one column per factor named
        ``<name>_<average_function>(<period>)(<factor>)``; for multi-column
        input the per-column frames are combined via ``inner_join`` with a
        multi index
    :raises KeyError: if ``average_function`` is not a known average
    """
    ma = {'sma': ta_sma, 'ema': ta_ema, 'wilder': ta_wilders}

    if has_indexed_columns(df):
        res = None
        for col in df.columns.to_list():
            _df = ta_multi_ma(df[col], average_function, period, factors)
            _df = add_multi_index(_df, col)
            res = inner_join(res, _df, force_multi_index=True)

        return res

    res = _pd.DataFrame({}, index=df.index)

    # the average does not depend on the factor, so compute it only once
    average = ma[average_function](df, period)
    for x in factors:
        res[f"{df.name}_{average_function}({period})({x:.3f})"] = average * x

    return res
Esempio n. 9
0
        def reshape_when_multi_index_column(values):
            """
            Bring ``values`` into the shape of the frame's column MultiIndex.

            ``self`` and ``squeeze`` are taken from the enclosing scope.

            :param values: numpy array to reshape
            :return: the reshaped array, or the array as passed in when the
                shapes are incompatible or no MultiIndex is used
            """
            uses_multi_index = has_indexed_columns(self.df) and isinstance(
                self.df.columns, pd.MultiIndex)

            if uses_multi_index:
                level_shape = multi_index_shape(self.df.columns)
                try:
                    # fold the flat column axis into the multi index levels
                    values = values.reshape((values.shape[0], ) +
                                            level_shape +
                                            values.shape[len(level_shape):])
                except ValueError as ve:
                    # incompatible shapes are tolerated and leave the values
                    # untouched, any other error is passed on
                    if "cannot reshape array" not in str(ve):
                        raise ve

            drop_unit_axis = squeeze and values.ndim > 2 and values.shape[2] == 1
            if drop_unit_axis:
                values = values.reshape(values.shape[:-1])

            return values
Esempio n. 10
0
def with_column_suffix(suffix, po, ref_po=None):
    """
    Label a pandas object with a suffix derived from a reference object.

    :param suffix: the label to add
    :param po: the frame or series to relabel; the columns of a frame are
        reassigned in place
    :param ref_po: object providing the original column labels / name,
        defaults to ``po``
    :return: for MultiIndex columns the frame with ``suffix`` added as the
        first element of every column tuple; for flat columns a frame with
        ``_<suffix>`` appended to each label; for a series the renamed series
        (tuple names get ``suffix`` as first element)
    """
    if ref_po is None:
        ref_po = po

    if has_indexed_columns(po):
        # The decision has to be based on the columns (not on the row index),
        # because the column labels are the ones unpacked as tuples below.
        if isinstance(ref_po.columns, pd.MultiIndex):
            po.columns = pd.MultiIndex.from_tuples([
                (suffix, *col) for col in ref_po.columns.to_list()
            ])
            return po
        else:
            po.columns = ref_po.columns
            return po.add_suffix(f'_{suffix}')
    else:
        if isinstance(po.name, tuple):
            return po.rename((suffix, *ref_po.name))
        else:
            return po.rename(f'{ref_po.name}_{suffix}')
Esempio n. 11
0
def ta_one_hot_encode_discrete(po: Union[pd.Series, pd.DataFrame],
                               drop_na=True) -> Union[pd.Series, pd.DataFrame]:
    """
    One-hot encode discrete (integer like) values.

    The values are shifted by their minimum so that classes start at 0 and
    the number of classes is the largest shifted value plus one.

    :param po: series, or frame whose columns are encoded one by one
    :param drop_na: drop missing values before encoding
    :return: series whose cells hold the one-hot encoded list, or a frame of
        such series for multi-column input
    """
    if has_indexed_columns(po):
        # forward drop_na so that every column honours the caller's choice
        return pd.DataFrame([
            ta_one_hot_encode_discrete(po[col], drop_na=drop_na)
            for col in po.columns
        ]).T
    else:
        if drop_na:
            po = po.dropna()

        values = po.values.astype(int)
        offset = po.min()
        values = values - offset
        nr_of_classes = values.max() + 1

        label_binarizer = LabelBinarizer()
        label_binarizer.fit(range(int(nr_of_classes)))
        return pd.Series(label_binarizer.transform(values).tolist(),
                         index=po.index,
                         name=po.name)
Esempio n. 12
0
    def values(self) -> np.ndarray:
        """
        In contrast to pandas.values, ml.values returns an n-dimensional array
        that reflects a column MultiIndex and/or numpy arrays nested inside of
        cells.

        :return: numpy array with the shape of the MultiIndex and/or the
            nested arrays from the cells
        """
        unpacked = unpack_nested_arrays(self.df)

        # without a column MultiIndex the unpacked values are the result
        if not (has_indexed_columns(self.df)
                and isinstance(self.df.columns, pd.MultiIndex)):
            return unpacked

        # otherwise fold the column axis into the multi index levels
        level_shape = multi_index_shape(self.df.columns)
        folded_shape = ((unpacked.shape[0], ) + level_shape +
                        unpacked.shape[len(level_shape):])
        return unpacked.reshape(folded_shape)
Esempio n. 13
0
def plot_stacked_bar(df,
                     columns,
                     figsize=None,
                     ax=None,
                     padding=0.02,
                     colors=None,
                     color_map: str = 'afmhot',
                     **kwargs):
    """
    Draw several columns of ``df`` as bars stacked on top of each other.

    Every column after the first is drawn from the top of the previous stack
    with the height ``column - previous top``. A selection without indexed
    columns is delegated to ``plot_bar``.

    :param df: source pandas object
    :param columns: selector for ``get_pandas_object``; also iterated to draw
        the individual layers
    :param figsize: passed to ``new_fig_ts_axis`` when a new axis is needed
    :param ax: axis to draw on; a new one is created when None
    :param padding: relative margin applied to the y limits, None to leave the
        limits alone
    :param colors: only used when delegating to ``plot_bar``
    :param color_map: only used when delegating to ``plot_bar``
    :param kwargs: forwarded to ``ax.bar``
    :return: the axis that was drawn on
    """
    selection = get_pandas_object(df, columns)

    if not has_indexed_columns(selection):
        return plot_bar(df, columns, figsize, ax, colors, color_map, **kwargs)

    # TODO add colors ...
    if ax is None:
        _, ax = new_fig_ts_axis(figsize)

    if padding is not None:
        lower, upper = ax.get_ylim()

        # limits of exactly (0, 1) are treated as "not set yet" and ignored
        if lower == 0 and upper == 1:
            lower, upper = np.inf, -np.inf

        ax.set_ylim(
            min(selection.values.min(), lower) * (1 - padding),
            max(selection.values.max(), upper) * (1 + padding))

    stack_top = None
    for name in columns:
        layer = get_pandas_object(df, name)

        if stack_top is None:
            height = layer
            stack_top = height
        else:
            kwargs["bottom"] = stack_top
            height = layer - stack_top
            stack_top = stack_top + height

        ax.bar(mdates.date2num(df.index), height, **kwargs)

    return ax
Esempio n. 14
0
def ta_draw_down(df: _PANDAS, return_dates=False, return_duration=False):
    """
    Compute the relative draw down between each local peak and the trough
    following it.

    :param df: series, or frame whose columns are processed one by one
    :param return_dates: add the columns ``d_start`` (peak) and ``d_end``
        (trough)
    :param return_duration: add the column ``duration`` holding the business
        day count between peak and trough
    :return: frame on the index of ``df`` with the values placed on the peak
        rows and 0 elsewhere; multi-column input yields a two-level column
        index ``(original column, result column)``
    """
    if has_indexed_columns(df):
        combined = _pd.DataFrame({}, index=df.index)
        for column in df.columns.to_list():
            part = ta_draw_down(df[column], return_dates, return_duration)
            part.columns = _pd.MultiIndex.from_product([[column],
                                                        part.columns])
            combined = combined.join(part)

        combined.columns = _pd.MultiIndex.from_tuples(
            combined.columns.to_list())
        return combined

    # +1 marks a local maximum (peak), -1 a local minimum (trough)
    turning_points = (df.diff(-1) > 0).astype(int).diff()
    peaks = turning_points[turning_points == 1]
    troughs = turning_points[turning_points == -1]

    # a trough before the first peak has no peak to relate to
    if troughs.index[0] < peaks.index[0]:
        troughs = troughs.drop(troughs.index[0])

    # a peak after the last trough has not been followed by a trough yet
    if troughs.index[-1] < peaks.index[-1]:
        peaks = peaks.drop(peaks.index[-1])

    trough_values = _np.array(df[troughs.index])
    peak_values = _np.array(df[peaks.index])
    result = {'drawdown': (trough_values - peak_values) / peak_values}

    if return_dates:
        result['d_start'] = peaks.index
        result['d_end'] = troughs.index

    if return_duration:
        result['duration'] = [
            _np.busday_count(start.date(), end.date())
            for start, end in zip(peaks.index, troughs.index)
        ]

    # align with the full index, rows without a draw down become 0
    per_peak = _pd.DataFrame(result, index=peaks.index)
    return _pd.DataFrame({}, index=df.index).join(per_peak).fillna(0)
Esempio n. 15
0
from pandas.core.base import PandasObject

from pandas_ml_common.df.ml import ML
from pandas_ml_common.lazy import LazyInit
from pandas_ml_common.utils import get_pandas_object, Constant, inner_join, has_indexed_columns, np_nans, \
    flatten_multi_column_index, unique_level_columns

# module logger, the library versions are reported once at import time
_log = logging.getLogger(__name__)
_log.debug(f"numpy version {np.__version__}")
_log.debug(f"pandas version {pd.__version__}")

# monkey patches: extend numpy and the pandas classes with project helpers
np.nans = np_nans
PandasObject._ = property(lambda self: ML(self))
PandasObject.inner_join = inner_join
pd.DataFrame.to_frame = lambda self: self
pd.DataFrame.flatten_columns = flatten_multi_column_index
pd.DataFrame.unique_level_columns = unique_level_columns
pd.DataFrame.has_indexed_columns = lambda self: has_indexed_columns(self)
# setattr(pd.Series, 'columns', lambda self: [self.name]) # FIXME leads to problems where we do hasattr(?, columns)


class Typing(object):
    """Namespace of type aliases intended for use in type annotations."""

    # Aliases of the plain pandas classes. The "Patched" prefix presumably
    # indicates that the attributes monkey patched onto the pandas classes are
    # expected to be available -- TODO confirm intent.
    PatchedDataFrame = pd.DataFrame
    PatchedSeries = pd.Series
    PatchedPandas = Union[PatchedDataFrame, PatchedSeries]

    # Aliases of the pandas classes without the "Patched" prefix.
    DataFrame = pd.DataFrame
    Series = pd.Series
    Pandas = Union[DataFrame, Series]
    PdIndex = pd.Index
    # Accepted selector forms: a string, a list of 'MlGetItem' (forward
    # reference, not defined in view), a one-argument callable returning a
    # frame or series, or a Constant.
    _Selector = Union[str, List['MlGetItem'], Callable[[Any], Union[pd.DataFrame, pd.Series]], Constant]
Esempio n. 16
0
 def plot_features(self, data: Union[FeaturesAndLabels, MlModel]):
     """
     Plot the features extracted from the wrapped frame together with the
     first label.

     :param data: features and labels definition, or a model providing one
         via its ``features_and_labels`` attribute
     :return: result of the module level ``plot_features`` function
     """
     if isinstance(data, MlModel):
         definition = data.features_and_labels
     else:
         definition = data

     (feature_frame, _), label_frame, _, _, _ = self.df._.extract(definition)

     # a frame of labels is represented by its first column, a series by
     # its name
     if has_indexed_columns(label_frame):
         label_name = label_frame.columns[0]
     else:
         label_name = label_frame.name

     return plot_features(feature_frame.join(label_frame), label_name)
Esempio n. 17
0
from pandas.core.base import PandasObject

from pandas_ml_common.df.ml import ML
from pandas_ml_common.lazy import LazyInit
from pandas_ml_common.utils import get_pandas_object, Constant, inner_join, has_indexed_columns, nans

# module logger, the library versions are reported once at import time
_log = logging.getLogger(__name__)
_log.debug(f"numpy version {np.__version__}")
_log.debug(f"pandas version {pd.__version__}")

# monkey patches: extend numpy and the pandas classes with project helpers
np.nans = nans
PandasObject._ = property(lambda self: ML(self))
PandasObject.inner_join = inner_join
pd.DataFrame.to_frame = lambda self: self
pd.DataFrame.has_indexed_columns = lambda self: has_indexed_columns(self)
# setattr(pd.Series, 'columns', lambda self: [self.name]) # FIXME leads to problems where we do hasattr(?, columns)


class Typing(object):
    """Namespace of type aliases intended for use in type annotations."""

    # Aliases of the plain pandas classes. The "Patched" prefix presumably
    # indicates that the attributes monkey patched onto the pandas classes are
    # expected to be available -- TODO confirm intent.
    PatchedDataFrame = pd.DataFrame
    PatchedSeries = pd.Series
    PatchedPandas = Union[PatchedDataFrame, PatchedSeries]

    # Aliases of the pandas classes without the "Patched" prefix.
    DataFrame = pd.DataFrame
    Series = pd.Series
    Pandas = Union[DataFrame, Series]
    PdIndex = pd.Index
    # Accepted selector forms: a string, a list of 'MlGetItem' (forward
    # reference, not defined in view), a one-argument callable returning a
    # frame or series, or a Constant.
    _Selector = Union[str, List['MlGetItem'],
                      Callable[[Any], Union[pd.DataFrame,
                                            pd.Series]], Constant]