def ta_macd(df: Typing.PatchedPandas, fast_period=12, slow_period=26, signal_period=9, relative=True) -> _PANDAS:
    """Compute the MACD line, its signal line and the histogram.

    :param df: a series, or a frame with indexed columns; for a frame the
        indicator is computed per column and the result columns are a
        two-level MultiIndex of (source column, indicator column)
    :param fast_period: period passed to ``_ema`` for the fast average
    :param slow_period: period passed to ``_ema`` for the slow average
    :param signal_period: period passed to ``_ema`` to smooth the MACD line
    :param relative: if True the MACD line is ``fast / slow - 1``, otherwise
        the plain difference ``fast - slow``
    :return: a frame with the columns ``macd_<f>,<s>,<sig>``,
        ``signal_<f>,<s>,<sig>`` and ``histogram_<f>,<s>,<sig>``
    """
    if has_indexed_columns(df):
        res = _pd.DataFrame({}, index=df.index)

        for col in df.columns.to_list():
            # forward signal_period here - passing slow_period a second time
            # would smooth the signal line of every column with the wrong period
            d = ta_macd(df[col], fast_period, slow_period, signal_period, relative)
            d.columns = _pd.MultiIndex.from_product([[col], d.columns])
            res = res.join(d)

        # joining onto the empty frame leaves plain tuples as column labels
        res.columns = _pd.MultiIndex.from_tuples(res.columns.to_list())
        return res
    else:
        fast = _ema(df, fast_period)
        slow = _ema(df, slow_period)

        macd = (fast / slow - 1) if relative else (fast - slow)
        signal = _ema(macd, signal_period)
        hist = macd - signal
        suffix = f'{fast_period},{slow_period},{signal_period}'

        # the names become the column labels of the joined result
        for label, frame in {
            f"macd_{suffix}": macd,
            f"signal_{suffix}": signal,
            f"histogram_{suffix}": hist
        }.items():
            frame.name = label

        macd = macd.to_frame() if isinstance(macd, _pd.Series) else macd
        return macd.join(signal).join(hist)
def plot_bar(df, fields, figsize=None, ax=None, colors=None, color_map: str = 'afmhot', **kwargs):
    """Draw the selected field(s) of ``df`` as a bar chart.

    :param df: source pandas object; its index is used for the x positions
    :param fields: selector handed to ``get_pandas_object`` to pick the data
    :param figsize: passed to ``new_fig_ts_axis`` when a new axis is needed
    :param ax: axis to draw on; a new one is created when None
    :param colors: optional selector (resolved via ``get_pandas_object``) of
        values that are rescaled to (0, 1) and mapped through ``color_map``
    :param color_map: name of the matplotlib color map used for ``colors``
    :param kwargs: forwarded to ``ax.bar``
    :return: the axis that was drawn on
    """
    data = get_pandas_object(df, fields)

    if has_indexed_columns(data):
        # create the axis once so that every column lands on the same axis and
        # the caller gets it back (otherwise each recursion would open its own
        # figure and None would be returned)
        if ax is None:
            fig, ax = new_fig_ts_axis(figsize)

        # NOTE(review): the recursion resolves `colors` against `data`, not
        # against the original `df` - confirm that this is intended
        for col in data.columns:
            plot_bar(data, col, figsize, ax, colors, color_map, **kwargs)

        return ax

    colors = get_pandas_object(df, colors)

    if ax is None:
        fig, ax = new_fig_ts_axis(figsize)

    bars = ax.bar(df.index, height=data.values, label=str(data.name), **kwargs)

    if colors is not None:
        color_function = plt.get_cmap(color_map)
        domain = (colors.values.min(), colors.values.max()) if isinstance(colors, PandasObject) \
            else (colors.min(), colors.max())
        r = ReScaler(domain, (0, 1))

        for i, c in enumerate(colors):
            color = color_function(r(c))
            # TODO if alpha is provided then color = (*color[:-1], alpha)
            bars[i].set_color(color)

    return ax
def ta_one_hot_encode_discrete(po: Union[pd.Series, pd.DataFrame],
                               drop_na=True,
                               nr_of_classes=None,
                               offset=None,
                               expand=False) -> Union[pd.Series, pd.DataFrame]:
    """One hot encode integer like (discrete) values.

    :param po: a series, or a frame with indexed columns (encoded per column)
    :param drop_na: drop NaN rows before the values are cast to int
    :param nr_of_classes: number of classes to encode; defaults to the largest
        offset-corrected value + 1
    :param offset: value subtracted from every value before encoding; defaults
        to the minimum of ``po``
    :param expand: only used for a series: if truthy one column per class is
        returned, and if it is an iterable it supplies the column labels
    :return: for a series either a series holding one encoded list per row or,
        with ``expand``, a frame; for a frame one list-valued column per
        source column
    """
    if has_indexed_columns(po):
        # hand the encoding parameters down, otherwise they are silently
        # replaced by their defaults for every column.
        # `expand` is intentionally not forwarded: the row-wise stacking below
        # relies on each column result being a Series.
        return pd.DataFrame([
            ta_one_hot_encode_discrete(po[col], drop_na=drop_na, nr_of_classes=nr_of_classes, offset=offset)
            for col in po.columns
        ]).T
    else:
        if drop_na:
            po = po.dropna()

        if offset is None:
            offset = po.min()

        # shift the values so that the smallest class is 0
        values = po.values.astype(int)
        values = values - offset

        if nr_of_classes is None:
            nr_of_classes = values.max() + 1

        label_binarizer = LabelBinarizer()
        label_binarizer.fit(range(int(nr_of_classes)))

        if expand:
            columns = expand if isinstance(expand, Iterable) else None
            return pd.DataFrame(label_binarizer.transform(values), index=po.index, columns=columns)
        else:
            return pd.Series(label_binarizer.transform(values).tolist(), index=po.index, name=po.name)
def ta_backtest(signal: Typing.PatchedDataFrame,
                prices: Typing.PatchedPandas,
                action: Callable[[pd.Series], Tuple[int, float]],
                slippage: Callable[[float], float] = lambda _: 0):
    """Replay a signal through a transaction log and evaluate it against prices.

    :param signal: the signal; when it has indexed columns, signal and price
        columns are paired by position and each pair is evaluated on its own
    :param prices: price series (or a frame with as many columns as ``signal``)
    :param action: called with every signal row; a tuple result is passed to
        ``perform_action``, any other result is passed as float to ``rebalance``
    :param slippage: forwarded to the evaluation of the transaction log
    :return: the evaluation result; for column-indexed input the per-column
        results are joined under a top level named after the price column
    """
    if has_indexed_columns(signal):
        assert len(signal.columns) == len(prices.columns), "Signal and Prices need the same shape!"

        combined = pd.DataFrame({}, index=signal.index, columns=pd.MultiIndex.from_product([[], []]))

        for signal_col, price_col in zip(signal.columns, prices.columns):
            result = ta_backtest(signal[signal_col], prices[price_col], action, slippage)

            # multi level price columns are flattened into one label
            if isinstance(price_col, tuple):
                top_level_name = ",".join(price_col)
            else:
                top_level_name = price_col

            result.columns = pd.MultiIndex.from_product([[top_level_name], result.columns.to_list()])
            combined = combined.join(result)

        return combined

    assert isinstance(prices, pd.Series), "prices need to be a series!"

    trades = StreamingTransactionLog()

    def trade_log_action(row):
        decision = action(row)

        if not isinstance(decision, tuple):
            trades.rebalance(float(decision))
            return

        trades.perform_action(*decision)

    signal.to_frame().apply(trade_log_action, axis=1, raw=True)

    named_prices = prices.rename("price")
    return trades.evaluate(named_prices, slippage)
def plot_line(df, fields, figsize=None, ax=None, **kwargs):
    """Draw the selected field(s) of ``df`` as line(s) on one axis.

    :param df: source pandas object; its index is used for the x values
    :param fields: selector handed to ``get_pandas_object``
    :param figsize: passed to ``new_fig_ts_axis`` when a new axis is needed
    :param ax: axis to draw on; a new one is created when None
    :param kwargs: forwarded to ``ax.plot``
    :return: the axis that was drawn on
    """
    selection = get_pandas_object(df, fields)

    if ax is None:
        _, ax = new_fig_ts_axis(figsize)

    if not has_indexed_columns(selection):
        # a single series: plot it directly
        ax.plot(df.index, selection.values, label=str(selection.name), **kwargs)
        return ax

    # several columns: draw each of them onto the shared axis
    for column in selection.columns:
        plot_line(selection, column, figsize, ax, **kwargs)

    return ax
def ta_wilders(df: _PANDAS, period=12) -> _PANDAS:
    """Apply the ``_ws`` smoothing routine to a series or to every column of a frame.

    :param df: a series, or a frame with indexed columns
    :param period: smoothing period handed to ``_ws``
    :return: the smoothed data passed through ``_wcs`` with the suffix
        ``wilders_<period>``
    """
    suffix = f"wilders_{period}"

    if not has_indexed_columns(df):
        # a series is processed as a one column frame
        # NOTE(review): the recursive call already went through _wcs, so the
        # suffix looks like it is applied twice here - confirm against _wcs
        first_column = ta_wilders(df.to_frame(), period).iloc[:, 0]
        return _wcs(suffix, first_column)

    smoothed_frame = _pd.DataFrame({}, index=df.index)

    for col in df.columns:
        # NaNs are dropped per column; the join re-aligns to the full index
        series = df[col].dropna()
        buffer = _np.zeros(series.shape)
        _ws(series.values, period, buffer)  # presumably writes its result into buffer
        smoothed_frame = smoothed_frame.join(_pd.DataFrame({col: buffer}, index=series.index))

    return _wcs(suffix, smoothed_frame)
def ta_multi_bbands(s: _pd.Series, period=5, stddevs=(0.5, 1.0, 1.5, 2.0), ddof=1, include_mean=True) -> _PANDAS:
    """Compute several bands around a rolling mean at once.

    :param s: the series to compute the bands for (must not have indexed columns)
    :param period: window length of the rolling mean and standard deviation
    :param stddevs: multiples of the rolling standard deviation; any iterable
        is accepted
    :param ddof: delta degrees of freedom of the rolling standard deviation
    :param include_mean: whether the rolling mean is part of the result
    :return: a frame with the columns ``lower-<x>`` (widest band first),
        optionally ``mean``, and ``upper-<x>`` (narrowest band first)
    """
    assert not has_indexed_columns(s)

    # materialize once: a tuple default avoids a shared mutable default and a
    # list allows generators/arrays to be iterated in both directions
    stddevs = list(stddevs)

    mean = s.rolling(period).mean().rename("mean")
    std = s.rolling(period).std(ddof=ddof)
    df = _pd.DataFrame({}, index=mean.index)

    for stddev in reversed(stddevs):
        df[f'lower-{stddev}'] = mean - (std * stddev)

    if include_mean:
        df["mean"] = mean

    for stddev in stddevs:
        df[f'upper-{stddev}'] = mean + (std * stddev)

    return df
def ta_multi_ma(df: _t.PatchedDataFrame,
                average_function='sma',
                period=12,
                factors=_np.linspace(1 - 0.2, 1 + 0.2, 5)) -> _t.PatchedDataFrame:
    """Build a fan of scaled moving averages.

    :param df: a series, or a frame with indexed columns (processed per column)
    :param average_function: one of ``'sma'``, ``'ema'`` or ``'wilder'``
    :param period: period handed to the selected average function
    :param factors: every factor yields one column holding ``average * factor``
    :return: a frame with one column per factor named
        ``<name>_<average_function>(<period>)(<factor>)``; for column-indexed
        input the per-column frames are combined with ``inner_join``
    """
    averages = {'sma': ta_sma, 'ema': ta_ema, 'wilder': ta_wilders}

    if has_indexed_columns(df):
        joined = None

        for col in df.columns.to_list():
            per_column = ta_multi_ma(df[col], average_function, period, factors)
            per_column = add_multi_index(per_column, col)
            joined = inner_join(joined, per_column, force_multi_index=True)

        return joined

    result = _pd.DataFrame({}, index=df.index)

    for factor in factors:
        column_name = f"{df.name}_{average_function}({period})({factor:.3f})"
        result[column_name] = averages[average_function](df, period) * factor

    return result
def reshape_when_multi_index_column(values):
    """Bring ``values`` into the shape of the MultiIndex columns of ``self.df``.

    ``self`` and ``squeeze`` are taken from the enclosing scope.

    :param values: numpy array whose first axis is the row axis
    :return: the array, reshaped where possible and optionally squeezed
    """
    uses_multi_index = has_indexed_columns(self.df) and isinstance(self.df.columns, pd.MultiIndex)

    if uses_multi_index:
        level_shape = multi_index_shape(self.df.columns)

        try:
            # attempt to fold the flat column axis into the multi index shape
            target_shape = (values.shape[0], ) + level_shape + values.shape[len(level_shape):]
            values = values.reshape(target_shape)
        except ValueError as ve:
            # shapes that do not match are tolerated, the array is then left
            # untouched; every other ValueError is passed on
            if "cannot reshape array" not in str(ve):
                raise ve

    drop_last_axis = squeeze and values.ndim > 2 and values.shape[2] == 1
    if drop_last_axis:
        values = values.reshape(values.shape[:-1])

    return values
def with_column_suffix(suffix, po, ref_po=None):
    """Tag the column label(s) of a pandas object with ``suffix``.

    :param suffix: the tag to apply
    :param po: series or frame to relabel; the columns of a frame are
        re-assigned in place
    :param ref_po: object the original labels are taken from; defaults to ``po``
    :return: the relabeled object. Tuple labels (MultiIndex columns or a tuple
        series name) get ``suffix`` prepended as a new first level, plain
        labels become ``<label>_<suffix>``
    """
    if ref_po is None:
        ref_po = po

    if has_indexed_columns(po):
        # the decision has to be made on the columns (not on the row index):
        # only MultiIndex columns provide tuples that can be unpacked below
        if isinstance(po.columns, pd.MultiIndex):
            po.columns = pd.MultiIndex.from_tuples([(suffix, *col) for col in ref_po.columns.to_list()])
            return po
        else:
            po.columns = ref_po.columns
            return po.add_suffix(f'_{suffix}')
    else:
        if isinstance(po.name, tuple):
            return po.rename((suffix, *ref_po.name))
        else:
            return po.rename(f'{ref_po.name}_{suffix}')
def ta_one_hot_encode_discrete(po: Union[pd.Series, pd.DataFrame], drop_na=True) -> Union[pd.Series, pd.DataFrame]:
    """One hot encode integer like (discrete) values.

    :param po: a series, or a frame with indexed columns (encoded per column)
    :param drop_na: drop NaN rows before the values are cast to int
    :return: for a series a series holding one encoded list per row, for a
        frame one such list-valued column per source column
    """
    if has_indexed_columns(po):
        # pass drop_na on, otherwise drop_na=False is ignored for frames
        return pd.DataFrame([ta_one_hot_encode_discrete(po[col], drop_na) for col in po.columns]).T
    else:
        if drop_na:
            po = po.dropna()

        # shift the values so that the smallest class is 0
        values = po.values.astype(int)
        offset = po.min()
        values = values - offset

        nr_of_classes = values.max() + 1

        label_binarizer = LabelBinarizer()
        label_binarizer.fit(range(int(nr_of_classes)))

        return pd.Series(label_binarizer.transform(values).tolist(), index=po.index, name=po.name)
def values(self) -> np.ndarray:
    """
    Unlike pandas.values, ml.values yields an n-dimensional array which honors MultiIndex columns
    and/or numpy arrays nested inside of cells

    :return: numpy array in the shape of the MultiIndex and/or of the nested cell arrays
    """
    # the unpacked, not yet reshaped values
    raw = unpack_nested_arrays(self.df)

    has_multi_index = has_indexed_columns(self.df) and isinstance(self.df.columns, pd.MultiIndex)
    if not has_multi_index:
        return raw

    # fold the flat column axis into the shape of the multi index levels
    level_shape = multi_index_shape(self.df.columns)
    return raw.reshape((raw.shape[0], ) + level_shape + raw.shape[len(level_shape):])
def plot_stacked_bar(df, columns, figsize=None, ax=None, padding=0.02, colors=None, color_map: str = 'afmhot', **kwargs):
    """Draw the selected columns of ``df`` as bars stacked on top of each other.

    :param df: source pandas object; its index is converted with ``date2num``
    :param columns: selector(s); a selection without indexed columns is
        delegated to ``plot_bar``
    :param figsize: passed to ``new_fig_ts_axis`` when a new axis is needed
    :param ax: axis to draw on; a new one is created when None
    :param padding: relative margin applied to the y limits; None leaves the
        limits alone
    :param colors: only used when delegating to ``plot_bar``
    :param color_map: only used when delegating to ``plot_bar``
    :param kwargs: forwarded to ``ax.bar``
    :return: the axis that was drawn on
    """
    selection = get_pandas_object(df, columns)

    if not has_indexed_columns(selection):
        return plot_bar(df, columns, figsize, ax, colors, color_map, **kwargs)

    # TODO add colors ...
    if ax is None:
        _, ax = new_fig_ts_axis(figsize)

    if padding is not None:
        lower, upper = ax.get_ylim()

        # limits of exactly (0, 1) are treated as "not set yet" and ignored
        if lower == 0 and upper == 1:
            lower, upper = np.inf, -np.inf

        new_lower = min(selection.values.min(), lower) * (1 - padding)
        new_upper = max(selection.values.max(), upper) * (1 + padding)
        ax.set_ylim(new_lower, new_upper)

    stacked = None
    for column in columns:
        series = get_pandas_object(df, column)

        if stacked is None:
            # the first bar starts at the default bottom
            height = series
            stacked = height
        else:
            # following bars sit on top of what was drawn so far
            kwargs["bottom"] = stacked
            height = series - stacked
            stacked = stacked + height

        ax.bar(mdates.date2num(df.index), height, **kwargs)

    return ax
def ta_draw_down(df: _PANDAS, return_dates=False, return_duration=False):
    """Compute the relative draw down between turning points of a series.

    :param df: a series, or a frame with indexed columns; for a frame the
        calculation is done per column and the result columns are a two-level
        MultiIndex of (source column, result column)
    :param return_dates: add the columns ``d_start`` and ``d_end`` holding the
        index labels of the high and of the following low
    :param return_duration: add the column ``duration`` holding the number of
        business days between high and low (calls ``.date()`` on the index
        labels, so it presumably needs a datetime index - TODO confirm)
    :return: a frame on the index of ``df``; the values are placed at the row
        of the high and all remaining cells are filled with 0
    """
    if has_indexed_columns(df):
        res = _pd.DataFrame({}, index=df.index)

        for col in df.columns.to_list():
            d = ta_draw_down(df[col], return_dates, return_duration)
            d.columns = _pd.MultiIndex.from_product([[col], d.columns])
            res = res.join(d)

        # joining onto the empty frame leaves plain tuples as column labels
        res.columns = _pd.MultiIndex.from_tuples(res.columns.to_list())
        return res
    else:
        ds = df

        # mark the rows which are followed by a lower value and diff the
        # marker to find the rows where that state flips
        pmin_pmax = (ds.diff(-1) > 0).astype(int).diff()  # <- -1 indicates pmin, +1 indicates pmax
        pmax = pmin_pmax[pmin_pmax == 1]
        pmin = pmin_pmax[pmin_pmax == -1]

        # a leading low has no preceding high: drop it
        # NOTE(review): index[0] raises an IndexError if no high/low was found
        if pmin.index[0] < pmax.index[0]:
            pmin = pmin.drop(pmin.index[0])

        # a trailing high has no following low: drop it
        if pmin.index[-1] < pmax.index[-1]:
            pmax = pmax.drop(pmax.index[-1])

        # relative change from each high to its low
        # (assumes highs and lows now pair up one to one - TODO confirm)
        dd = (_np.array(ds[pmin.index]) - _np.array(ds[pmax.index])) / _np.array(ds[pmax.index])

        d = {'drawdown': dd}
        if return_dates:
            d['d_start'] = pmax.index
            d['d_end'] = pmin.index

        if return_duration:
            dur = [_np.busday_count(p1.date(), p2.date()) for p1, p2 in zip(pmax.index, pmin.index)]
            d['duration'] = dur

        # align to the full index; fillna(0) also applies to the date columns
        return _pd.DataFrame({}, index=df.index).join(_pd.DataFrame(d, index=pmax.index)).fillna(0)
from pandas.core.base import PandasObject

from pandas_ml_common.df.ml import ML
from pandas_ml_common.lazy import LazyInit
from pandas_ml_common.utils import get_pandas_object, Constant, inner_join, has_indexed_columns, np_nans, \
    flatten_multi_column_index, unique_level_columns

_log = logging.getLogger(__name__)
_log.debug(f"numpy version {np.__version__}")
_log.debug(f"pandas version {pd.__version__}")

# NOTE: everything below patches numpy / pandas as a side effect of importing this module

# make the np_nans helper reachable as np.nans
np.nans = np_nans

# every pandas object gets a `_` property which wraps it into an ML object
setattr(PandasObject, "_", property(lambda self: ML(self)))
setattr(PandasObject, "inner_join", inner_join)

# a data frame answers to_frame() with itself, so series and frames can be handled alike
setattr(pd.DataFrame, "to_frame", lambda self: self)
setattr(pd.DataFrame, "flatten_columns", flatten_multi_column_index)
setattr(pd.DataFrame, "unique_level_columns", unique_level_columns)
setattr(pd.DataFrame, "has_indexed_columns", lambda self: has_indexed_columns(self))
# setattr(pd.Series, 'columns', lambda self: [self.name]) # FIXME leads to problems where we do hasattr(?, columns)


class Typing(object):
    """Type aliases used for annotations."""

    # "Patched" aliases refer to the same pandas classes; the name presumably
    # signals that the attributes patched in above are expected - TODO confirm
    PatchedDataFrame = pd.DataFrame
    PatchedSeries = pd.Series
    PatchedPandas = Union[PatchedDataFrame, PatchedSeries]

    DataFrame = pd.DataFrame
    Series = pd.Series
    Pandas = Union[DataFrame, Series]
    PdIndex = pd.Index

    # the kinds of objects accepted as a selector
    # (presumably what get_pandas_object understands - TODO confirm)
    _Selector = Union[str, List['MlGetItem'], Callable[[Any], Union[pd.DataFrame, pd.Series]], Constant]
def plot_features(self, data: Union[FeaturesAndLabels, MlModel]):
    """Plot the features of ``self.df`` together with the label.

    :param data: the features and labels definition, or a model which carries
        one in its ``features_and_labels`` attribute
    :return: whatever the module level ``plot_features`` returns
    """
    if isinstance(data, MlModel):
        fnl = data.features_and_labels
    else:
        fnl = data

    extracted = self.df._.extract(fnl)
    (features, _), labels, _, _, _ = extracted

    joined = features.join(labels)

    # label frames are represented by their first column
    if has_indexed_columns(labels):
        label_name = labels.columns[0]
    else:
        label_name = labels.name

    return plot_features(joined, label_name)
from pandas.core.base import PandasObject

from pandas_ml_common.df.ml import ML
from pandas_ml_common.lazy import LazyInit
from pandas_ml_common.utils import get_pandas_object, Constant, inner_join, has_indexed_columns, nans

_log = logging.getLogger(__name__)
_log.debug(f"numpy version {np.__version__}")
_log.debug(f"pandas version {pd.__version__}")

# NOTE: everything below patches numpy / pandas as a side effect of importing this module

# make the nans helper reachable as np.nans
np.nans = nans

# every pandas object gets a `_` property which wraps it into an ML object
setattr(PandasObject, "_", property(lambda self: ML(self)))
setattr(PandasObject, "inner_join", inner_join)

# a data frame answers to_frame() with itself, so series and frames can be handled alike
setattr(pd.DataFrame, "to_frame", lambda self: self)
setattr(pd.DataFrame, "has_indexed_columns", lambda self: has_indexed_columns(self))
# setattr(pd.Series, 'columns', lambda self: [self.name]) # FIXME leads to problems where we do hasattr(?, columns)


class Typing(object):
    """Type aliases used for annotations."""

    # "Patched" aliases refer to the same pandas classes; the name presumably
    # signals that the attributes patched in above are expected - TODO confirm
    PatchedDataFrame = pd.DataFrame
    PatchedSeries = pd.Series
    PatchedPandas = Union[PatchedDataFrame, PatchedSeries]

    DataFrame = pd.DataFrame
    Series = pd.Series
    Pandas = Union[DataFrame, Series]
    PdIndex = pd.Index

    # the kinds of objects accepted as a selector
    # (presumably what get_pandas_object understands - TODO confirm)
    _Selector = Union[str, List['MlGetItem'], Callable[[Any], Union[pd.DataFrame, pd.Series]], Constant]