def extract_feature_labels_weights(
        df: pd.DataFrame,
        features_and_labels,
        **kwargs
) -> Tuple[Tuple[pd.DataFrame, int], pd.DataFrame, pd.DataFrame, pd.Series, pd.Series]:
    """Select features, labels and the optional frames from ``df`` on one shared index.

    Every selector found on ``features_and_labels`` (``features``, ``labels``,
    ``targets``, ``sample_weights``, ``gross_loss``) is resolved against ``df``
    through ``get_pandas_object``; ``kwargs`` are forwarded to each of those calls.

    :param df: the source data frame
    :param features_and_labels: object providing the selectors and ``label_type``
    :param kwargs: passed through to ``get_pandas_object``
    :return: ``((features, n), labels, targets, sample_weights, gross_loss)`` where
        ``n`` is ``len(df) - len(features) + 1`` computed on the NaN-free features
        *before* they are restricted to the common index
    """
    def _select(selector):
        return get_pandas_object(df, selector, **kwargs)

    # features and labels are mandatory, so dropna is called on them directly
    features = _select(features_and_labels.features).dropna()
    labels = _select(features_and_labels.labels).dropna()

    # the remaining frames go through call_if_not_none
    # (presumably a no-op when the selection is None -- confirm in the helper)
    targets = call_if_not_none(_select(features_and_labels.targets), 'dropna')
    sample_weights = call_if_not_none(_select(features_and_labels.sample_weights), 'dropna')
    gross_loss = call_if_not_none(_select(features_and_labels.gross_loss), 'dropna')

    shared_index = intersection_of_index(features, labels, targets, sample_weights, gross_loss)

    # optionally cast the labels to the requested type
    label_type = features_and_labels.label_type
    if label_type is not None:
        labels = labels.astype(label_type)

    features_part = (features.loc[shared_index], len(df) - len(features) + 1)
    return (
        features_part,
        labels.loc[shared_index],
        loc_if_not_none(targets, shared_index),
        loc_if_not_none(sample_weights, shared_index),
        loc_if_not_none(gross_loss, shared_index),
    )
def map_prediction_to_target(df, prediction, targets):
    """Flatten predictions into one column indexed by ``(row label, target)``.

    Predictions and targets are both selected from ``df`` and reshaped to
    ``(len(df), -1)``. Depending on how the two column counts relate, the second
    index level is built differently:

    * same count: each prediction is paired with its (rounded) target value
    * one prediction less than targets: consecutive targets form ranges, stored
      as the string representation of the ``(lower, upper)`` tuple
    * one prediction more than targets: ranges as above but padded with
      ``-inf`` / ``+inf`` at both ends, stored as tuples

    :param df: the source data frame
    :param prediction: selector for the prediction columns
    :param targets: selector for the target columns
    :return: a data frame with a single ``"prediction"`` column and a MultiIndex
    :raises ValueError: if the column counts differ by more than one
    """
    def _round(val, d):
        # only floats get rounded, everything else is passed through untouched
        return round(val, d) if isinstance(val, float) else val

    def _as_ranges(edges):
        # pair every edge with its successor: [a, b, c] -> [(a, b), (b, c)]
        return [(_round(lower, 2), _round(upper, 2)) for lower, upper in zip(edges, edges[1:])]

    prediction_frame = get_pandas_object(df, prediction)
    p = prediction_frame._.values.reshape((len(df), -1))
    target_frame = get_pandas_object(df, targets)
    t = target_frame._.values.reshape((len(df), -1))
    nr_of_predictions, nr_of_targets = p.shape[1], t.shape[1]

    index = []
    if nr_of_predictions == nr_of_targets:
        # 1:1 mapping
        for date in df.index:
            for target in target_frame.loc[date].values:
                index.append((date, _round(target, 2)))
    elif nr_of_predictions == nr_of_targets - 1:
        # we need to build ranges
        for date in df.index:
            for bucket in _as_ranges(target_frame.loc[date].tolist()):
                index.append((date, f"{bucket}"))
    elif nr_of_predictions == nr_of_targets + 1:
        # mapping of the left and right extremes using +/- inf
        for date in df.index:
            edges = [-np.inf, *target_frame.loc[date]._.values.tolist(), np.inf]
            for bucket in _as_ranges(edges):
                index.append((date, bucket))
    else:
        raise ValueError(f"unable to match {p.shape[1]} predictions to {t.shape[1]} +/-1 targets")

    return pd.DataFrame({"prediction": p.reshape((-1,))}, index=pd.MultiIndex.from_tuples(index))
def extract_features(df: pd.DataFrame, features_and_labels, **kwargs) -> Tuple[List, pd.DataFrame, pd.DataFrame]:
    """Select the NaN-free features (and optional targets) from ``df``.

    :param df: the source data frame
    :param features_and_labels: object providing ``features``, ``targets`` and
        ``label_columns``
    :param kwargs: forwarded to ``get_pandas_object``
    :return: ``(label_columns, features, targets)`` with features and targets
        restricted to their common index
    :raises ValueError: if no feature row is left after dropping NaNs
    """
    selected_features = get_pandas_object(df, features_and_labels.features, **kwargs)
    features = selected_features.dropna()

    selected_targets = get_pandas_object(df, features_and_labels.targets, **kwargs)
    targets = call_if_not_none(selected_targets, 'dropna')

    shared_index = intersection_of_index(features, targets)

    if len(features) < 1:
        raise ValueError("not enough data!")

    return (
        features_and_labels.label_columns,
        features.loc[shared_index],
        loc_if_not_none(targets, shared_index),
    )
def ta_stacked_bar(df, columns, figsize=None, ax=None, padding=0.02, **kwargs):
    """Draw one bar series per column, each starting where the previous one ended.

    The first column is drawn as is. Every following column is drawn with
    ``bottom`` set to the running level reached so far and a height of
    ``column - running level``.

    :param df: the source data frame, its index is converted with ``mdates.date2num``
    :param columns: the columns to plot, in drawing order
    :param figsize: passed to ``new_fig_ts_axis`` when no axis is given
    :param ax: an existing axis to draw on, a new one is created if ``None``
    :param padding: relative padding applied to the y limits, ``None`` leaves the
        limits untouched
    :param kwargs: forwarded to ``ax.bar`` (the ``bottom`` key is overwritten from
        the second column on)
    :return: the axis which was drawn on
    """
    # TODO add colors ...
    if ax is None:
        _, ax = new_fig_ts_axis(figsize)

    if padding is not None:
        lower, upper = ax.get_ylim()
        if lower == 0 and upper == 1:
            # limits of exactly (0, 1) are ignored -- presumably they are the
            # untouched defaults of a fresh axis
            lower, upper = np.inf, -np.inf

        values = df[columns].values
        ax.set_ylim(min(values.min(), lower) * (1 - padding),
                    max(values.max(), upper) * (1 + padding))

    running_level = None
    for column in columns:
        series = get_pandas_object(df, column)

        if running_level is None:
            # first column: plain bars starting at the default bottom
            segment = series
            running_level = segment
        else:
            kwargs["bottom"] = running_level
            segment = series - running_level
            # rebind (no in-place add) so kwargs["bottom"] keeps the previous level
            running_level = running_level + segment

        ax.bar(mdates.date2num(df.index), segment, **kwargs)

    return ax
def feature_selection(self,
                      features_and_labels: FeaturesAndLabels,
                      top_features: int = 5,
                      correlation_threshold: float = 0.5,
                      minimum_features: int = 1,
                      lags: Iterable[int] = range(100),
                      show_plots: bool = True,
                      figsize: Tuple[int, int] = (12, 10)):
    """Run the module level ``feature_selection`` on this object's data frame.

    The features and labels are selected from ``self.df`` using the selectors of
    ``features_and_labels``; all remaining arguments are handed over unchanged
    and in the same order.

    :param features_and_labels: provides the ``features`` and ``labels`` selectors
    :param top_features: forwarded as is
    :param correlation_threshold: forwarded as is
    :param minimum_features: forwarded as is
    :param lags: forwarded as is
    :param show_plots: forwarded as is
    :param figsize: forwarded as is
    :return: whatever the module level ``feature_selection`` returns
    """
    # extract pandas objects
    feature_frame = get_pandas_object(self.df, features_and_labels.features)
    label_frame = get_pandas_object(self.df, features_and_labels.labels)

    # try to estimate good features
    selection_args = (
        feature_frame,
        label_frame,
        top_features,
        correlation_threshold,
        minimum_features,
        lags,
        show_plots,
        figsize,
    )
    return feature_selection(*selection_args)
def ta_line(df, fields, figsize=None, ax=None, **kwargs):
    """Plot the selected fields of ``df`` as line(s) against the index of ``df``.

    :param df: the source data frame
    :param fields: selector passed to ``get_pandas_object``
    :param figsize: passed to ``new_fig_ts_axis`` when no axis is given
    :param ax: an existing axis to draw on, a new one is created if ``None``
    :param kwargs: forwarded to ``ax.plot``
    :return: the axis which was drawn on
    """
    selection = get_pandas_object(df, fields)
    y_values = selection.values

    if ax is None:
        _, ax = new_fig_ts_axis(figsize)

    ax.plot(df.index, y_values, **kwargs)
    return ax
def ta_candlestick(self, open="Open", high="High", low="Low", close="Close", ax=None, figsize=None, **kwargs):
    """Draw a candlestick chart from the open/high/low/close selectors.

    Works on a data frame directly or on an object exposing its data frame as
    ``_parent``.

    :param open: selector of the open prices
    :param high: selector of the high prices
    :param low: selector of the low prices
    :param close: selector of the close prices
    :param ax: an existing axis to draw on, a new one is created if ``None``
    :param figsize: passed to ``new_fig_ts_axis`` when no axis is given
    :param kwargs: accepted but not used by this function
    :return: the axis which was drawn on
    """
    if isinstance(self, pd.DataFrame):
        df = self
    else:
        df = self._parent

    if ax is None:
        _, ax = new_fig_ts_axis(figsize)

    # Plot candlestick chart: numeric dates first, then o/h/l/c in that order
    quote_columns = {"Date": mdates.date2num(df.index)}
    for column_name, selector in (("open", open), ("high", high), ("low", low), ("close", close)):
        quote_columns[column_name] = get_pandas_object(df, selector)

    quotes = pd.DataFrame(quote_columns)
    candlestick_ohlc(ax, quotes.values, width=0.6, colorup='g', colordown='r')

    return ax
def extract_labels(df: pd.DataFrame, features_and_labels, **kwargs) -> pd.DataFrame:
    """Select the NaN-free labels from ``df`` and optionally cast them.

    :param df: the source data frame
    :param features_and_labels: object providing the ``labels`` selector and the
        optional ``label_type``
    :param kwargs: forwarded to ``get_pandas_object``
    :return: the labels, cast to ``label_type`` if that is not ``None``
    """
    selected = get_pandas_object(df, features_and_labels.labels, **kwargs)
    labels = selected.dropna()

    label_type = features_and_labels.label_type
    if label_type is None:
        return labels

    return labels.astype(label_type)
def ta_bar(df, fields, figsize=None, ax=None, colors=None, color_map: str = 'afmhot', **kwargs):
    """Plot the selected fields as bars and optionally color each bar by value.

    :param df: the source data frame, its index provides the bar positions
    :param fields: selector of the bar heights
    :param figsize: passed to ``new_fig_ts_axis`` when no axis is given
    :param ax: an existing axis to draw on, a new one is created if ``None``
    :param colors: selector of the values driving the bar colors; coloring is
        skipped when the selection resolves to ``None``
    :param color_map: name of the matplotlib color map used for the coloring
    :param kwargs: forwarded to ``ax.bar``
    :return: the axis which was drawn on
    """
    heights = get_pandas_object(df, fields).values
    colors = get_pandas_object(df, colors)

    if ax is None:
        _, ax = new_fig_ts_axis(figsize)

    bars = ax.bar(df.index, height=heights, **kwargs)

    if colors is None:
        return ax

    to_color = plt.get_cmap(color_map)

    # the value range of the color data is rescaled to the (0, 1) range
    if isinstance(colors, PandasObject):
        domain = (colors.values.min(), colors.values.max())
    else:
        domain = (colors.min(), colors.max())
    rescale = ReScaler(domain, (0, 1))

    for position, color_value in enumerate(colors):
        # TODO if alpha is provided then color = (*color[:-1], alpha)
        bars[position].set_color(to_color(rescale(color_value)))

    return ax
def with_trend_lines(self, field="Close", panel=0, edge_periods=3, rescale_digits=4, degrees=(-90, 90), angles=30, rho_digits=2):
    """Add interactive trend lines to one panel of the figure.

    The trend lines are computed once by ``ta_trend_lines``. Two range sliders
    then filter the resulting lookup table by its ``"distance"`` column (in days)
    and its ``"touch"`` column; every slider change removes the previously drawn
    trend lines and draws the ones passing the filter.

    :param field: selector of the series the trend lines are computed for
    :param panel: index into ``self.axis`` selecting the axis to draw on
    :param edge_periods: forwarded to ``ta_trend_lines``
    :param rescale_digits: forwarded to ``ta_trend_lines``
    :param degrees: forwarded to ``ta_trend_lines``
    :param angles: forwarded to ``ta_trend_lines``
    :param rho_digits: forwarded to ``ta_trend_lines``
    :return: ``self`` to allow chaining
    """
    plt.close(self.fig)

    # only the lookup table is needed here, the accumulation is not used
    _accumulation, lookup = ta_trend_lines(
        get_pandas_object(self.df, field), edge_periods, rescale_digits, degrees, angles, rho_digits)

    def plot_trend_line(time, touches):
        ax = self.axis[panel]
        td = timedelta(days=time[0]), timedelta(days=time[1])

        # first remove all previous trend lines; Artist.remove() is used because
        # assigning to ``ax.lines`` is not possible on recent matplotlib versions.
        # Iterate over a copy since removing mutates the underlying collection.
        for line in list(ax.lines):
            if line.get_label().startswith(".Trend"):
                line.remove()

        # then select the lines from the lookup table
        filtered = lookup[(lookup["touch"] >= touches[0]) & (lookup["touch"] <= touches[1])]
        filtered = filtered[(filtered["distance"] >= td[0]) & (filtered["distance"] <= td[1])]

        for _, tl in filtered.iterrows():
            # a trend line is drawn from its first to its last point
            points = tl["points"][0], tl["points"][-1]
            ax.plot([p[0] for p in points], [p[1] for p in points], label=".Trend")

        return self.fig

    # TODO later add a wg.IntSlider to extend the trend lines from its last point
    min_ts, max_ts = 2, len(self.df)
    time_slider = wg.IntRangeSlider(value=[max_ts, max_ts],
                                    min=min_ts,
                                    max=max_ts,
                                    step=1,
                                    continuous_update=False,
                                    description='Period:')

    min_to, max_to = 2, lookup["touch"].max()
    touch_slider = wg.IntRangeSlider(value=[min_to, max_to],
                                     min=min_to,
                                     max=max_to,
                                     step=1,
                                     continuous_update=False,
                                     description='Touches:')

    wg.interact(plot_trend_line, time=time_slider, touches=touch_slider)
    self.fig.show()
    return self
def extract_feature_labels_weights(df: Typing.PatchedDataFrame, features_and_labels, **kwargs) -> FeaturesWithLabels:
    """Extract features, labels and all optional frames aligned on one common index.

    Features, targets and latent come from ``extract_features``, labels from
    ``extract_labels``; sample weights and gross loss are selected directly from
    ``df``. Frames whose maximum value is infinite are logged and cleaned in
    place before the common index is computed.

    :param df: the source data frame
    :param features_and_labels: object providing the selectors
    :param kwargs: forwarded to the extraction helpers
    :return: a ``FeaturesWithLabels`` holding the frames restricted to the common
        index; the features part additionally carries
        ``len(df) - len(features) + 1`` and the number of feature columns
    """
    features, targets, latent = extract_features(df, features_and_labels, **kwargs)
    labels = extract_labels(df, features_and_labels, **kwargs)

    raw_weights = get_pandas_object(df, features_and_labels.sample_weights, **kwargs)
    sample_weights = call_if_not_none(raw_weights, 'dropna')
    raw_gross_loss = get_pandas_object(df, features_and_labels.gross_loss, **kwargs)
    gross_loss = call_if_not_none(raw_gross_loss, 'dropna')

    # do some sanity check for any non numeric values in any of the data frames
    for frame in (features, labels, targets, sample_weights, gross_loss):
        if frame is None:
            continue

        # we could have nested arrays so we need to use the un-nested values
        flat_values = flatten_nested_list(frame._.values, np.max)
        max_value = max(v.max() for v in flat_values)

        if np.isscalar(max_value) and np.isinf(max_value):
            rows_with_inf = frame[frame.apply(lambda r: np.isinf(r.values).any(), axis=1)]
            _log.warning(f"features containing infinit number\n{rows_with_inf}")

            # turn +/- inf into NaN so that dropna removes the affected rows
            frame.replace([np.inf, -np.inf], np.nan, inplace=True)
            frame.dropna(inplace=True)

    # now get the common index and return the filtered data frames
    common_index = intersection_of_index(features, labels, targets, sample_weights, gross_loss)

    if isinstance(features, tuple):
        aligned_features = tuple(f.loc[common_index] for f in features)
    else:
        aligned_features = features.loc[common_index]

    features_part = FeaturesWithRequiredSamples(
        aligned_features,
        len(df) - len(features) + 1,
        len(features.columns)
    )

    return FeaturesWithLabels(
        features_part,
        labels.loc[common_index],
        loc_if_not_none(latent, common_index),
        loc_if_not_none(targets, common_index),
        loc_if_not_none(sample_weights, common_index),
        loc_if_not_none(gross_loss, common_index)
    )
def ta_matrix(df, fields, figsize=None, ax=None, **kwargs):
    """Show a matrix as an image without tick labels.

    :param df: the source data frame, only used when ``fields`` is a selector
    :param fields: either a numpy array which is plotted as is, or a selector
        whose ``.ml.values`` are squeezed and plotted
    :param figsize: passed to ``new_fig_ts_axis`` when no axis is given
    :param ax: an existing axis to draw on, a new one is created if ``None``
    :param kwargs: accepted but not used by this function
    :return: the axis which was drawn on
    """
    if isinstance(fields, np.ndarray):
        matrix = fields
    else:
        matrix = get_pandas_object(df, fields).ml.values.squeeze()

    if ax is None:
        _, ax = new_fig_ts_axis(figsize)

    ax.matshow(matrix)

    # hide the tick labels on both axes
    ax.set_yticklabels([])
    ax.set_xticklabels([])

    return ax
def extract_features(df: pd.DataFrame, features_and_labels, **kwargs) -> Tuple[List, pd.DataFrame, pd.DataFrame]:
    """Select the NaN-free features (and optional targets) from ``df``.

    If ``features_and_labels.features`` is a tuple, every element is treated as
    its own feature set and the resulting frames are wrapped into one
    ``MultiFrameDecorator``.

    :param df: the source data frame
    :param features_and_labels: object providing ``features``, ``targets`` and
        ``label_columns``
    :param kwargs: forwarded to ``get_pandas_object``
    :return: ``(label_columns, features, targets)`` with features and targets
        restricted to their common index
    :raises ValueError: if ``len(features)`` is not positive
    """
    feature_selectors = features_and_labels.features

    if not isinstance(feature_selectors, tuple):
        features = get_pandas_object(df, feature_selectors, **kwargs).dropna()
    else:
        # allow multiple feature sets i.e. for multi input layered networks
        feature_frames = []
        for selector in feature_selectors:
            feature_frames.append(get_pandas_object(df, selector, **kwargs).dropna())
        features = MultiFrameDecorator(feature_frames, True)

    selected_targets = get_pandas_object(df, features_and_labels.targets, **kwargs)
    targets = call_if_not_none(selected_targets, 'dropna')

    shared_index = intersection_of_index(features, targets)

    if len(features) < 1:
        raise ValueError("not enough data!")

    return (
        features_and_labels.label_columns,
        features.loc[shared_index],
        loc_if_not_none(targets, shared_index),
    )
def extract_feature_labels_weights(
        df: Typing.PatchedDataFrame,
        features_and_labels,
        **kwargs
) -> Tuple[Tuple[pd.DataFrame, int], pd.DataFrame, pd.DataFrame, pd.Series, pd.Series]:
    """Extract features, labels and all optional frames aligned on one common index.

    Features and targets come from ``extract_features``; labels, sample weights
    and gross loss are selected directly from ``df``. Frames whose maximum value
    is infinite are logged and cleaned in place before the common index is
    computed.

    :param df: the source data frame
    :param features_and_labels: object providing the selectors and ``label_type``
    :param kwargs: forwarded to the extraction helpers
    :return: ``((features, n), labels, targets, sample_weights, gross_loss)`` where
        ``n`` is ``len(df) - len(features) + 1``; if ``features`` is a tuple every
        element is restricted to the common index individually
    """
    _, features, targets = extract_features(df, features_and_labels, **kwargs)
    labels = get_pandas_object(df, features_and_labels.labels, **kwargs).dropna()
    sample_weights = call_if_not_none(
        get_pandas_object(df, features_and_labels.sample_weights, **kwargs), 'dropna')
    gross_loss = call_if_not_none(
        get_pandas_object(df, features_and_labels.gross_loss, **kwargs), 'dropna')

    if features_and_labels.label_type is not None:
        labels = labels.astype(features_and_labels.label_type)

    # do some sanity check for any non numeric values in any of the data frames
    for frame in [features, labels, targets, sample_weights, gross_loss]:
        if frame is None:
            continue

        # named max_value so the builtin ``max`` is not shadowed
        max_value = frame._.max()
        if np.isscalar(max_value) and np.isinf(max_value):
            # the rows are handed over as a lazy logging argument, which needs
            # the %s placeholder to actually end up in the message
            _log.warning(
                "features containing infinit number\n%s",
                frame[frame.apply(lambda r: np.isinf(r.values).any(), axis=1)])

            # turn +/- inf into NaN so that dropna removes the affected rows
            frame.replace([np.inf, -np.inf], np.nan, inplace=True)
            frame.dropna(inplace=True)

    # now get the common index and return the filtered data frames
    common_index = intersection_of_index(features, labels, targets, sample_weights, gross_loss)

    if isinstance(features, tuple):
        aligned_features = tuple(f.loc[common_index] for f in features)
    else:
        aligned_features = features.loc[common_index]

    return ((aligned_features, len(df) - len(features) + 1),
            labels.loc[common_index],
            loc_if_not_none(targets, common_index),
            loc_if_not_none(sample_weights, common_index),
            loc_if_not_none(gross_loss, common_index))
def extract_feature_labels_weights(
        df: Typing.PatchedDataFrame,
        features_and_labels,
        **kwargs
) -> Tuple[Tuple[pd.DataFrame, int], pd.DataFrame, pd.DataFrame, pd.Series, pd.Series]:
    """Select features, labels and the optional frames from ``df`` on one shared index.

    Every selector found on ``features_and_labels`` is resolved against ``df``
    through ``get_pandas_object``. Frames whose maximum value is infinite are
    logged and cleaned in place before the common index is computed.

    :param df: the source data frame
    :param features_and_labels: object providing the selectors and ``label_type``
    :param kwargs: forwarded to ``get_pandas_object``
    :return: ``((features, n), labels, targets, sample_weights, gross_loss)`` where
        ``n`` is ``len(df) - len(features) + 1``
    """
    features = get_pandas_object(df, features_and_labels.features, **kwargs).dropna()
    labels = get_pandas_object(df, features_and_labels.labels, **kwargs).dropna()
    targets = call_if_not_none(
        get_pandas_object(df, features_and_labels.targets, **kwargs), 'dropna')
    sample_weights = call_if_not_none(
        get_pandas_object(df, features_and_labels.sample_weights, **kwargs), 'dropna')
    gross_loss = call_if_not_none(
        get_pandas_object(df, features_and_labels.gross_loss, **kwargs), 'dropna')

    if features_and_labels.label_type is not None:
        labels = labels.astype(features_and_labels.label_type)

    # sanity check: look for infinite values in any of the frames
    for frame in [features, labels, targets, sample_weights, gross_loss]:
        if frame is None:
            continue

        # named max_value so the builtin ``max`` is not shadowed
        max_value = frame._.values.max()
        if np.isscalar(max_value) and np.isinf(max_value):
            # the rows are handed over as a lazy logging argument, which needs
            # the %s placeholder to actually end up in the message
            _log.warning(
                "features containing infinit number\n%s",
                frame[frame.apply(lambda r: np.isinf(r.values).any(), axis=1)])

            # turn +/- inf into NaN so that dropna removes the affected rows
            frame.replace([np.inf, -np.inf], np.nan, inplace=True)
            frame.dropna(inplace=True)

    common_index = intersection_of_index(features, labels, targets, sample_weights, gross_loss)

    return ((features.loc[common_index], len(df) - len(features) + 1),
            labels.loc[common_index],
            loc_if_not_none(targets, common_index),
            loc_if_not_none(sample_weights, common_index),
            loc_if_not_none(gross_loss, common_index))
def extractor(df, **kwargs):
    """Select ``selectors`` from ``df`` and apply ``postprocessor`` to the result.

    ``selectors`` and ``postprocessor`` are not parameters; they are looked up
    in the surrounding scope. ``kwargs`` are forwarded to both
    ``get_pandas_object`` calls.
    """
    selection = get_pandas_object(df, selectors, **kwargs)
    return get_pandas_object(selection, postprocessor, **kwargs)
def extractor(df, **kwargs):
    """Select ``list`` from ``df`` and apply ``postprocessor`` to the result.

    ``postprocessor`` is looked up in the surrounding scope and ``kwargs`` are
    forwarded to both ``get_pandas_object`` calls.
    """
    # NOTE(review): ``list`` is resolved as a free name here -- presumably an
    # enclosing scope binds it to the selectors; otherwise this is the builtin
    return get_pandas_object(
        get_pandas_object(df, list, **kwargs),
        postprocessor,
        **kwargs
    )