def plot(df: pd.DataFrame):
    """Draw close prices with trend overlays and a trailing-acceleration panel.

    Top panel (log y-axis): the ``DF_CLOSE`` column, two
    ``TrailingStats.lin_reg_value`` curves (windows ``365*2`` and ``100``)
    and two Savitzky-Golay smoothings of the close series.
    Bottom panel: ``exp_accel`` of the window-100 ``TrailingStats``.

    Args:
        df: Price table; its index is used as the x-axis and only the
            ``DF_CLOSE`` column is read.

    Note:
        ``savgol_filter`` is called with ``window_length=1001`` in its
        default ``mode='interp'``, which requires the series to have at
        least that many samples.
    """
    s_close = df[DF_CLOSE]
    s_time = df.index

    # Tick and grid styling is applied to whichever axes is current on entry.
    # NOTE(review): the plt.subplot() calls below may switch to other axes,
    # in which case this styling never reaches the plotted panels -- confirm.
    ax = plt.gca()
    plt.xticks(rotation=60, ha='right')
    plt.grid(which='major', color='k', lw=0.25, alpha=0.5)
    plt.grid(which='minor', color='k', lw=0.25, alpha=0.2)

    # Longer inputs get year/month ticks; shorter ones month/day ticks.
    if len(df.index) / 12 > 20:
        major_locator = matplotlib.dates.YearLocator()
        minor_locator = matplotlib.dates.MonthLocator()
        date_format = '%Y'
    else:
        major_locator = matplotlib.dates.MonthLocator()
        minor_locator = matplotlib.dates.DayLocator()
        date_format = '%Y-%m'
    ax.xaxis.set_major_locator(major_locator)
    ax.xaxis.set_minor_locator(minor_locator)
    ax.xaxis.set_major_formatter(matplotlib.dates.DateFormatter(date_format))

    ts = TrailingStats(s_close, window_size=365*2)
    ts2 = TrailingStats(s_close, window_size=100)
    s_fit = ts.lin_reg_value
    s_fit2 = ts2.lin_reg_value

    s3 = savgol_filter(s_close, window_length=31, polyorder=3)
    # Two-stage smoothing: a wide cubic pass, then a narrower 6th-order pass.
    s4 = savgol_filter(s_close, window_length=1001, polyorder=3)
    s4 = savgol_filter(s4, window_length=301, polyorder=6)

    plt.subplot(2, 1, 1)
    plt.semilogy(s_time, s_close, '.-', ms=2, alpha=.2)
    # NOTE(review): the legend labels say 400/40 but the windows above are
    # 730/100 -- labels left untouched, confirm which is intended.
    plt.semilogy(s_time, s_fit, '-', alpha=.6, label='exp fit-400')
    plt.semilogy(s_time, s_fit2, '-', alpha=.6, label='exp fit-40')
    plt.semilogy(s_time, s3, '--', alpha=.6, label='sav-gol')
    plt.semilogy(s_time, s4, '-', alpha=.6, label='sav-gol-2')
    plt.legend()
    plt.grid()

    plt.subplot(2, 1, 2)
    plt.plot(s_time, ts2.exp_accel)
    plt.grid()
def func1(window_size, df: pd.DataFrame):
    """Return ``(exp_growth, exp_std_dev)`` of the adjusted-close column.

    Args:
        window_size: Window passed to ``TrailingStats``.
        df: Table containing the ``DF_ADJ_CLOSE`` column.

    Returns:
        Tuple ``(rate, volatility)`` where ``rate`` is
        ``TrailingStats.exp_growth`` and ``volatility`` is
        ``TrailingStats.exp_std_dev``.
    """
    stats = TrailingStats(df[DF_ADJ_CLOSE], window_size)
    volatility = stats.exp_std_dev
    rate = stats.exp_growth
    # The leftover pdb.set_trace() breakpoint was removed: it blocked every
    # call waiting for interactive debugger input.
    return rate, volatility
def test_trailing_avg():
    """Check ``TrailingStats.rolling_avg`` against a per-interval ``np.mean``.

    Uses the last 100 adjusted-close samples of 'VOO' with a window of 21,
    asserts both computations agree, then plots them for visual inspection.
    """
    symbol = 'VOO'
    yahoo = YahooData([symbol])
    series = yahoo.dataframes[symbol][DF_ADJ_CLOSE].iloc[-100:]
    window = 21
    ts = TrailingStats(series, window)

    # Reference implementation: plain mean of each interval.
    avgs = np.array([np.mean(interval) for interval in ts._adj_close_intervals])
    assert np.all(np.isclose(avgs, ts.rolling_avg))

    plt.figure()
    plt.plot(ts.series, label='actual')
    plt.plot(ts.times, avgs, label='TEST AVG')
    plt.plot(ts.times, ts.rolling_avg, '--', label='IMPLEMENTED AVG')
    plt.legend()
def test_std():
    """Plot ``TrailingStats.exp_std_dev`` for 'VOO' during 2020.

    Rows strictly between 2020-01-01 and 2020-11-01 are kept. The top panel
    is a log-scale scatter of ``ts.values`` coloured by ``exp_std_dev``; the
    bottom panel plots ``exp_std_dev`` against ``ts.times``. Nothing is
    asserted; the function only has to run without raising.
    """
    yahoo = YahooData()
    df = yahoo.dataframes['VOO']

    start = np.datetime64('2020-01-01')
    end = np.datetime64('2020-11-01')
    in_range = (df.index > start) & (df.index < end)
    series = df.loc[in_range][DF_ADJ_CLOSE]

    ts = TrailingStats(series, window_size=10)
    std_dev = ts.exp_std_dev
    times = ts.times
    values = ts.values

    ax = plt.subplot(2, 1, 1)
    ax.set_yscale('log')
    plt.scatter(times, values, c=std_dev, s=4)
    plt.grid()

    plt.subplot(2, 1, 2)
    plt.plot(times, std_dev)
    plt.grid()
def _rolling_stats(self, s: pd.Series):
    """Apply ``self._stats_function`` to each trailing interval of ``s``.

    ``s`` is first inner-joined with ``self.dict['SPY']`` so that only index
    entries present in both series remain. Intervals of the joined series
    are then paired with the intervals and ``mean`` values of
    ``self._roi_spy_trailing_stats``.

    Args:
        s: Series to analyse.

    Returns:
        DataFrame with one row per ``_stats_function`` result, indexed by
        the ``times`` of the ``TrailingStats`` built from the joined series.
    """
    aligned = pd.concat(
        {'SPY': self.dict['SPY'], 'AAA': s},
        join='inner',
        axis=1,
    )
    ts = TrailingStats(aligned['AAA'], window_size=self.intervals)
    ts_spy = self._roi_spy_trailing_stats

    # zip() stops at the shortest of the three inputs.
    triples = zip(ts_spy._values_intervals, ts._values_intervals, ts_spy.mean)
    rows = [
        self._stats_function(
            s=pd.Series(value),
            roi_spy=value_spy,
            target=target,
        )
        for value_spy, value, target in triples
    ]
    return pd.DataFrame(rows, index=ts.times)
def test_skip1():
    """Check that ``skip=k`` matches every k-th sample of an unskipped run.

    For each ``k`` in 2..9 a ``TrailingStats`` built with ``skip=k`` is
    compared, attribute by attribute, against ``[::k]`` slices of a
    ``TrailingStats`` built without ``skip`` (window size 5, 'SPY').
    """
    yahoo = YahooData()
    series = yahoo.dataframes['SPY'][DF_ADJ_CLOSE]
    window_size = 5

    def compare_attr(stats1: TrailingStats,
                     stats2: TrailingStats,
                     name: str,
                     skip: int):
        """Assert ``stats1.<name>[::skip]`` equals ``stats2.<name>``."""
        print(f'comparing {name}')
        a1 = getattr(stats1, name)[::skip]
        a2 = getattr(stats2, name)
        # NaN != NaN, so zero out NaNs on both sides before comparing.
        # NOTE(review): these are in-place writes; if the attribute returns
        # a cached array they write through to it -- confirm this is fine.
        a1[np.isnan(a1)] = 0
        a2[np.isnan(a2)] = 0
        print(f'len1 = {len(a1)}')
        print(f'len2 = {len(a2)}')
        assert np.all(a1 == a2)

    attr_names = (
        'times',
        'time_days_int',
        'rolling_avg',
        'return_ratio',
        'slope_normalized',
    )
    ts1 = TrailingStats(series, window_size=window_size)
    for skip in range(2, 10):
        print(f'testing skip {skip}')
        ts2 = TrailingStats(series, window_size=window_size, skip=skip)
        for name in attr_names:
            # skip is passed explicitly instead of being captured by closure.
            compare_attr(ts1, ts2, name, skip)
def post2(df: pd.DataFrame):
    """Standardize the window-10 ``exp_growth`` of the adjusted close.

    The NaN-ignoring mean is subtracted and the result is divided by the
    NaN-ignoring standard deviation.
    """
    growth = TrailingStats(df[DF_ADJ_CLOSE], 10).exp_growth
    center = np.nanmean(growth)
    spread = np.nanstd(growth)
    return (growth - center) / spread
def post1(df: pd.DataFrame):
    """Standardize the window-15 ``return_ratio`` of the adjusted close.

    The NaN-ignoring mean is subtracted and the result is divided by the
    NaN-ignoring standard deviation.
    """
    ratio = TrailingStats(df[DF_ADJ_CLOSE], 15).return_ratio
    center = np.nanmean(ratio)
    spread = np.nanstd(ratio)
    return (ratio - center) / spread
def stop_loss(df: pd.DataFrame):
    """Return ``max_gain - max_loss`` over a 200 window, passed to ``append_nan``.

    Args:
        df: Table containing the ``DF_ADJ_CLOSE`` column.

    Returns:
        The result of ``append_nan(max_gain - max_loss, ts.window_size)``.
    """
    ts = TrailingStats(df[DF_ADJ_CLOSE], window_size=200)
    worst = ts.max_loss
    best = ts.max_gain
    return append_nan(best - worst, ts.window_size)
def post2(df: pd.DataFrame):
    """Return the window-8 regression slope weighted by r^2, scaled by its std.

    The metric is ``slope * rvalue**2`` from
    ``TrailingStats.linear_regression``, divided by ``cumulative_std`` of
    that same metric.
    """
    slope, _intercept, rvalue = TrailingStats(df[DF_ADJ_CLOSE], 8).linear_regression
    weighted_slope = slope * rvalue**2
    return weighted_slope / cumulative_std(weighted_slope)
def my_indicators(df, window: int):
    """Return ``(rolling_avg, exp_growth)`` of the adjusted close, NaNs zeroed.

    Args:
        df: Table containing the ``DF_ADJ_CLOSE`` column.
        window: Window passed to ``TrailingStats``.

    Returns:
        Tuple of the two indicator arrays; NaN entries are overwritten with
        0 in place.
    """
    ts = TrailingStats(df[DF_ADJ_CLOSE], window)
    indicators = (ts.rolling_avg, ts.exp_growth)
    for values in indicators:
        values[np.isnan(values)] = 0
    return indicators
def test_skip0():
    """Check ``skip=10`` against every 10th sample of an unskipped run.

    Compares ``times`` and ``slope_normalized`` (NaNs replaced by 0) for
    'SPY' adjusted close with a window size of 50.
    """
    series = YahooData().dataframes['SPY'][DF_ADJ_CLOSE]
    step = 10
    size = 50

    # Reference: compute everything, then keep every `step`-th sample.
    dense = TrailingStats(series, window_size=size)
    dense_times = dense.times[::step]
    dense_slopes = dense.slope_normalized[::step]
    dense_slopes[np.isnan(dense_slopes)] = 0

    # Under test: let TrailingStats do the skipping itself.
    sparse = TrailingStats(series, window_size=size, skip=step)
    sparse_times = sparse.times
    sparse_slopes = sparse.slope_normalized
    sparse_slopes[np.isnan(sparse_slopes)] = 0

    assert np.all(dense_times == sparse_times)
    assert np.all(dense_slopes == sparse_slopes)
def create_data(symbols: list[str]):
    """Build one feature table per symbol from trailing indicators.

    For every window in ``[5, 10, 20, 50, 100, 400]`` the ``exp_growth`` and
    ``rolling_avg`` attributes of ``TrailingStats`` become columns named
    ``'<attr>(<window>)'``, with the last 20 samples dropped. The second
    output of ``avg_future_growth`` (window 20) and the matching dates are
    added as the ``'avg_future_growth'`` and ``'date'`` columns.

    Args:
        symbols: Symbols to load through ``YahooData``.

    Returns:
        Dict mapping each symbol that has data to its DataFrame. Symbols
        whose data is empty are reported on stdout and left out.
    """
    yahoo = YahooData(symbols)
    window_sizes = np.array([5, 10, 20, 50, 100, 400])
    max_window = np.max(window_sizes)
    future_growth_window = 20
    attrs = ['exp_growth', 'rolling_avg']

    df_dict = {}
    for symbol in symbols:
        print(f'Calculating symbol {symbol}')
        df = yahoo.get_symbol_all(symbol)
        if len(df) == 0:
            print(f'Symbol {symbol} data not available')
            continue

        series = df[DF_ADJ_CLOSE]
        columns = {}

        # Indicator columns, trimmed to the same length as the dates below.
        for window in window_sizes:
            ts = TrailingStats(series, window)
            for attr in attrs:
                values = getattr(ts, attr)
                columns[attr + f'({window})'] = values[0:-future_growth_window]

        # Future growth column and the dates it is stored against.
        times = utils.dates2days(series.index.values)
        dates = series.index[0:-future_growth_window]
        _, growths = avg_future_growth(times,
                                       series.values,
                                       window=future_growth_window)
        columns['avg_future_growth'] = growths
        columns['date'] = dates

        # Drop the first rows, where the largest window yields NaN.
        df_dict[symbol] = pd.DataFrame(columns).iloc[max_window:]
    return df_dict
def indicator1(df):
    """Return ``exp_growth`` of the 'Close' column for several windows.

    Returns:
        Dict keyed by the window size as a string ('5', '10', '20', '50');
        each value is the ``exp_growth`` array with NaNs overwritten by 0.
    """
    close = df['Close']
    outs = {}
    for window in (5, 10, 20, 50):
        growth = TrailingStats(close, window).exp_growth
        # NaN entries are zeroed in place.
        growth[np.isnan(growth)] = 0
        outs[str(window)] = growth
    return outs
def test_skip():
    """Check ``slope_normalized`` and ``times`` have equal length for many skips.

    Uses 'SPY' adjusted close, a window size of 20 and skips 2, 5, 8, ..., 41.
    """
    series = YahooData().dataframes['SPY'][DF_ADJ_CLOSE]
    for skip in range(2, 43, 3):
        ts = TrailingStats(series, window_size=20, skip=skip)
        n_slopes = len(ts.slope_normalized)
        assert n_slopes == len(ts.times)
def test_max_loss():
    """Smoke-test ``TrailingStats.max_loss`` on 'VOO' data from 2020.

    Rows strictly between 2020-01-01 and 2020-11-01 are used with a window
    size of 10. Nothing is asserted; the attribute only has to evaluate.
    """
    df = YahooData().dataframes['VOO']
    lower = np.datetime64('2020-01-01')
    upper = np.datetime64('2020-11-01')
    mask = (df.index > lower) & (df.index < upper)
    series = df.loc[mask][DF_ADJ_CLOSE]

    stats = TrailingStats(series, window_size=10)
    _ = stats.max_loss
def indicator1(df):
    """Return ``exp_growth`` and ``exp_reg_diff`` of adjusted close per window.

    Returns:
        Dict with keys ``'growth(<window>)'`` and ``'diff(<window>)'`` for
        windows 100 and 400; NaN entries of each array are overwritten by 0.
    """
    close = df[DF_ADJ_CLOSE]
    labelled_attrs = (('growth', 'exp_growth'), ('diff', 'exp_reg_diff'))
    outs = {}
    for window in (100, 400):
        ts = TrailingStats(close, window)
        for label, attr in labelled_attrs:
            values = getattr(ts, attr)
            values[np.isnan(values)] = 0
            outs[f'{label}({window})'] = values
    return outs
def features(self, window: int):
    """Build a feature table from ``self.series`` for one window.

    Args:
        window: Window passed to ``TrailingStats``; also the number of
            leading rows dropped from the result.

    Returns:
        DataFrame indexed like ``self.series`` with columns
        ``'Close(exp_growth, <window>)'`` and ``'Close(exp_accel, <window>)'``,
        minus its first ``window`` rows.
    """
    attrs = ['exp_growth', 'exp_accel']
    series = self.series
    ts = TrailingStats(series, window)

    columns = {
        f'Close({attr}, {window})': getattr(ts, attr)
        for attr in attrs
    }
    # The index must come from the local series; the original referenced an
    # undefined name `series` here, which raised NameError.
    df_new = pd.DataFrame(columns, index=series.index)
    return df_new.iloc[window:]
def test_close_intervals():
    """Check the first and last ``_adj_close_intervals`` against raw slices.

    Uses the last 400 'VOO' rows and a window size of 11: the first interval
    must match the first 11 samples and the last interval the final 11.
    """
    symbol = 'VOO'
    frame = YahooData([symbol]).dataframes[symbol].iloc[-400:]
    series = frame[DF_ADJ_CLOSE]
    window_size = 11
    stats = TrailingStats(series, window_size)

    first_expected = series.iloc[0:window_size]
    assert np.all(np.isclose(stats._adj_close_intervals[0], first_expected))

    last_expected = series.iloc[-window_size:]
    assert np.all(np.isclose(stats._adj_close_intervals[-1], last_expected))
def post1(df: pd.DataFrame):
    """Compute regression-based metrics on the peak ratio of adjusted close.

    ``peak_ratio`` is applied to the adjusted-close values and a window-15
    ``TrailingStats.linear_regression`` is run on the result.

    Args:
        df: Table containing the ``DF_ADJ_CLOSE`` column.

    Returns:
        Dict with keys ``'metric'`` (``-slope * r2 / cumulative_std(slope)``),
        ``'slope'``, ``'r2'`` (``rvalue**2``) and ``'peak_ratio'``.
    """
    series = df[DF_ADJ_CLOSE]
    pratio = peak_ratio(series.values)
    pseries = pd.Series(data=pratio, index=series.index)

    # The intercept is not needed; the unused cumulative_std(rvalue) call
    # from the original was dropped.
    slope, _intercept, rvalue = TrailingStats(pseries, 15).linear_regression
    r2 = rvalue**2
    slope_std = cumulative_std(slope)

    return {
        'metric': -slope * r2 / slope_std,
        'slope': slope,
        'r2': r2,
        'peak_ratio': pratio,
    }
def my_indicator(df):
    """Return the element-wise maximum ``exp_growth`` over several windows.

    ``exp_growth`` of the adjusted close is computed for windows 400, 600
    and 1200. NaN entries are replaced by -1 before taking the maximum
    across windows.

    Args:
        df: Table containing the ``DF_ADJ_CLOSE`` column.

    Returns:
        Array holding, per sample, the largest growth value across windows.
    """
    series = df[DF_ADJ_CLOSE]
    windows = np.array([400, 600, 1200])

    growths = []
    for window in windows:
        growth = TrailingStats(series, window).exp_growth
        growth[np.isnan(growth)] = -1
        # Append the patched array itself. The original re-read
        # ts.exp_growth here, which keeps the NaN replacement only if that
        # attribute returns the same array object on every access.
        growths.append(growth)

    return np.array(growths).max(axis=0)
def post1(df: pd.DataFrame):
    """Pick, per sample, the regression slope from the best-fitting window.

    ``TrailingStats.linear_regression`` is run on the log of adjusted close
    for windows 252, 504 and 1008. At each position the slope from the
    window with the largest ``r**2`` is returned.
    """
    series_log = np.log(df[DF_ADJ_CLOSE])

    slopes_per_window = []
    r2_per_window = []
    for multiple in (1, 2, 4):
        slopes, _intercept, r = TrailingStats(series_log, 252 * multiple).linear_regression
        slopes_per_window.append(slopes)
        r2_per_window.append(r**2)

    slope_table = np.array(slopes_per_window)
    r2_table = np.array(r2_per_window)

    # Row index of the best window for each column, then pick row/column pairs.
    best_row = np.argmax(r2_table, axis=0)
    columns = np.arange(len(best_row))
    return slope_table[best_row, columns]
def _features(self, df, window: int):
    """Build a feature table from the adjusted close of ``df`` for one window.

    Args:
        df: Table containing the ``DF_ADJ_CLOSE`` column.
        window: Window passed to ``TrailingStats``; also the number of
            leading rows dropped from the result.

    Returns:
        DataFrame indexed like ``df`` with columns
        ``'Close(exp_growth, <window>)'`` and
        ``'Close(exp_std_dev, <window>)'``, minus its first ``window`` rows.
        The ``exp_std_dev`` column is standardized with NaN-ignoring mean
        and standard deviation.
    """
    ts = TrailingStats(df[DF_ADJ_CLOSE], window)
    columns = {}
    for attr in ('exp_growth', 'exp_std_dev'):
        values = getattr(ts, attr)
        if 'exp_std_dev' in attr:
            # Only this feature is standardized.
            values = (values - np.nanmean(values)) / np.nanstd(values)
        columns[f'Close({attr}, {window})'] = values

    table = pd.DataFrame(columns, index=df.index)
    return table.iloc[window:]
import matplotlib.pyplot as plt
# NOTE(review): scipy.signal.cwt is not used in the visible part of this
# script, and recent SciPy releases no longer ship it -- confirm the import
# still works on the installed version.
from scipy.signal import cwt
from backtester.stockdata import YahooData
from backtester.indicators import TrailingStats
from backtester.definitions import DF_ADJ_CLOSE

# Script: plot GOOG adjusted close (log scale) above its trailing exp_growth
# for several window sizes.
# NOTE(review): `np` is used below but is not imported in the visible lines.
yahoo = YahooData()
df = yahoo['GOOG']
series = df[DF_ADJ_CLOSE]
y = series.values
logy = np.log(y)

# One exp_growth array per window, in the same order as `windows`.
windows = [20, 50, 70, 90, 600]
growths = []
for window in windows:
    ts = TrailingStats(series, window)
    growths.append(ts.exp_growth)

# x-axis limits applied to the top panel.
date1 = np.datetime64('2008-01-01')
date2 = np.datetime64('2021-01-01')
# f = np.abs(np.fft.fft(logy))

# Top panel: price on a log y-axis.
ax = plt.subplot(2, 1, 1)
plt.plot(series)
ax.set_yscale('log')
plt.grid()
plt.xlim(date1, date2)

# Bottom panel: one growth curve per window, labelled with the window size.
plt.subplot(2, 1, 2)
for ii, window in enumerate(windows):
    plt.plot(series.index, growths[ii], label=window)
def fn_std_change(df: pd.DataFrame, window_size):
    """Return ``TrailingStats.exp_std_dev`` of the adjusted-close column.

    Args:
        df: Table containing the ``DF_ADJ_CLOSE`` column.
        window_size: Window passed to ``TrailingStats``.
    """
    adj_close = df[DF_ADJ_CLOSE]
    return TrailingStats(adj_close, window_size).exp_std_dev
def fn_growth(df: pd.DataFrame, window_size):
    """Return ``TrailingStats.exp_growth`` of the adjusted-close column.

    Args:
        df: Table containing the ``DF_ADJ_CLOSE`` column.
        window_size: Window passed to ``TrailingStats``.
    """
    adj_close = df[DF_ADJ_CLOSE]
    return TrailingStats(adj_close, window_size).exp_growth
from backtester.definitions import DF_ADJ_CLOSE
from backtester.indicators import TrailingStats

# Script: plot trailing max loss, max gain and their difference for SPY
# adjusted close, restricted to 2003-01-01 .. 2005-01-01.
# NOTE(review): YahooData, np and plt are used below but are not imported in
# the visible lines.
y = YahooData()
start = np.datetime64('2003-01-01')
end = np.datetime64('2005-01-01')
y = y.filter_dates(start=start, end=end)
s = y.dataframes['SPY'][DF_ADJ_CLOSE]

# `window` is rebound by the loop below; this first value is never read.
window = 100
windows = [
    100,
]
# NOTE(review): the source was collapsed onto one line, so the extent of the
# loop body is reconstructed; with a single window the result is the same
# wherever the loop ends.
for window in windows:
    ts = TrailingStats(s, window)
    loss = ts.max_loss
    # loss99 = np.percentile(loss, 99)

    # Top panel: max loss, max gain and their difference.
    plt.subplot(2, 1, 1)
    plt.plot(ts.times, ts.max_loss, label='max loss')
    plt.plot(ts.times, ts.max_gain, label='max gain')
    delta = ts.max_gain - ts.max_loss
    plt.plot(ts.times, delta, label='delta')

    # Summary statistics of max_loss; not used in the visible lines.
    max_loss_mean = np.mean(ts.max_loss)
    max_loss_p = np.percentile(ts.max_loss, 95)
    plt.axhline(0.0)
    plt.legend()

    # Bottom panel is selected here; its contents are outside this chunk.
    plt.subplot(2, 1, 2)
def post1(df: pd.DataFrame):
    """Return ``max_gain - max_loss`` of adjusted close over a window of 100."""
    stats = TrailingStats(df[DF_ADJ_CLOSE], 100)
    return stats.max_gain - stats.max_loss
def func2(window_size, df: pd.DataFrame):
    """Return ``TrailingStats.exp_std_dev`` of the adjusted-close column.

    Args:
        window_size: Window passed to ``TrailingStats``.
        df: Table containing the ``DF_ADJ_CLOSE`` column.
    """
    adj_close = df[DF_ADJ_CLOSE]
    return TrailingStats(adj_close, window_size).exp_std_dev
def post1(df: pd.DataFrame):
    """Return ``exp_growth`` of adjusted close over a window of ``252 * 5``."""
    window = 252 * 5
    return TrailingStats(df[DF_ADJ_CLOSE], window).exp_growth