def create_data(symbols: list[str]):
    """Assemble trailing-indicator features and future growth per symbol.

    For each symbol the adjusted-close series is summarised with
    ``TrailingStats`` over several window sizes, and the result of
    ``avg_future_growth`` for a 20-sample look-ahead window is stored next
    to those features.

    Parameters
    ----------
    symbols : list[str]
        Symbols handed to ``YahooData`` and processed one by one.

    Returns
    -------
    dict
        Symbol -> ``pd.DataFrame`` with one ``'<attr>(<window>)'`` column per
        indicator/window pair plus ``'avg_future_growth'`` and ``'date'``.
        Symbols whose data frame is empty are reported and left out.
    """
    source = YahooData(symbols)
    window_sizes = np.array([5, 10, 20, 50, 100, 400])
    largest_window = np.max(window_sizes)
    look_ahead = 20
    indicator_names = ['exp_growth', 'rolling_avg']

    tables = {}
    for symbol in symbols:
        print(f'Calculating symbol {symbol}')
        frame = source.get_symbol_all(symbol)
        if len(frame) == 0:
            print(f'Symbol {symbol} data not available')
            continue

        closes = frame[DF_ADJ_CLOSE]
        columns = {}
        for window in window_sizes:
            stats = TrailingStats(closes, window)
            for indicator in indicator_names:
                # Trim the tail by the look-ahead length, same as the dates
                # below (presumably so rows line up with the growth targets).
                trimmed = getattr(stats, indicator)[0:-look_ahead]
                columns[indicator + f'({window})'] = trimmed

        day_numbers = utils.dates2days(closes.index.values)
        trimmed_dates = closes.index[0:-look_ahead]
        _, growths = avg_future_growth(
            day_numbers, closes.values, window=look_ahead
        )
        columns['avg_future_growth'] = growths
        columns['date'] = trimmed_dates

        # Drop the leading rows that are NaN because of the largest window.
        tables[symbol] = pd.DataFrame(columns).iloc[largest_window:]
    return tables
def test_get_trade_dates():
    """Check that the largest gap between consecutive trade dates is < 10."""
    data = YahooData(['DIS', 'GOOG'])
    dates = data.get_trade_dates()
    # Difference of each date with its predecessor, expressed in whole days.
    gaps = (dates[1:] - dates[0:-1]).astype('timedelta64[D]')
    assert np.max(gaps) < 10
    plt.plot(dates, dates, '.')
def test_yahoo():
    """Check that ``filter_dates(end=...)`` leaves no row after the end date."""
    data = YahooData(['DIS', 'GOOG'])
    cutoff = np.datetime64(datetime(2016, 5, 23))
    filtered = data.filter_dates(end=cutoff)
    goog = filtered.dataframes['GOOG']
    assert goog.index[-1] <= cutoff
def test_std():
    """Plot VOO prices coloured by ``exp_std_dev`` for part of 2020.

    Visual check only: the function makes no assertions.
    """
    yahoo = YahooData()
    frame = yahoo.dataframes['VOO']
    start = np.datetime64('2020-01-01')
    stop = np.datetime64('2020-11-01')
    in_range = (frame.index > start) & (frame.index < stop)
    closes = frame.loc[in_range][DF_ADJ_CLOSE]

    stats = TrailingStats(closes, window_size=10)
    std_dev = stats.exp_std_dev
    # NOTE(review): the Series is never used afterwards; it is still built so
    # the statement sequence of the test stays as it was.
    unused_series = pd.Series(data=std_dev, index=stats.times)

    times = stats.times
    values = stats.values

    top_axis = plt.subplot(2, 1, 1)
    top_axis.set_yscale('log')
    plt.scatter(times, values, c=std_dev, s=4)
    plt.grid()

    plt.subplot(2, 1, 2)
    plt.plot(times, std_dev)
    plt.grid()
def test_buysell():
    """Plot a handful of buy/sell pairs returned by ``buysell`` over VOO.

    Every second row of the last 1000 VOO rows is fed to ``buysell``; the
    first ``int(len(buy) / 25) + 1`` pairs are drawn as dashed segments on top
    of the price curve. Visual check only: no assertions are made.
    """
    symbol = 'VOO'
    yahoo = YahooData([symbol])
    frame = yahoo.dataframes[symbol].iloc[-1000::2]
    series = frame[DF_ADJ_CLOSE]
    times = utils.dates2days(series.index)
    prices = series.values

    buy, sell, _growth, _change = buysell(
        times, prices, min_hold_days=5, max_hold_days=100
    )

    top = int(len(buy) / 25)
    plt.subplot(2, 1, 1)
    for count, (buy1, sell1) in enumerate(zip(buy, sell)):
        if count > top:
            break
        pair = [buy1, sell1]
        # buy1/sell1 are used as positions into series.index, so the prices
        # must be selected by position as well; plain series[[...]] on a
        # non-integer index depends on a deprecated positional fallback.
        plt.plot(series.index[pair], series.iloc[pair], '--', alpha=.5)

    plt.plot(series.index, series.values, alpha=1)
    plt.grid()
def env_spy(seed=0):
    """Create and reset an ``EnvBase`` environment over SPY.

    Parameters
    ----------
    seed : int, optional
        Forwarded to ``EnvBase`` as ``seed``.

    Returns
    -------
    EnvBase
        Environment built with zero commission, already reset.
    """

    def indicator1(df):
        """Return growth and regression-diff indicators with NaN set to 0."""

        def zero_nans(values):
            # In-place replacement, same as assigning through a NaN mask.
            values[np.isnan(values)] = 0
            return values

        close = df[DF_ADJ_CLOSE]
        outs = {}
        for window in [100, 400]:
            ts = TrailingStats(close, window)
            outs[f'growth({window})'] = zero_nans(ts.exp_growth)
            outs[f'diff({window})'] = zero_nans(ts.exp_reg_diff)
        return outs

    stock_data = YahooData(symbols=['SPY'])
    indicators = Indicators(stock_data)
    indicators.create(indicator1)

    env = EnvBase(
        stock_data=stock_data,
        indicators=indicators,
        price_name=DF_ADJ_CLOSE,
        seed=seed,
        commission=0,
    )
    env.reset()
    return env
def test_future():
    """Scatter VOO prices coloured by their average future growth.

    Uses every second row of the last 1000 VOO rows and a 20-sample
    look-ahead window. Visual check only: no assertions are made.
    """
    symbol = 'VOO'
    look_ahead = 20

    frame = YahooData([symbol]).dataframes[symbol].iloc[-1000::2]
    closes = frame[DF_ADJ_CLOSE]
    dates = closes.index
    day_numbers = utils.dates2days(closes.index)
    prices = closes.values

    _, growth = avg_future_growth(day_numbers, prices, window=look_ahead)

    plt.figure()
    plt.plot(dates, prices, alpha=.2)
    plt.scatter(
        dates[0:-look_ahead],
        prices[0:-look_ahead],
        c=growth*100,
        s=20,
        cmap='coolwarm',
        vmin=-1.,
        vmax=1.,
    )
    plt.colorbar()
    plt.grid()
    plt.xlabel('Time')
    plt.ylabel('Price')
def build_one(self, symbol: str):
    """Build train/test features and targets for one symbol.

    Features are computed once per entry of ``self.feature_windows``, targets
    once with ``self.target_window``; both are merged by ``combine_datas`` and
    split at 2016-01-01 using the position found by ``np.searchsorted`` on the
    feature index.

    Parameters
    ----------
    symbol : str
        Key used to look the data frame up in ``YahooData``.

    Returns
    -------
    tuple
        ``(X_train, y_train, X_test, y_test)``.
    """
    windows = self.feature_windows
    horizon = self.target_window
    split_date = np.datetime64('2016-01-01')

    df = YahooData()[symbol]
    targets = self._targets(df, horizon)
    feature_sets = [self._features(df, window) for window in windows]

    features_all, targets_all = combine_datas(feature_sets, targets)
    cut = np.searchsorted(features_all.index, split_date)
    return (
        features_all[0:cut],
        targets_all[0:cut],
        features_all[cut:],
        targets_all[cut:],
    )
def test_trailing_avg():
    """Check ``TrailingStats.rolling_avg`` against a direct per-window mean.

    The mean of every entry of ``_adj_close_intervals`` is computed with
    ``np.mean`` and compared to ``rolling_avg`` via ``np.isclose``; both
    curves are then plotted next to the input series.
    """
    symbol = 'VOO'
    yahoo = YahooData([symbol])
    series = yahoo.dataframes[symbol][DF_ADJ_CLOSE].iloc[-100:]
    window = 21
    ts = TrailingStats(series, window)

    avgs = np.array([np.mean(interval) for interval in ts._adj_close_intervals])
    assert np.all(np.isclose(avgs, ts.rolling_avg))

    plt.figure()
    plt.plot(ts.series, label='actual')
    plt.plot(ts.times, avgs, label='TEST AVG')
    plt.plot(ts.times, ts.rolling_avg, '--', label='IMPLEMENTED AVG')
    plt.legend()
def test_indicator():
    """Check that a named indicator becomes a column for every symbol."""
    data = YahooData(['DIS', 'GOOG'])
    indicators = Indicators(data)
    indicators.create(trailing_avg, 5, name='trailing')

    # Fetch both frames first, then check them in order.
    dis_frame = indicators.dataframes['DIS']
    goog_frame = indicators.dataframes['GOOG']
    for frame in (dis_frame, goog_frame):
        assert 'trailing' in frame.columns
def _jit_speed_test1():
    """Print ``timeit`` timings for ``_buysell_nojit`` and ``_buysell``.

    Both are called 10000 times on every second row of the last 100 VOO rows
    with identical hold/skip arguments.
    """
    # Test jit speed
    from backtester.stockdata import YahooData
    from backtester.definitions import DF_ADJ_CLOSE
    from backtester import utils
    import timeit

    symbol = 'VOO'
    frame = YahooData([symbol]).get_symbol_all(symbol).iloc[-100::2]
    series = frame[DF_ADJ_CLOSE]
    times = utils.dates2days(series.index).values.astype(np.float64)
    prices = series.values

    def run(kernel):
        # Arguments are rebuilt on every call so that cost stays inside the
        # timed region.
        return kernel(
            times,
            prices,
            min_hold_days=np.int64(1),
            max_hold_days=np.int64(20),
            skip=np.int64(1),
        )

    def test_no():
        return run(_buysell_nojit)

    def test_jit():
        return run(_buysell)

    t1 = timeit.timeit(test_no, number=10000)
    t2 = timeit.timeit(test_jit, number=10000)
    print('Time without jit', t1)
    print('Time using jit', t2)
def indicator_table(self) -> TableData:
    """Evaluate ``self.func`` for every stock and join the results by column.

    Returns
    -------
    TableData
        Wraps a data frame with one column per entry of ``self.stocks``; each
        column is indexed by that stock's own data-frame index.
    """
    yahoo = YahooData()

    def column_for(stock):
        frame = yahoo[stock]
        values = self.func(frame, *self.fargs, **self.fkwargs)
        return pd.Series(data=values, index=frame.index, name=stock)

    columns = [column_for(stock) for stock in self.stocks]
    return TableData(pd.concat(columns, axis=1))
def test_skip():
    """Check that ``slope_normalized`` and ``times`` share a length per skip."""
    closes = YahooData().dataframes['SPY'][DF_ADJ_CLOSE]
    for skip in range(2, 43, 3):
        stats = TrailingStats(closes, window_size=20, skip=skip)
        slope = stats.slope_normalized
        assert len(slope) == len(stats.times)
def test_2():
    """Smoke test: build ``BuySell`` over VOO and read two of its attributes."""
    symbol = 'VOO'
    frame = YahooData([symbol]).dataframes[symbol].iloc[-1000::2]
    closes = frame[DF_ADJ_CLOSE]
    day_numbers = utils.dates2days(closes.index)
    prices = closes.values

    bs = BuySell(day_numbers, prices, min_hold_days=5, max_hold_days=50)
    # Only the attribute access matters; the values are discarded.
    _ = bs.data
    _ = bs.max_change
def test_max_loss():
    """Smoke test: read ``TrailingStats.max_loss`` for VOO over part of 2020."""
    frame = YahooData().dataframes['VOO']
    start = np.datetime64('2020-01-01')
    stop = np.datetime64('2020-11-01')
    in_range = (frame.index > start) & (frame.index < stop)
    closes = frame.loc[in_range][DF_ADJ_CLOSE]

    stats = TrailingStats(closes, window_size=10)
    # Only the attribute access matters; the value is discarded.
    _ = stats.max_loss
def test_table_data():
    """Check that both "up to date" accessors agree with a positional slice."""
    table: TableData = YahooData(['DIS']).tables['DIS']
    cutoff = np.datetime64('2016-01-01')

    cutoff_index = table.date_index(cutoff)
    from_array = table.array_up_to(cutoff)
    # Rows 0..cutoff_index inclusive.
    from_slice = table.values[0:cutoff_index + 1]
    from_frame = table.dataframe_up_to(cutoff).values

    assert np.all(from_array == from_slice)
    assert np.all(from_array == from_frame)
def test_close_intervals():
    """Check the first and last rows of ``_adj_close_intervals``.

    The first interval must match the first ``window_size`` closes and the
    last interval the final ``window_size`` closes.
    """
    symbol = 'VOO'
    frame = YahooData([symbol]).dataframes[symbol].iloc[-400:]
    closes = frame[DF_ADJ_CLOSE]
    window_size = 11
    stats = TrailingStats(closes, window_size)

    expectations = (
        (0, closes.iloc[0:window_size]),
        (-1, closes.iloc[-window_size:]),
    )
    for position, expected in expectations:
        interval = stats._adj_close_intervals[position]
        assert np.all(np.isclose(interval, expected))
def test_skip1():
    """Check that ``skip=k`` statistics equal every k-th unskipped statistic.

    A ``TrailingStats`` without ``skip`` is the reference. For each skip in
    2..9 a second instance is built with that skip and several attributes are
    compared element-wise, with NaN treated as 0 on both sides.
    """
    closes = YahooData().dataframes['SPY'][DF_ADJ_CLOSE]
    window_size = 5

    def compare_attr(stats1: TrailingStats, stats2: TrailingStats, name: str, skip: int):
        """Assert ``stats1.<name>[::skip]`` equals ``stats2.<name>``."""
        print(f'comparing {name}')
        # Work on copies so zeroing NaN does not write into the arrays held by
        # the stats objects (the reference object is reused for every skip).
        a1 = getattr(stats1, name)[::skip].copy()
        a2 = getattr(stats2, name).copy()
        a1[np.isnan(a1)] = 0
        a2[np.isnan(a2)] = 0
        l1 = len(a1)
        l2 = len(a2)
        print(f'len1 = {l1}')
        print(f'len2 = {l2}')
        # Fail with a readable message before the element-wise comparison.
        assert l1 == l2, f'{name}: length {l1} != {l2} for skip {skip}'
        assert np.all(a1 == a2)

    ts1 = TrailingStats(closes, window_size=window_size)
    for skip in range(2, 10):
        print(f'testing skip {skip}')
        ts2 = TrailingStats(closes, window_size=window_size, skip=skip)

        # NOTE(review): the result is unused; this looks like a leftover from
        # a disabled rolling-average check. Kept so the statement sequence of
        # the test stays as it was.
        rolling2 = ts2._adj_close_intervals.mean(axis=1)
        rolling2[np.isnan(rolling2)] = 0

        for name in (
            'times',
            'time_days_int',
            'rolling_avg',
            'return_ratio',
            'slope_normalized',
        ):
            compare_attr(ts1, ts2, name, skip)
def test_skip0():
    """Check ``skip=10`` against every 10th value of the unskipped statistics.

    Times must match exactly; normalized slopes must match after NaN is
    replaced by 0 on both sides.
    """
    closes = YahooData().dataframes['SPY'][DF_ADJ_CLOSE]
    skip = 10
    window_size = 50

    def zero_nans(values):
        # In-place replacement, same as assigning through a NaN mask.
        values[np.isnan(values)] = 0
        return values

    dense = TrailingStats(closes, window_size=window_size)
    dense_times = dense.times[::skip]
    dense_slopes = zero_nans(dense.slope_normalized[::skip])

    sparse = TrailingStats(closes, window_size=window_size, skip=skip)
    sparse_times = sparse.times
    sparse_slopes = zero_nans(sparse.slope_normalized)

    assert np.all(dense_times == sparse_times)
    assert np.all(dense_slopes == sparse_slopes)
# -*- coding: utf-8 -*-
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import cwt
from backtester.stockdata import YahooData
from backtester.indicators import TrailingStats
from backtester.definitions import DF_ADJ_CLOSE

# Load the GOOG adjusted-close series and its logarithm.
yahoo = YahooData()
df = yahoo['GOOG']
series = df[DF_ADJ_CLOSE]
y = series.values
logy = np.log(y)

# One exp_growth result per trailing window size.
windows = [20, 50, 70, 90, 600]
growths = []
for window in windows:
    ts = TrailingStats(series, window)
    growths.append(ts.exp_growth)

# Time range shown on the x axis.
date1 = np.datetime64('2008-01-01')
date2 = np.datetime64('2021-01-01')

# Upper panel: price on a log scale.
ax = plt.subplot(2, 1, 1)
plt.plot(series)
ax.set_yscale('log')
plt.grid()
plt.xlim(date1, date2)

# Lower panel.
plt.subplot(2, 1, 2)
import matplotlib.pyplot as plt def fn_growth(df: pd.DataFrame, window_size): v = df[DF_ADJ_CLOSE] t = TrailingStats(v, window_size) return t.exp_growth def fn_std_change(df: pd.DataFrame, window_size): v = df[DF_ADJ_CLOSE] t = TrailingStats(v, window_size) return t.exp_std_dev yahoo = YahooData() stocks = yahoo.get_symbol_names() rs = np.random.RandomState(10) rs.shuffle(stocks) stocks = stocks[0:20] stocks = stocks + ['SPY'] class Indicators: def __init__(self, stocks: list[str], func: Callable, fargs=(), fkwargs=None): self.stocks = stocks
# -*- coding: utf-8 -*-
import pdb
import numpy as np
import pytest
from backtester.model import Action, SymbolTransactions
from backtester.definitions import ACTION_BUY, ACTION_SELL, ACTION_SELL_PERCENT
import datetime
from backtester.stockdata import YahooData
from backtester.exceptions import BacktestError

# Shared data source for the tests in this module.
yahoo_data = YahooData()


def test_last_trading_date():
    """Check that the previous/next trading dates bracket a given date."""
    transactions = SymbolTransactions('GOOG', yahoo_data, commission=0.0)
    date = np.datetime64(datetime.datetime(2017, 2, 25))

    last_date = transactions.get_previous_trading_date(date)
    next_date = transactions.get_next_trading_date(date)
    for label, value in (
        ('date: ', date),
        ('last trade date: ', last_date),
        ('next trade date: ', next_date),
    ):
        print(label, value)

    # NOTE(review): the index is read but never used afterwards.
    _ = transactions.df.index

    assert last_date <= date
    assert next_date >= date
from backtester.stockdata import YahooData, Indicators, MapData, LazyMap from backtester.definitions import DF_ADJ_CLOSE from backtester.indicators import TrailingStats, QuarterStats, MonthlyStats from backtester.utils import extract_column import matplotlib.pyplot as plt import seaborn as sns from globalcache import Cache sns.set_theme() cache = Cache(globals()) # %% Get stock data y_data = YahooData() start_date = np.datetime64('2012-01-01') symbols = y_data.retrieve_symbol_names() y_data = y_data.filter_dates(start=start_date) # symbols = symbols[0:5] # symbols = ['GME'] symbols.append('SPY') # %% Define calculations def sortino_ratio(r: np.ndarray, target: float, bins: int = 25) -> float: """https://en.wikipedia.org/wiki/Sortino_ratio
import pandas as pd import numpy as np import pdb from typing import Callable from backtester import Strategy, Backtest from backtester.indicators import TrailingStats from backtester.definitions import DF_ADJ_CLOSE from backtester.stockdata import YahooData, TableData, Indicators from backtester.exceptions import NotEnoughDataError from functools import cached_property import datetime import matplotlib.pyplot as plt yahoo = YahooData() symbols = yahoo.symbol_names rs = np.random.default_rng(5) rs.shuffle(symbols) STOCKS = symbols[0:60] STOCKS.append('SPY') STOCKS.append('VOO') # STOCKS.append('GOOG') # STOCKS.append('TSLA') STOCKS = np.array(STOCKS) def post1(df: pd.DataFrame): series = df[DF_ADJ_CLOSE] ts = TrailingStats(series, 252 * 5)
""" import numpy as np import pandas as pd import matplotlib.pyplot as plt import backtester from backtester.indicators import TrailingStats from backtester.analysis import BuySell, avg_future_growth from backtester.stockdata import YahooData from backtester.definitions import DF_ADJ_CLOSE from backtester import utils from backtester.smooth import SmoothOptimize, TrailingSavGol y = YahooData() names = y.get_symbol_names() np.random.seed(7) np.random.shuffle(names) symbol = 'MSFT' date1 = np.datetime64('2005-01-01') date2 = np.datetime64('2018-08-01') df = y.get_symbol_before(symbol, date2) ii = df.index.values >= date1 df = df.loc[ii] close = df[DF_ADJ_CLOSE] ts = TrailingStats(close, window_size=30) growth = ts.exp_growth
"""Explore different window sizes and growth."""
# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
from backtester.stockdata import YahooData
from backtester.definitions import DF_ADJ_CLOSE
from backtester.indicators import TrailingStats

# Restrict the data to 2003-2004 and take the SPY adjusted close.
y = YahooData()
start = np.datetime64('2003-01-01')
end = np.datetime64('2005-01-01')
y = y.filter_dates(start=start, end=end)
s = y.dataframes['SPY'][DF_ADJ_CLOSE]

window = 100
windows = [
    100,
]

# NOTE(review): the extent of this loop body is reconstructed from a
# whitespace-mangled source; confirm that subplot() belongs inside it.
for window in windows:
    ts = TrailingStats(s, window)
    loss = ts.max_loss
    plt.subplot(2, 1, 1)
import pdb from typing import Callable from backtester import Strategy, Backtest from backtester.indicators import TrailingStats from backtester.definitions import DF_ADJ_CLOSE from backtester.stockdata import YahooData, TableData, Indicators from backtester.exceptions import NotEnoughDataError from functools import cached_property import datetime import matplotlib.pyplot as plt from dotenv import load_dotenv load_dotenv() yahoo = YahooData() symbols = yahoo.get_symbol_names() rs = np.random.default_rng(0) rs.shuffle(symbols) # STOCKS = symbols[0:100] # STOCKS.append('SPY') # STOCKS.append('VOO') # STOCKS.append('GOOG') # STOCKS.append('TSLA') STOCKS = ['SPY'] STOCKS = np.array(STOCKS) def post1(df: pd.DataFrame): series = df[DF_ADJ_CLOSE]
future_growth = self.reg.predict(indicators) growths.append(future_growth) imax = np.argmax(growths) my_growth = growths[imax] my_stock = stocks[imax] if my_stock != self.current_stock: self.sell_percent(self.current_stock, amount=1.0) action = self.buy(my_stock, self.available_funds) self.current_stock = my_stock print(action) self.ii += 1 return if __name__ == '__main__': y = YahooData(symbols=stocks) bt = Backtest( stock_data=y, strategy=Strat1, cash=100, commission=.002, start_date=datetime.datetime(2007, 4, 1), end_date=datetime.datetime(2021, 1, 26), ) bt.start() perf = bt.stats.performance assets = bt.stats.asset_values
from backtester.definitions import DF_ADJ_CLOSE from backtester.stockdata import YahooData from backtester.exceptions import NotEnoughDataError import datetime def func1(window_size, df: pd.DataFrame): v = df[DF_ADJ_CLOSE] t = TrailingStats(v, window_size) volatility = t.exp_std_dev rate = t.exp_growth pdb.set_trace() return rate, volatility yahoo_data = YahooData() all_names = yahoo_data.get_symbol_names() np.random.seed(0) np.random.shuffle(all_names) stocks1 = all_names[0 : 16] stocks = [] for stock in stocks1: if len(yahoo_data[stock]) > 0: stocks.append(stock) yahoo_data = YahooData(symbols=stocks) class Strat1(Strategy): def init(self): # Build long and short rate metrics
# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from backtester.indicators import TrailingStats
from backtester.stockdata import YahooData, Indicators, TableData
from backtester.definitions import DF_ADJ_CLOSE

# Take the first 200 symbols of a seeded shuffle, then add two index funds.
yahoo = YahooData()
symbols = yahoo.get_symbol_names()
rs = np.random.default_rng(0)
rs.shuffle(symbols)
STOCKS = symbols[0:200]
STOCKS.extend(['SPY', 'VOO'])
STOCKS = np.array(STOCKS)


def post2(df: pd.DataFrame):
    """Return the 10-sample ``exp_growth`` as a NaN-aware z-score.

    Parameters
    ----------
    df : pd.DataFrame
        Frame that provides the ``DF_ADJ_CLOSE`` column.

    Returns
    -------
    The growth values minus their ``np.nanmean``, divided by their
    ``np.nanstd``.
    """
    closes = df[DF_ADJ_CLOSE]
    growth = TrailingStats(closes, 10).exp_growth
    center = np.nanmean(growth)
    spread = np.nanstd(growth)
    return (growth - center) / spread