Ejemplo n.º 1
0
def create_data(symbols: list[str]):
    """Build a per-symbol table of trailing indicators and future growth.

    For every symbol that has data, the ``exp_growth`` and ``rolling_avg``
    attributes of ``TrailingStats`` are collected for each window size and
    stored next to the second value returned by ``avg_future_growth`` for a
    20-sample look-ahead window.

    Parameters
    ----------
    symbols : list[str]
        Symbols to process.

    Returns
    -------
    dict
        Maps each symbol that had data to its ``pandas.DataFrame``. The
        first ``max(window_sizes)`` rows of each frame are dropped.
    """
    lookahead = 20
    attr_names = ['exp_growth', 'rolling_avg']

    yahoo = YahooData(symbols)
    window_sizes = np.array([5, 10, 20, 50, 100, 400])
    largest_window = np.max(window_sizes)

    tables = {}
    for symbol in symbols:
        print(f'Calculating symbol {symbol}')

        frame = yahoo.get_symbol_all(symbol)
        if len(frame) == 0:
            print(f'Symbol {symbol} data not available')
            continue

        closes = frame[DF_ADJ_CLOSE]
        columns = {}

        # Trailing indicators, trimmed so they line up with the look-ahead.
        for window in window_sizes:
            stats = TrailingStats(closes, window)
            for attr_name in attr_names:
                values = getattr(stats, attr_name)
                columns[attr_name + f'({window})'] = values[0:-lookahead]

        # Future growth over the look-ahead window.
        day_numbers = utils.dates2days(closes.index.values)
        _, growths = avg_future_growth(day_numbers,
                                       closes.values,
                                       window=lookahead)
        columns['avg_future_growth'] = growths
        columns['date'] = closes.index[0:-lookahead]

        # Drop the leading rows that are NaN because of the largest window.
        tables[symbol] = pd.DataFrame(columns).iloc[largest_window:]
    return tables
Ejemplo n.º 2
0
def test_get_trade_dates():
    """Check that consecutive trade dates are fewer than 10 days apart.

    Also plots the trade dates against themselves as points.
    """
    stock_data = YahooData(['DIS', 'GOOG'])
    dates = stock_data.get_trade_dates()

    # Gap between each date and the one before it, expressed in days.
    gaps = (dates[1:] - dates[0:-1]).astype('timedelta64[D]')
    assert np.max(gaps) < 10
    plt.plot(dates, dates, '.')
    return
Ejemplo n.º 3
0
def test_yahoo():
    """Check that ``filter_dates(end=...)`` leaves no GOOG row after the end."""
    stock_data = YahooData(['DIS', 'GOOG'])
    cutoff = np.datetime64(datetime(2016, 5, 23))

    filtered = stock_data.filter_dates(end=cutoff)
    goog = filtered.dataframes['GOOG']
    assert goog.index[-1] <= cutoff
    return
Ejemplo n.º 4
0
def test_std():
    """Plot VOO's 10-sample trailing ``exp_std_dev`` between two 2020 dates.

    The top panel is a log-scale scatter of the trailing values colored by
    ``exp_std_dev``; the bottom panel plots ``exp_std_dev`` itself.
    """
    frame = YahooData().dataframes['VOO']

    start = np.datetime64('2020-01-01')
    stop = np.datetime64('2020-11-01')
    in_range = (frame.index > start) & (frame.index < stop)
    closes = frame.loc[in_range][DF_ADJ_CLOSE]

    stats = TrailingStats(closes, window_size=10)
    std_dev = stats.exp_std_dev
    # Built but not used afterwards.
    series2 = pd.Series(data=std_dev, index=stats.times)

    xs = stats.times
    ys = stats.values

    top_axes = plt.subplot(2, 1, 1)
    top_axes.set_yscale('log')
    plt.scatter(xs, ys, c=std_dev, s=4)
    plt.grid()

    plt.subplot(2, 1, 2)
    plt.plot(xs, std_dev)
    plt.grid()
    return
Ejemplo n.º 5
0
def test_buysell():
    """Plot a subset of the buy/sell pairs found by ``buysell`` for VOO.

    Uses every second row of the last 1000 VOO rows. Each plotted pair is
    drawn as a dashed segment from its buy point to its sell point, on top
    of the adjusted-close series.
    """
    symbol = 'VOO'
    y = YahooData([symbol])
    df = y.dataframes[symbol]
    df = df.iloc[-1000::2]

    series = df[DF_ADJ_CLOSE]

    times = utils.dates2days(series.index)
    prices = series.values

    buy, sell, growth, change = buysell(times,
                                        prices,
                                        min_hold_days=5,
                                        max_hold_days=100)

    # Plot the first ``top + 1`` pairs only.
    top = len(buy) // 25

    plt.subplot(2, 1, 1)
    for count, (buy1, sell1) in enumerate(zip(buy, sell), start=1):
        pair = [buy1, sell1]
        # buy1/sell1 are integer positions (they index ``series.index``),
        # so the prices must be selected positionally with ``.iloc``.
        # Plain ``series[[...]]`` with integers on a non-integer index is
        # deprecated positional fallback in pandas.
        plt.plot(series.index[pair], series.iloc[pair], '--', alpha=.5)
        if count > top:
            break

    plt.plot(series.index, series.values, alpha=1)
    plt.grid()

    return
Ejemplo n.º 6
0
def env_spy(seed=0):
    """Create and reset an ``EnvBase`` over SPY with growth/diff indicators.

    Parameters
    ----------
    seed : int, optional
        Passed through to ``EnvBase``. Defaults to 0.

    Returns
    -------
    EnvBase
        The environment, after ``reset()`` has been called on it.
    """
    def indicator1(df):
        # For windows 100 and 400, collect exp_growth and exp_reg_diff of
        # the adjusted close, with NaN entries overwritten by 0.
        close = df[DF_ADJ_CLOSE]
        outs = {}
        for window in (100, 400):
            stats = TrailingStats(close, window)

            growth = stats.exp_growth
            growth[np.isnan(growth)] = 0
            outs[f'growth({window})'] = growth

            diff = stats.exp_reg_diff
            diff[np.isnan(diff)] = 0
            outs[f'diff({window})'] = diff
        return outs

    stock_data = YahooData(symbols=['SPY'])
    indicators = Indicators(stock_data)
    indicators.create(indicator1)

    env = EnvBase(stock_data=stock_data,
                  indicators=indicators,
                  price_name=DF_ADJ_CLOSE,
                  seed=seed,
                  commission=0)
    env.reset()
    return env
Ejemplo n.º 7
0
def test_future():
    """Scatter VOO prices colored by the output of ``avg_future_growth``.

    Uses every second row of the last 1000 VOO rows and a 20-sample
    look-ahead window; the last 20 samples are left out of the scatter.
    """
    lookahead = 20
    symbol = 'VOO'
    frame = YahooData([symbol]).dataframes[symbol].iloc[-1000::2]

    closes = frame[DF_ADJ_CLOSE]
    dates = closes.index
    day_numbers = utils.dates2days(closes.index)
    prices = closes.values

    _, growth = avg_future_growth(day_numbers, prices, window=lookahead)

    plt.figure()
    plt.plot(dates, prices, alpha=.2)
    plt.scatter(
        dates[0:-lookahead],
        prices[0:-lookahead],
        c=growth * 100,
        s=20,
        cmap='coolwarm',
        vmin=-1.,
        vmax=1.,
    )
    plt.colorbar()
    plt.grid()
    plt.xlabel('Time')
    plt.ylabel('Price')
    return
Ejemplo n.º 8
0
    def build_one(self, symbol: str):
        """Build train/test features and targets for one symbol.

        Features are computed once per entry of ``self.feature_windows``,
        targets with ``self.target_window``; both are merged by
        ``combine_datas`` and split at 2016-01-01 using the feature index.

        Parameters
        ----------
        symbol : str
            Symbol looked up in ``YahooData``.

        Returns
        -------
        tuple
            ``(X_train, y_train, X_test, y_test)``.
        """
        windows = self.feature_windows
        horizon = self.target_window
        split_date = np.datetime64('2016-01-01')

        frame = YahooData()[symbol]
        targets = self._targets(frame, horizon)
        feature_sets = [self._features(frame, window) for window in windows]

        features, labels = combine_datas(feature_sets, targets)

        # Position of the first index entry at or after the split date.
        cut = np.searchsorted(features.index, split_date)
        train = (features[0:cut], labels[0:cut])
        test = (features[cut:], labels[cut:])
        return train[0], train[1], test[0], test[1]
Ejemplo n.º 9
0
def test_trailing_avg():
    """Check ``rolling_avg`` against per-interval means, then plot both.

    Uses the last 100 adjusted-close values of VOO and a window of 21.
    """
    symbol = 'VOO'
    window = 21
    frame = YahooData([symbol]).dataframes[symbol]
    closes = frame[DF_ADJ_CLOSE].iloc[-100:]
    stats = TrailingStats(closes, window)

    # Reference: plain mean of every interval.
    expected = np.array(
        [np.mean(chunk) for chunk in stats._adj_close_intervals]
    )
    assert np.all(np.isclose(expected, stats.rolling_avg))

    times = closes.index[window:]  # not used below

    plt.figure()
    plt.plot(stats.series, label='actual')
    plt.plot(stats.times, expected, label='TEST AVG')
    plt.plot(stats.times, stats.rolling_avg, '--', label='IMPLEMENTED AVG')
    plt.legend()
Ejemplo n.º 10
0
def test_indicator():
    """Check that ``Indicators.create`` adds a 'trailing' column per symbol."""
    indicators = Indicators(YahooData(['DIS', 'GOOG']))
    indicators.create(trailing_avg, 5, name='trailing')

    # Fetch both frames first, then check each one.
    frames = [indicators.dataframes[symbol] for symbol in ('DIS', 'GOOG')]
    for frame in frames:
        assert 'trailing' in frame.columns
Ejemplo n.º 11
0
def _jit_speed_test1():
    """Time 10000 calls each of ``_buysell_nojit`` and ``_buysell``.

    Both are called on every second row of the last 100 VOO rows with the
    same arguments, and the two total times are printed.
    """
    # Test jit speed
    from backtester.stockdata import YahooData
    from backtester.definitions import DF_ADJ_CLOSE
    from backtester import utils
    import timeit

    symbol = 'VOO'
    frame = YahooData([symbol]).get_symbol_all(symbol).iloc[-100::2]
    closes = frame[DF_ADJ_CLOSE]

    times = utils.dates2days(closes.index).values.astype(np.float64)
    prices = closes.values

    def run_plain():
        return _buysell_nojit(
            times,
            prices,
            min_hold_days=np.int64(1),
            max_hold_days=np.int64(20),
            skip=np.int64(1),
        )

    def run_jitted():
        return _buysell(
            times,
            prices,
            min_hold_days=np.int64(1),
            max_hold_days=np.int64(20),
            skip=np.int64(1),
        )

    calls = 10000
    plain_seconds = timeit.timeit(run_plain, number=calls)
    jitted_seconds = timeit.timeit(run_jitted, number=calls)
    print('Time without jit', plain_seconds)
    print('Time using jit', jitted_seconds)
Ejemplo n.º 12
0
    def indicator_table(self) -> TableData:
        """Apply ``self.func`` to every stock and collect the results.

        Returns
        -------
        TableData
            Wraps a DataFrame with one column per entry of ``self.stocks``,
            each indexed like that stock's own dataframe.
        """
        yahoo = YahooData()

        def column_for(stock):
            # One named Series holding the function output for this stock.
            frame = yahoo[stock]
            values = self.func(frame, *self.fargs, **self.fkwargs)
            return pd.Series(data=values, index=frame.index, name=stock)

        columns = [column_for(stock) for stock in self.stocks]
        return TableData(pd.concat(columns, axis=1))
Ejemplo n.º 13
0
def test_skip():
    """Check ``slope_normalized`` and ``times`` match in length per skip.

    Runs over SPY adjusted closes with a window of 20 and skips
    2, 5, 8, ..., 41.
    """
    closes = YahooData().dataframes['SPY'][DF_ADJ_CLOSE]

    for skip in range(2, 43, 3):
        stats = TrailingStats(closes, window_size=20, skip=skip)
        slopes = stats.slope_normalized
        assert len(slopes) == len(stats.times)
Ejemplo n.º 14
0
def test_2():
    """Smoke-test ``BuySell`` by reading its ``data`` and ``max_change``.

    Uses every second row of the last 1000 VOO rows. There are no
    assertions; the test passes if nothing raises.
    """
    symbol = 'VOO'
    frame = YahooData([symbol]).dataframes[symbol].iloc[-1000::2]
    closes = frame[DF_ADJ_CLOSE]

    bs = BuySell(
        utils.dates2days(closes.index),
        closes.values,
        min_hold_days=5,
        max_hold_days=50,
    )
    _ = bs.data
    _ = bs.max_change
Ejemplo n.º 15
0
def test_max_loss():
    """Smoke-test ``TrailingStats.max_loss`` on VOO between two 2020 dates.

    There are no assertions; the test passes if nothing raises.
    """
    frame = YahooData().dataframes['VOO']

    start = np.datetime64('2020-01-01')
    stop = np.datetime64('2020-11-01')
    in_range = (frame.index > start) & (frame.index < stop)

    closes = frame.loc[in_range][DF_ADJ_CLOSE]
    stats = TrailingStats(closes, window_size=10)
    max_loss = stats.max_loss
    return
Ejemplo n.º 16
0
def test_table_data():
    """Check three ways of reading a table up to a date give equal values.

    ``array_up_to(date)`` is compared with a slice of ``values`` ending at
    ``date_index(date)`` inclusive, and with ``dataframe_up_to(date)``.
    """
    table: TableData = YahooData(['DIS']).tables['DIS']

    date = np.datetime64('2016-01-01')
    last = table.date_index(date)

    from_array = table.array_up_to(date)
    from_slice = table.values[0:last + 1]
    from_frame = table.dataframe_up_to(date).values
    for other in (from_slice, from_frame):
        assert np.all(from_array == other)

    return
Ejemplo n.º 17
0
def test_close_intervals():
    """Check the first and last ``_adj_close_intervals`` rows.

    With a window of 11 over the last 400 VOO closes, the first interval
    must match the first 11 closes and the last interval the final 11.
    """
    symbol = 'VOO'
    frame = YahooData([symbol]).dataframes[symbol].iloc[-400:]
    closes = frame[DF_ADJ_CLOSE]

    window_size = 11
    stats = TrailingStats(closes, window_size)

    checks = (
        (0, closes.iloc[0:window_size]),
        (-1, closes.iloc[-window_size:]),
    )
    for position, expected in checks:
        interval = stats._adj_close_intervals[position]
        assert np.all(np.isclose(interval, expected))
Ejemplo n.º 18
0
def test_skip1():
    """Check skipped ``TrailingStats`` against a strided full-resolution one.

    For skips 2..9 and a window of 5 over SPY adjusted closes, several
    attributes of the skipped instance must equal every ``skip``-th entry
    of the same attribute on the unskipped instance, with NaN treated as 0.
    """
    series = YahooData().dataframes['SPY'][DF_ADJ_CLOSE]
    skip = 10
    window_size = 5
    attr_names = (
        'times',
        'time_days_int',
        'rolling_avg',
        'return_ratio',
        'slope_normalized',
    )

    def compare_attr(stats1: TrailingStats, stats2: TrailingStats, name: str):
        # ``skip`` is read from the enclosing scope, so it always holds the
        # value of the current loop iteration below.
        print(f'comparing {name}')

        strided = getattr(stats1, name)[::skip]
        skipped = getattr(stats2, name)
        strided[np.isnan(strided)] = 0
        skipped[np.isnan(skipped)] = 0

        l1 = len(strided)
        l2 = len(skipped)
        print(f'len1 = {l1}')
        print(f'len2 = {l2}')
        assert np.all(strided == skipped)

    full = TrailingStats(series, window_size=window_size)

    for skip in range(2, 10):
        print(f'testing skip {skip}')

        sparse = TrailingStats(series, window_size=window_size, skip=skip)

        # Computed but not compared against anything.
        rolling2 = sparse._adj_close_intervals.mean(axis=1)
        rolling2[np.isnan(rolling2)] = 0

        for attr_name in attr_names:
            compare_attr(full, sparse, attr_name)
Ejemplo n.º 19
0
def test_skip0():
    """Check ``skip=10`` matches every 10th entry of the unskipped result.

    Compares ``times`` and ``slope_normalized`` (NaN treated as 0) for a
    window of 50 over SPY adjusted closes.
    """
    closes = YahooData().dataframes['SPY'][DF_ADJ_CLOSE]
    skip = 10
    window_size = 50

    dense = TrailingStats(closes, window_size=window_size)
    dense_times = dense.times[::skip]
    dense_slopes = dense.slope_normalized[::skip]
    dense_slopes[np.isnan(dense_slopes)] = 0

    sparse = TrailingStats(closes, window_size=window_size, skip=skip)
    sparse_times = sparse.times
    sparse_slopes = sparse.slope_normalized
    sparse_slopes[np.isnan(sparse_slopes)] = 0

    assert np.all(dense_times == sparse_times)
    assert np.all(dense_slopes == sparse_slopes)
Ejemplo n.º 20
0
# -*- coding: utf-8 -*-
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import cwt

from backtester.stockdata import YahooData
from backtester.indicators import TrailingStats
from backtester.definitions import DF_ADJ_CLOSE
# Load GOOG and take its adjusted-close series and the log of its values.
yahoo = YahooData()
df = yahoo['GOOG']
series = df[DF_ADJ_CLOSE]
y = series.values
logy = np.log(y)

# Collect the trailing exp_growth of the series for each window size.
windows = [20, 50, 70, 90, 600]
growths = []
for window in windows:
    ts = TrailingStats(series, window)
    growths.append(ts.exp_growth)

# x-axis limits applied to the first panel below.
date1 = np.datetime64('2008-01-01')
date2 = np.datetime64('2021-01-01')

# f = np.abs(np.fft.fft(logy))
# Top panel: the series on a log-scaled y axis.
ax = plt.subplot(2, 1, 1)
plt.plot(series)
ax.set_yscale('log')
plt.grid()
plt.xlim(date1, date2)

# Select the bottom panel for whatever is drawn next.
plt.subplot(2, 1, 2)
Ejemplo n.º 21
0
import matplotlib.pyplot as plt


def fn_growth(df: pd.DataFrame, window_size):
    """Return ``TrailingStats.exp_growth`` of the adjusted-close column.

    Parameters
    ----------
    df : pd.DataFrame
        Frame containing the ``DF_ADJ_CLOSE`` column.
    window_size
        Window passed to ``TrailingStats``.
    """
    closes = df[DF_ADJ_CLOSE]
    return TrailingStats(closes, window_size).exp_growth


def fn_std_change(df: pd.DataFrame, window_size):
    """Return ``TrailingStats.exp_std_dev`` of the adjusted-close column.

    Parameters
    ----------
    df : pd.DataFrame
        Frame containing the ``DF_ADJ_CLOSE`` column.
    window_size
        Window passed to ``TrailingStats``.
    """
    closes = df[DF_ADJ_CLOSE]
    return TrailingStats(closes, window_size).exp_std_dev


yahoo = YahooData()

# Shuffle the symbol names with a fixed seed, keep the first 20, add SPY.
stocks = yahoo.get_symbol_names()
rs = np.random.RandomState(10)
rs.shuffle(stocks)
stocks = stocks[0:20] + ['SPY']


class Indicators:
    def __init__(self,
                 stocks: list[str],
                 func: Callable,
                 fargs=(),
                 fkwargs=None):
        self.stocks = stocks
Ejemplo n.º 22
0
# -*- coding: utf-8 -*-
import pdb
import numpy as np
import pytest

from backtester.model import Action, SymbolTransactions
from backtester.definitions import ACTION_BUY, ACTION_SELL, ACTION_SELL_PERCENT
import datetime
from backtester.stockdata import YahooData
from backtester.exceptions import BacktestError
# a1 = Action()

# Module-level stock data instance, passed to SymbolTransactions below.
yahoo_data = YahooData()


def test_last_trading_date():
    """Check previous/next trading dates bracket the query date for GOOG.

    The query date is 2017-02-25. The three dates are printed before the
    assertions run.
    """
    transactions = SymbolTransactions('GOOG', yahoo_data, commission=0.0)
    date = np.datetime64(datetime.datetime(2017, 2, 25))

    last_date = transactions.get_previous_trading_date(date)
    next_date = transactions.get_next_trading_date(date)

    report = (
        ('date: ', date),
        ('last trade date: ', last_date),
        ('next trade date: ', next_date),
    )
    for label, value in report:
        print(label, value)

    dates = transactions.df.index  # not used below
    assert last_date <= date
    assert next_date >= date
    return
Ejemplo n.º 23
0
from backtester.stockdata import YahooData, Indicators, MapData, LazyMap
from backtester.definitions import DF_ADJ_CLOSE
from backtester.indicators import TrailingStats, QuarterStats, MonthlyStats
from backtester.utils import extract_column

import matplotlib.pyplot as plt
import seaborn as sns
from globalcache import Cache

# Apply seaborn's default theme to subsequent plots.
sns.set_theme()

# Cache object built from this module's globals.
cache = Cache(globals())

# %% Get stock data

y_data = YahooData()

# Symbol names are read from the unfiltered data; afterwards y_data is
# replaced by a copy filtered to start at start_date.
start_date = np.datetime64('2012-01-01')
symbols = y_data.retrieve_symbol_names()
y_data = y_data.filter_dates(start=start_date)

# symbols = symbols[0:5]
# symbols = ['GME']

# Append SPY to the symbol list (in place).
symbols.append('SPY')

# %% Define calculations


def sortino_ratio(r: np.ndarray, target: float, bins: int = 25) -> float:
    """https://en.wikipedia.org/wiki/Sortino_ratio
Ejemplo n.º 24
0
import pandas as pd
import numpy as np
import pdb
from typing import Callable
from backtester import Strategy, Backtest
from backtester.indicators import TrailingStats
from backtester.definitions import DF_ADJ_CLOSE
from backtester.stockdata import YahooData, TableData, Indicators
from backtester.exceptions import NotEnoughDataError

from functools import cached_property
import datetime

import matplotlib.pyplot as plt

yahoo = YahooData()
symbols = yahoo.symbol_names
rs = np.random.default_rng(5)
rs.shuffle(symbols)

# First 60 shuffled symbols followed by SPY and VOO, as an array.
STOCKS = np.array(symbols[0:60] + ['SPY', 'VOO'])


def post1(df: pd.DataFrame):
    series = df[DF_ADJ_CLOSE]
    ts = TrailingStats(series, 252 * 5)
Ejemplo n.º 25
0
"""

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import backtester

from backtester.indicators import TrailingStats
from backtester.analysis import BuySell, avg_future_growth
from backtester.stockdata import YahooData
from backtester.definitions import DF_ADJ_CLOSE
from backtester import utils
from backtester.smooth import SmoothOptimize, TrailingSavGol

# Load the symbol names and shuffle them in place with a fixed seed.
y = YahooData()
names = y.get_symbol_names()
np.random.seed(7)
np.random.shuffle(names)

# Symbol and date range examined below.
symbol = 'MSFT'
date1 = np.datetime64('2005-01-01')
date2 = np.datetime64('2018-08-01')

# Data fetched via get_symbol_before(symbol, date2), then restricted to
# rows whose index is on or after date1.
df = y.get_symbol_before(symbol, date2)
ii = df.index.values >= date1
df = df.loc[ii]
close = df[DF_ADJ_CLOSE]

# Trailing exp_growth of the adjusted close with a window of 30.
ts = TrailingStats(close, window_size=30)
growth = ts.exp_growth
Ejemplo n.º 26
0
"""Explore different window sizes and growth."""

# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

sns.set()

from backtester.stockdata import YahooData
from backtester.definitions import DF_ADJ_CLOSE
from backtester.indicators import TrailingStats

# Restrict the stock data to the start..end date range.
y = YahooData()
start = np.datetime64('2003-01-01')
end = np.datetime64('2005-01-01')
y = y.filter_dates(start=start, end=end)

# Adjusted-close series for SPY.
s = y.dataframes['SPY'][DF_ADJ_CLOSE]

# Window sizes to explore; `window` is rebound by the loop below.
window = 100
windows = [
    100,
]
for window in windows:
    # Trailing max_loss for this window size.
    ts = TrailingStats(s, window)
    loss = ts.max_loss
    # loss99 = np.percentile(loss, 99)

    # Select the top panel of a 2x1 subplot grid.
    plt.subplot(2, 1, 1)
Ejemplo n.º 27
0
import pdb
from typing import Callable
from backtester import Strategy, Backtest
from backtester.indicators import TrailingStats
from backtester.definitions import DF_ADJ_CLOSE
from backtester.stockdata import YahooData, TableData, Indicators
from backtester.exceptions import NotEnoughDataError

from functools import cached_property
import datetime

import matplotlib.pyplot as plt
from dotenv import load_dotenv
load_dotenv()

yahoo = YahooData()
symbols = yahoo.get_symbol_names()
rs = np.random.default_rng(0)
rs.shuffle(symbols)

# Only SPY is selected; the shuffled names stay available in `symbols`.
STOCKS = np.array(['SPY'])


def post1(df: pd.DataFrame):
    series = df[DF_ADJ_CLOSE]
Ejemplo n.º 28
0
            future_growth = self.reg.predict(indicators)
            growths.append(future_growth)
        imax = np.argmax(growths)
        my_growth = growths[imax]
        my_stock = stocks[imax]

        if my_stock != self.current_stock:
            self.sell_percent(self.current_stock, amount=1.0)
            action = self.buy(my_stock, self.available_funds)
            self.current_stock = my_stock
            print(action)

        self.ii += 1
        return


# Script entry point: run a backtest of Strat1 over the symbols in `stocks`.
if __name__ == '__main__':
    y = YahooData(symbols=stocks)

    # Backtest configuration: starting cash of 100 and a commission of .002.
    bt = Backtest(
        stock_data=y,
        strategy=Strat1,
        cash=100,
        commission=.002,
        start_date=datetime.datetime(2007, 4, 1),
        end_date=datetime.datetime(2021, 1, 26),
    )
    bt.start()
    # Results read from the finished backtest's stats object.
    perf = bt.stats.performance
    assets = bt.stats.asset_values
Ejemplo n.º 29
0
from backtester.definitions import DF_ADJ_CLOSE
from backtester.stockdata import YahooData
from backtester.exceptions import NotEnoughDataError
import datetime


def func1(window_size, df: pd.DataFrame):
    """Return trailing growth and standard deviation of the adjusted close.

    Parameters
    ----------
    window_size
        Window passed to ``TrailingStats``.
    df : pd.DataFrame
        Frame containing the ``DF_ADJ_CLOSE`` column.

    Returns
    -------
    tuple
        ``(rate, volatility)``, i.e. ``TrailingStats.exp_growth`` and
        ``TrailingStats.exp_std_dev``.
    """
    v = df[DF_ADJ_CLOSE]
    t = TrailingStats(v, window_size)
    volatility = t.exp_std_dev
    rate = t.exp_growth
    # The leftover ``pdb.set_trace()`` breakpoint was removed: it stopped
    # every call in the interactive debugger.
    return rate, volatility


# Shuffle all symbol names with a fixed seed and take the first 16.
yahoo_data = YahooData()
all_names = yahoo_data.get_symbol_names()
np.random.seed(0)
np.random.shuffle(all_names)
stocks1 = all_names[0 : 16]
# Keep only the candidates whose data is non-empty.
stocks = []
for stock in stocks1:
    if len(yahoo_data[stock]) > 0:
        stocks.append(stock)
# Rebuild the data object restricted to the retained symbols.
yahoo_data = YahooData(symbols=stocks)


class Strat1(Strategy):
    
    def init(self):
        # Build long and short rate metrics
Ejemplo n.º 30
0
# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from backtester.indicators import TrailingStats
from backtester.stockdata import YahooData, Indicators, TableData
from backtester.definitions import DF_ADJ_CLOSE

yahoo = YahooData()
symbols = yahoo.get_symbol_names()
rs = np.random.default_rng(0)
rs.shuffle(symbols)

# First 200 shuffled symbols followed by SPY and VOO, as an array.
STOCKS = np.array(symbols[0:200] + ['SPY', 'VOO'])


def post2(df: pd.DataFrame):
    """Return the 10-sample trailing ``exp_growth``, standardized.

    The NaN-ignoring mean is subtracted and the result is divided by the
    NaN-ignoring standard deviation.

    Parameters
    ----------
    df : pd.DataFrame
        Frame containing the ``DF_ADJ_CLOSE`` column.
    """
    growth = TrailingStats(df[DF_ADJ_CLOSE], 10).exp_growth
    center = np.nanmean(growth)
    spread = np.nanstd(growth)
    return (growth - center) / spread