Ejemplo n.º 1
0
def plot(df: pd.DataFrame):
    """Plot closing prices with trailing fits, plus the trailing acceleration.

    Draws two stacked subplots on the current pyplot figure:

    1. ``DF_CLOSE`` on a log y-axis, overlaid with ``lin_reg_value`` from two
       ``TrailingStats`` windows and two Savitzky-Golay smoothings.
    2. ``exp_accel`` from the shorter ``TrailingStats`` window.

    Args:
        df: Price table providing a ``DF_CLOSE`` column; its index is used as
            the x-axis. NOTE(review): the date locators below assume a
            datetime-like index -- confirm against callers.

    Returns:
        None. The function only draws on the current figure.
    """
    s_close = df[DF_CLOSE]
    s_time = df.index

    ts = TrailingStats(s_close, window_size=365*2)
    ts2 = TrailingStats(s_close, window_size=100)
    s_fit = ts.lin_reg_value
    s_fit2 = ts2.lin_reg_value

    # NOTE(review): the widest filter uses window_length=1001; confirm that
    # callers always pass at least that many rows.
    s3 = savgol_filter(s_close, window_length=31, polyorder=3)
    s4 = savgol_filter(s_close, window_length=1001, polyorder=3)
    s4 = savgol_filter(s4, window_length=301, polyorder=6)

    # Create the price axes first and style *that* axes. Previously the
    # styling was applied to ``plt.gca()`` before ``plt.subplot`` created the
    # axes that is drawn on, so it never reached the price plot.
    ax = plt.subplot(2, 1, 1)
    plt.semilogy(s_time, s_close, '.-', ms=2, alpha=.2)
    # NOTE(review): the legend labels say 400/40 while the windows above are
    # 730/100 -- labels kept as-is, confirm which is intended.
    plt.semilogy(s_time, s_fit, '-', alpha=.6, label='exp fit-400')
    plt.semilogy(s_time, s_fit2, '-', alpha=.6, label='exp fit-40')
    plt.semilogy(s_time, s3, '--', alpha=.6, label='sav-gol')
    plt.semilogy(s_time, s4, '-', alpha=.6, label='sav-gol-2')
    plt.legend()

    # Long histories get yearly major ticks, short ones monthly.
    dnum = len(df.index)
    if dnum / 12 > 20:
        ax.xaxis.set_major_locator(matplotlib.dates.YearLocator())
        ax.xaxis.set_minor_locator(matplotlib.dates.MonthLocator())
        ax.xaxis.set_major_formatter(matplotlib.dates.DateFormatter('%Y'))
    else:
        ax.xaxis.set_major_locator(matplotlib.dates.MonthLocator())
        ax.xaxis.set_minor_locator(matplotlib.dates.DayLocator())
        ax.xaxis.set_major_formatter(matplotlib.dates.DateFormatter('%Y-%m'))

    plt.xticks(rotation=60, ha='right')
    # Pass visibility explicitly: a bare ``grid()`` call toggles the grid and
    # would switch these styled grids off again.
    ax.grid(True, which='major', color='k', lw=0.25, alpha=0.5)
    ax.grid(True, which='minor', color='k', lw=0.25, alpha=0.2)

    plt.subplot(2, 1, 2)
    plt.plot(s_time, ts2.exp_accel)
    plt.grid()
    return
Ejemplo n.º 2
0
def func1(window_size, df: pd.DataFrame):
    """Return ``exp_growth`` and ``exp_std_dev`` of the adjusted close.

    Args:
        window_size: Window passed to ``TrailingStats``.
        df: Price table providing a ``DF_ADJ_CLOSE`` column.

    Returns:
        Tuple ``(rate, volatility)`` where ``rate`` is ``exp_growth`` and
        ``volatility`` is ``exp_std_dev`` of the trailing statistics.
    """
    stats = TrailingStats(df[DF_ADJ_CLOSE], window_size)
    volatility = stats.exp_std_dev
    rate = stats.exp_growth
    # A leftover ``pdb.set_trace()`` used to halt every call here; it has
    # been removed so the function can run unattended.
    return rate, volatility
Ejemplo n.º 3
0
def test_trailing_avg():
    """Check ``TrailingStats.rolling_avg`` against a brute-force mean.

    Uses the last 100 adjusted-close values of VOO with a window of 21,
    averages every entry of ``_adj_close_intervals`` by hand, asserts the
    result is close to ``rolling_avg`` and plots both for visual inspection.
    """
    ticker = 'VOO'
    frame = YahooData([ticker]).dataframes[ticker]

    window = 21
    recent = frame[DF_ADJ_CLOSE].iloc[-100:]
    ts = TrailingStats(recent, window)

    # Reference implementation: plain mean of each interval.
    expected = np.array([np.mean(chunk) for chunk in ts._adj_close_intervals])

    assert np.all(np.isclose(expected, ts.rolling_avg))

    plt.figure()
    plt.plot(ts.series, label='actual')
    plt.plot(ts.times, expected, label='TEST AVG')
    plt.plot(ts.times, ts.rolling_avg, '--', label='IMPLEMENTED AVG')
    plt.legend()
Ejemplo n.º 4
0
def test_std():
    """Plot ``exp_std_dev`` of VOO between 2020-01-01 and 2020-11-01.

    Top panel: ``ts.values`` against ``ts.times`` on a log y-axis, coloured
    by ``exp_std_dev``. Bottom panel: ``exp_std_dev`` against ``ts.times``.
    """
    frame = YahooData().dataframes['VOO']

    start = np.datetime64('2020-01-01')
    stop = np.datetime64('2020-11-01')
    in_range = (frame.index > start) & (frame.index < stop)
    prices = frame.loc[in_range][DF_ADJ_CLOSE]

    ts = TrailingStats(prices, window_size=10)
    spread = ts.exp_std_dev
    # Not used afterwards; kept so a length mismatch between the statistic
    # and ``ts.times`` still raises at this point.
    pd.Series(data=spread, index=ts.times)

    times = ts.times
    values = ts.values

    top = plt.subplot(2, 1, 1)
    top.set_yscale('log')
    plt.scatter(times, values, c=spread, s=4)
    plt.grid()

    plt.subplot(2, 1, 2)
    plt.plot(times, spread)
    plt.grid()
    return
Ejemplo n.º 5
0
 def _rolling_stats(self, s: pd.Series):
     """Apply ``self._stats_function`` to each trailing window of ``s``.

     ``s`` is first inner-joined with ``self.dict['SPY']`` so only rows
     present in both remain. Each of its ``_values_intervals`` is then paired
     with the matching interval and ``mean`` entry of
     ``self._roi_spy_trailing_stats``.

     Args:
         s: Series to analyse.

     Returns:
         DataFrame with one row per window, indexed by the ``times`` of the
         ``TrailingStats`` built from the aligned series.
     """
     aligned = pd.concat(
         {'SPY': self.dict['SPY'], 'AAA': s},
         join='inner',
         axis=1,
     )
     s = aligned['AAA']

     ts = TrailingStats(s, window_size=self.intervals)
     spy_stats = self._roi_spy_trailing_stats

     windows = zip(spy_stats._values_intervals,
                   ts._values_intervals,
                   spy_stats.mean)

     rows = [
         self._stats_function(
             s=pd.Series(own_window),
             roi_spy=spy_window,
             target=target,
         )
         for spy_window, own_window, target in windows
     ]

     return pd.DataFrame(rows, index=ts.times)
Ejemplo n.º 6
0
def test_skip1():
    """Check that ``skip=k`` matches every k-th value of an unskipped run.

    For each skip from 2 to 9, builds a ``TrailingStats`` on SPY's adjusted
    close with that skip and compares several attributes against the
    unskipped instance sliced with ``[::skip]``. NaNs are zeroed in place
    before comparing.
    """
    series = YahooData().dataframes['SPY'][DF_ADJ_CLOSE]
    window_size = 5
    compared = (
        'times',
        'time_days_int',
        'rolling_avg',
        'return_ratio',
        'slope_normalized',
    )

    def compare_attr(stats1: TrailingStats, stats2: TrailingStats,
                     name: str, step: int):
        """Assert ``stats1.<name>[::step]`` equals ``stats2.<name>``."""
        print(f'comparing {name}')

        dense = getattr(stats1, name)[::step]
        sparse = getattr(stats2, name)
        # In-place writes, exactly as before (NaN never equals NaN).
        dense[np.isnan(dense)] = 0
        sparse[np.isnan(sparse)] = 0

        print(f'len1 = {len(dense)}')
        print(f'len2 = {len(sparse)}')
        assert np.all(dense == sparse)

    ts1 = TrailingStats(series, window_size=window_size)

    for skip in range(2, 10):
        print(f'testing skip {skip}')

        ts2 = TrailingStats(series, window_size=window_size, skip=skip)

        # Result is not compared; the access is kept so any error raised by
        # ``_adj_close_intervals`` still surfaces here.
        rolling2 = ts2._adj_close_intervals.mean(axis=1)
        rolling2[np.isnan(rolling2)] = 0

        for name in compared:
            compare_attr(ts1, ts2, name, skip)
Ejemplo n.º 7
0
def post2(df: pd.DataFrame):
    """Return the standardized ``exp_growth`` over a window of 10.

    The statistic is centred on its NaN-ignoring mean and scaled by its
    NaN-ignoring standard deviation.
    """
    growth = TrailingStats(df[DF_ADJ_CLOSE], 10).exp_growth

    center = np.nanmean(growth)
    spread = np.nanstd(growth)
    standardized = (growth - center) / spread
    return standardized
Ejemplo n.º 8
0
def post1(df: pd.DataFrame):
    """Return the standardized ``return_ratio`` over a window of 15.

    The statistic is centred on its NaN-ignoring mean and scaled by its
    NaN-ignoring standard deviation.
    """
    ratio = TrailingStats(df[DF_ADJ_CLOSE], 15).return_ratio

    center = np.nanmean(ratio)
    spread = np.nanstd(ratio)
    standardized = (ratio - center) / spread
    return standardized
Ejemplo n.º 9
0
def stop_loss(df: pd.DataFrame):
    """Return ``max_gain - max_loss`` over a 200-sample trailing window.

    The difference is passed through ``append_nan`` together with the
    statistics' ``window_size`` before being returned.
    """
    stats = TrailingStats(df[DF_ADJ_CLOSE], window_size=200)
    worst = stats.max_loss
    best = stats.max_gain
    return append_nan(best - worst, stats.window_size)
Ejemplo n.º 10
0
def post2(df:pd.DataFrame):
    """Return slope * r**2 of an 8-sample regression, scaled by its std.

    The slope and r-value come from ``TrailingStats.linear_regression``; the
    product is divided by ``cumulative_std`` of itself.
    """
    stats = TrailingStats(df[DF_ADJ_CLOSE], 8)
    slope, _intercept, rvalue = stats.linear_regression

    metric = slope * rvalue**2
    return metric / cumulative_std(metric)
Ejemplo n.º 11
0
def my_indicators(df, window: int):
    """Return ``(rolling_avg, exp_growth)`` with NaNs replaced by 0.

    Args:
        df: Table providing a ``DF_ADJ_CLOSE`` column.
        window: Window passed to ``TrailingStats``.

    Returns:
        Tuple of the two arrays. The NaN replacement is done in place on the
        arrays handed out by ``TrailingStats``.
    """
    stats = TrailingStats(df[DF_ADJ_CLOSE], window)
    indicators = (stats.rolling_avg, stats.exp_growth)

    for values in indicators:
        values[np.isnan(values)] = 0

    return indicators
Ejemplo n.º 12
0
def test_skip0():
    """Check ``skip=10`` against every 10th value of an unskipped run.

    Compares ``times`` and ``slope_normalized`` (NaNs zeroed in place) for
    SPY's adjusted close with a window of 50.
    """
    series = YahooData().dataframes['SPY'][DF_ADJ_CLOSE]
    skip = 10
    windows_size = 50

    # Reference: compute everything, then subsample.
    dense = TrailingStats(series, window_size=windows_size)
    dense_times = dense.times[::skip]
    dense_slopes = dense.slope_normalized[::skip]
    dense_slopes[np.isnan(dense_slopes)] = 0

    # Under test: let TrailingStats do the skipping.
    sparse = TrailingStats(series, window_size=windows_size, skip=skip)
    sparse_times = sparse.times
    sparse_slopes = sparse.slope_normalized
    sparse_slopes[np.isnan(sparse_slopes)] = 0

    assert np.all(dense_times == sparse_times)
    assert np.all(dense_slopes == sparse_slopes)
Ejemplo n.º 13
0
def create_data(symbols: list[str]):
    """Build one feature table per symbol from trailing statistics.

    For every symbol with data, the table holds ``exp_growth`` and
    ``rolling_avg`` for each window size (columns named ``attr(window)``),
    an ``avg_future_growth`` column from ``avg_future_growth`` with a
    20-sample window, and a ``date`` column. The last 20 samples are cut from
    the features and the first ``max(window_sizes)`` rows are dropped.

    Args:
        symbols: Ticker symbols to load through ``YahooData``.

    Returns:
        Dict mapping symbol to its DataFrame. Symbols without data are
        reported on stdout and left out.
    """
    yahoo = YahooData(symbols)

    window_sizes = np.array([5, 10, 20, 50, 100, 400])
    max_window = np.max(window_sizes)
    future_growth_window = 20
    attrs = [
        'exp_growth',
        'rolling_avg',
    ]

    df_dict = {}
    for symbol in symbols:
        print(f'Calculating symbol {symbol}')

        df = yahoo.get_symbol_all(symbol)
        if len(df) == 0:
            print(f'Symbol {symbol} data not available')
            continue

        series = df[DF_ADJ_CLOSE]
        columns = {}

        # Indicator columns, trimmed to line up with the future growths.
        for window in window_sizes:
            ts = TrailingStats(series, window)
            for attr in attrs:
                feature = getattr(ts, attr)
                columns[attr + f'({window})'] = feature[0:-future_growth_window]

        # Future growth target and matching dates.
        times = utils.dates2days(series.index.values)
        dates = series.index[0:-future_growth_window]

        _, growths = avg_future_growth(times,
                                       series.values,
                                       window=future_growth_window)

        columns['avg_future_growth'] = growths
        columns['date'] = dates

        # Drop the leading rows that are NaN for the largest window.
        df_dict[symbol] = pd.DataFrame(columns).iloc[max_window:]
    return df_dict
Ejemplo n.º 14
0
    def indicator1(df):
        """Return ``exp_growth`` of ``df['Close']`` for several windows.

        Returns:
            Dict keyed by the window size as a string ('5', '10', '20',
            '50'); each value is the ``exp_growth`` array with NaNs replaced
            by 0 in place.
        """
        close = df['Close']
        outs = {}
        for window in (5, 10, 20, 50):
            growth = TrailingStats(close, window).exp_growth
            # NaN -> 0
            growth[np.isnan(growth)] = 0
            outs[str(window)] = growth
        return outs
Ejemplo n.º 15
0
def test_skip():
    """Check ``slope_normalized`` and ``times`` have equal length.

    Runs on SPY's adjusted close with a window of 20 for skips
    2, 5, 8, ..., 41.
    """
    series = YahooData().dataframes['SPY'][DF_ADJ_CLOSE]

    for skip in range(2, 43, 3):
        stats = TrailingStats(series, window_size=20, skip=skip)
        n_slopes = len(stats.slope_normalized)
        n_times = len(stats.times)
        assert n_slopes == n_times
Ejemplo n.º 16
0
def test_max_loss():
    """Smoke test: evaluate ``max_loss`` for VOO within 2020.

    Restricts the data to dates strictly between 2020-01-01 and 2020-11-01
    and reads ``max_loss`` with a window of 10. Nothing is asserted.
    """
    frame = YahooData().dataframes['VOO']

    lower = np.datetime64('2020-01-01')
    upper = np.datetime64('2020-11-01')
    in_range = (frame.index > lower) & (frame.index < upper)

    prices = frame.loc[in_range][DF_ADJ_CLOSE]
    stats = TrailingStats(prices, window_size=10)
    # Only the attribute access matters; the value is discarded.
    _ = stats.max_loss
    return
Ejemplo n.º 17
0
    def indicator1(df):
        """Return ``exp_growth`` and ``exp_reg_diff`` for windows 100 and 400.

        Returns:
            Dict with keys ``growth(<window>)`` and ``diff(<window>)``; each
            value has its NaNs replaced by 0 in place.
        """
        close = df[DF_ADJ_CLOSE]
        # (output label, TrailingStats attribute), in output order.
        fields = (('growth', 'exp_growth'), ('diff', 'exp_reg_diff'))

        outs = {}
        for window in (100, 400):
            ts = TrailingStats(close, window)
            for label, attr in fields:
                values = getattr(ts, attr)
                values[np.isnan(values)] = 0
                outs[f'{label}({window})'] = values
        return outs
Ejemplo n.º 18
0
    def features(self, window: int):
        """Build a feature table from trailing statistics of ``self.series``.

        Args:
            window: Window passed to ``TrailingStats``; also the number of
                leading rows dropped from the result.

        Returns:
            DataFrame indexed like ``self.series`` (minus the first
            ``window`` rows) with columns ``Close(exp_growth, <window>)``
            and ``Close(exp_accel, <window>)``.
        """
        attrs = ['exp_growth', 'exp_accel']
        series1 = self.series
        ts1 = TrailingStats(series1, window)

        new = {
            f'Close({attr}, {window})': getattr(ts1, attr)
            for attr in attrs
        }

        # Index by the series the statistics were computed from. This used
        # to reference ``series``, a name that is not defined in this method.
        df_new = pd.DataFrame(new, index=series1.index)
        return df_new.iloc[window:]
Ejemplo n.º 19
0
def test_close_intervals():
    """Check the first and last entries of ``_adj_close_intervals``.

    With a window of 11 on the last 400 VOO rows, the first interval must be
    close to the first 11 values of the series and the last interval to the
    final 11.
    """
    symbol = 'VOO'
    frame = YahooData([symbol]).dataframes[symbol].iloc[-400:]
    series = frame[DF_ADJ_CLOSE]

    window_size = 11
    ssc = TrailingStats(series, window_size)

    # (interval position, matching rows of the raw series)
    checks = (
        (0, slice(0, window_size)),
        (-1, slice(-window_size, None)),
    )
    for position, rows in checks:
        interval = ssc._adj_close_intervals[position]
        correct = series.iloc[rows]
        assert np.all(np.isclose(interval, correct))
Ejemplo n.º 20
0
def post1(df:pd.DataFrame):
    """Return regression metrics of the peak ratio of the adjusted close.

    ``peak_ratio`` of the raw values is wrapped in a Series on the original
    index and fed to a 15-sample ``TrailingStats``.

    Returns:
        Dict with keys ``metric`` (``-slope * r**2 / cumulative_std(slope)``),
        ``slope``, ``r2`` and ``peak_ratio``.
    """
    series = df[DF_ADJ_CLOSE]
    pratio = peak_ratio(series.values)

    ts = TrailingStats(pd.Series(data=pratio, index=series.index), 15)
    slope, _intercept, rvalue = ts.linear_regression
    r2 = rvalue**2

    slope_std = cumulative_std(slope)
    # The result is not used below; the call is kept so behaviour is
    # unchanged should ``cumulative_std`` raise on this input.
    cumulative_std(rvalue)

    return {
        'metric': -slope * r2 / slope_std,
        'slope': slope,
        'r2': r2,
        'peak_ratio': pratio,
    }
Ejemplo n.º 21
0
def my_indicator(df):
    """Return the element-wise maximum ``exp_growth`` over several windows.

    ``exp_growth`` is computed for windows 400, 600 and 1200 on the
    ``DF_ADJ_CLOSE`` column. NaNs are replaced by -1 so they cannot win the
    maximum.

    Args:
        df: Table providing a ``DF_ADJ_CLOSE`` column.

    Returns:
        Array holding, per sample, the largest growth across the windows.
    """
    series = df[DF_ADJ_CLOSE]
    windows = np.array([400, 600, 1200])

    growths = []
    for window in windows:
        ts = TrailingStats(series, window)
        g1 = ts.exp_growth
        g1[np.isnan(g1)] = -1
        # Append the cleaned array itself. Re-reading ``ts.exp_growth`` here,
        # as before, would drop the NaN replacement whenever the attribute
        # is recomputed on access.
        growths.append(g1)

    return np.array(growths).max(axis=0)
Ejemplo n.º 22
0
def post1(df: pd.DataFrame):
    """Return, per sample, the regression slope with the highest r**2.

    Linear regressions are run on the log of the adjusted close over windows
    of 252, 504 and 1008 samples; for each sample the slope from the window
    with the largest squared r-value is selected.
    """
    series_log = np.log(df[DF_ADJ_CLOSE])

    slopes_by_window = []
    r2_by_window = []
    for multiple in (1, 2, 4):
        stats = TrailingStats(series_log, 252 * multiple)
        slopes, _intercept, r = stats.linear_regression
        slopes_by_window.append(slopes)
        r2_by_window.append(r**2)

    slope_grid = np.array(slopes_by_window)
    r2_grid = np.array(r2_by_window)

    # Row = window with the best fit, column = sample position.
    best_window = np.argmax(r2_grid, axis=0)
    sample = np.arange(len(best_window))
    return slope_grid[best_window, sample]
Ejemplo n.º 23
0
    def _features(self, df, window: int):
        """Build a feature table from trailing statistics of ``df``.

        Args:
            df: Table providing a ``DF_ADJ_CLOSE`` column.
            window: Window passed to ``TrailingStats``; also the number of
                leading rows dropped from the result.

        Returns:
            DataFrame indexed like ``df`` (minus the first ``window`` rows)
            with columns ``Close(exp_growth, <window>)`` and
            ``Close(exp_std_dev, <window>)``; the latter is standardized
            with NaN-ignoring mean and standard deviation.
        """
        ts1 = TrailingStats(df[DF_ADJ_CLOSE], window)

        columns = {}
        for attr in ('exp_growth', 'exp_std_dev'):
            values = getattr(ts1, attr)

            # Only the std-dev feature is standardized.
            if 'exp_std_dev' in attr:
                center = np.nanmean(values)
                spread = np.nanstd(values)
                values = (values - center) / spread

            columns[f'Close({attr}, {window})'] = values

        table = pd.DataFrame(columns, index=df.index)
        return table.iloc[window:]
Ejemplo n.º 24
0
import matplotlib.pyplot as plt
from scipy.signal import cwt

from backtester.stockdata import YahooData
from backtester.indicators import TrailingStats
from backtester.definitions import DF_ADJ_CLOSE
# Script: plot GOOG's adjusted close (log scale) above its trailing
# ``exp_growth`` for several window sizes.
# NOTE(review): ``np`` is used below but no numpy import is visible in this
# snippet -- confirm it is imported elsewhere.
yahoo = YahooData()
df = yahoo['GOOG']
series = df[DF_ADJ_CLOSE]
y = series.values
# ``logy`` is only referenced by the commented-out FFT line further down.
logy = np.log(y)

# One ``exp_growth`` array per window, in the same order as ``windows``.
windows = [20, 50, 70, 90, 600]
growths = []
for window in windows:
    ts = TrailingStats(series, window)
    growths.append(ts.exp_growth)

# x-limits applied to the top panel only.
date1 = np.datetime64('2008-01-01')
date2 = np.datetime64('2021-01-01')

# f = np.abs(np.fft.fft(logy))
# Top panel: price on a log y-axis, restricted to [date1, date2].
ax = plt.subplot(2, 1, 1)
plt.plot(series)
ax.set_yscale('log')
plt.grid()
plt.xlim(date1, date2)

# Bottom panel: one growth curve per window, labelled with the window size.
# No legend call is visible in this snippet, so the labels are not shown.
plt.subplot(2, 1, 2)
for ii, window in enumerate(windows):
    plt.plot(series.index, growths[ii], label=window)
Ejemplo n.º 25
0
def fn_std_change(df: pd.DataFrame, window_size):
    """Return ``exp_std_dev`` of the adjusted close for ``window_size``."""
    return TrailingStats(df[DF_ADJ_CLOSE], window_size).exp_std_dev
Ejemplo n.º 26
0
def fn_growth(df: pd.DataFrame, window_size):
    """Return ``exp_growth`` of the adjusted close for ``window_size``."""
    return TrailingStats(df[DF_ADJ_CLOSE], window_size).exp_growth
Ejemplo n.º 27
0
from backtester.definitions import DF_ADJ_CLOSE
from backtester.indicators import TrailingStats

# Script: plot trailing max loss, max gain and their difference for SPY
# between 2003-01-01 and 2005-01-01.
# NOTE(review): ``YahooData``, ``np`` and ``plt`` are used below but their
# imports are not visible in this snippet -- confirm they exist elsewhere.
y = YahooData()
start = np.datetime64('2003-01-01')
end = np.datetime64('2005-01-01')
y = y.filter_dates(start=start, end=end)

s = y.dataframes['SPY'][DF_ADJ_CLOSE]

# This assignment is overwritten by the loop variable below.
window = 100
windows = [
    100,
]
for window in windows:
    ts = TrailingStats(s, window)
    # ``loss`` is not used afterwards; the plots read ``ts.max_loss`` again.
    loss = ts.max_loss
    # loss99 = np.percentile(loss, 99)

    plt.subplot(2, 1, 1)

    plt.plot(ts.times, ts.max_loss, label='max loss')
    plt.plot(ts.times, ts.max_gain, label='max gain')
    delta = ts.max_gain - ts.max_loss
    plt.plot(ts.times, delta, label='delta')

# ``ts`` here is the instance left over from the last loop iteration.
# Neither summary value is used within the visible part of the script.
max_loss_mean = np.mean(ts.max_loss)
max_loss_p = np.percentile(ts.max_loss, 95)
plt.axhline(0.0)
plt.legend()
# Selects the bottom panel; nothing is drawn on it in the visible code.
plt.subplot(2, 1, 2)
Ejemplo n.º 28
0
def post1(df: pd.DataFrame):
    """Return ``max_gain - max_loss`` over a 100-sample trailing window."""
    stats = TrailingStats(df[DF_ADJ_CLOSE], 100)
    best = stats.max_gain
    worst = stats.max_loss
    return best - worst
Ejemplo n.º 29
0
def func2(window_size, df: pd.DataFrame):
    """Return ``exp_std_dev`` of the adjusted close for ``window_size``."""
    return TrailingStats(df[DF_ADJ_CLOSE], window_size).exp_std_dev
Ejemplo n.º 30
0
def post1(df: pd.DataFrame):
    """Return ``exp_growth`` of the adjusted close over 252 * 5 samples."""
    window = 252 * 5
    return TrailingStats(df[DF_ADJ_CLOSE], window).exp_growth