Ejemplo n.º 1
0
def create_data(symbols: list[str]):
    """Build one table of trailing indicators and future growth per symbol.

    For every symbol whose data is available, the adjusted-close series is
    passed to ``TrailingStats`` once per window size, and each requested
    attribute is stored under the column name ``'<attr>(<window>)'``. The
    last ``horizon`` entries of every indicator (and of the date index) are
    dropped before the output of ``avg_future_growth`` is added alongside
    them. Finally the first ``max(window_sizes)`` rows are discarded.

    Parameters
    ----------
    symbols : list[str]
        Symbols handed to ``YahooData`` and processed in the given order.

    Returns
    -------
    dict
        Maps each symbol that had data to its ``pandas.DataFrame``. Symbols
        with an empty frame are reported with ``print`` and left out.
    """
    horizon = 20
    stat_names = ('exp_growth', 'rolling_avg')
    window_sizes = np.array([5, 10, 20, 50, 100, 400])
    largest_window = np.max(window_sizes)

    source = YahooData(symbols)
    tables = {}
    for symbol in symbols:
        print(f'Calculating symbol {symbol}')

        frame = source.get_symbol_all(symbol)
        if not len(frame):
            print(f'Symbol {symbol} data not available')
            continue

        prices = frame[DF_ADJ_CLOSE]
        columns = {}

        # Trailing indicators, trimmed by the look-ahead horizon.
        for window in window_sizes:
            stats = TrailingStats(prices, window)
            for stat in stat_names:
                columns[f'{stat}({window})'] = getattr(stats, stat)[:-horizon]

        # Future growth computed over the same look-ahead horizon.
        day_numbers = utils.dates2days(prices.index.values)
        _unused, growth = avg_future_growth(day_numbers,
                                            prices.values,
                                            window=horizon)
        columns['avg_future_growth'] = growth
        columns['date'] = prices.index[:-horizon]

        # Chop off the leading rows, which are NaN due to the largest window.
        tables[symbol] = pd.DataFrame(columns).iloc[largest_window:]
    return tables
Ejemplo n.º 2
0
def _jit_speed_test1():
    """Compare the run time of ``_buysell_nojit`` against ``_buysell``.

    Loads the 'VOO' history, keeps every second row of the last 100 rows,
    and times 10000 calls of each routine with identical arguments. Both
    totals returned by ``timeit.timeit`` are printed.

    NOTE(review): if ``_buysell`` is compiled lazily on its first call, that
    one-time cost is included in the second timing -- confirm.
    """
    import timeit

    from backtester import utils
    from backtester.definitions import DF_ADJ_CLOSE
    from backtester.stockdata import YahooData

    ticker = 'VOO'
    history = YahooData([ticker]).get_symbol_all(ticker)
    closes = history.iloc[-100::2][DF_ADJ_CLOSE]

    times = utils.dates2days(closes.index).values.astype(np.float64)
    prices = closes.values

    def make_case(func):
        # The keyword arguments are rebuilt on every call so that both
        # routines pay the same per-call setup cost inside the timed region.
        def case():
            return func(
                times,
                prices,
                min_hold_days=np.int64(1),
                max_hold_days=np.int64(20),
                skip=np.int64(1),
            )
        return case

    repeats = 10000
    elapsed_plain = timeit.timeit(make_case(_buysell_nojit), number=repeats)
    elapsed_jit = timeit.timeit(make_case(_buysell), number=repeats)
    print('Time without jit', elapsed_plain)
    print('Time using jit', elapsed_jit)
Ejemplo n.º 3
0
    series = df[DF_ADJ_CLOSE]
    ts = TrailingStats(series, 15)
    stat1 = ts.return_ratio

    mean = np.nanmean(stat1)
    std = np.nanstd(stat1)
    return (stat1 - mean) / std


# Restrict the data source to the STOCKS symbol list and register the
# `post1` indicator function with an Indicators wrapper around it.
yahoo.symbols = STOCKS
indicator = Indicators(yahoo)
indicator.create(post1)

# Fetch the 'post1()' column for all symbols.
# NOTE(review): presumably one column per symbol, indexed by date -- confirm.
df = indicator.get_column_from_all('post1()')

# Adjusted-close series for SPY.
spy = yahoo.get_symbol_all('SPY')[DF_ADJ_CLOSE]

table = TableData(df)
# %%
# Boolean mask of entries below -1.5.
# NOTE(review): if these values are the standardized output of `post1`, this
# marks readings more than 1.5 standard deviations below the mean -- confirm.
ii = df.values < -1.5
# Size of the last axis of the mask.
stocknum = ii.shape[-1]
# Number of flagged entries in each row.
isum = np.sum(ii, axis=1)

# Row-wise mean of the values, ignoring NaN.
imean = np.nanmean(df.values, axis=1)
# Optional date bounds; the fixed 2008-2009 range is disabled.
# date1 = np.datetime64('2008-01-01')
# date2 = np.datetime64('2009-01-01')
date1 = None
date2 = None
# First panel of a 2x2 grid: row-wise mean plotted against the index of df.
plt.subplot(2, 2, 1)
plt.plot(df.index, imean)
plt.grid()
Ejemplo n.º 4
0
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from numba import njit

from backtester.indicators import TrailingStats
from backtester.smooth import TrailingSavGol

from backtester.stockdata import YahooData, Indicators, TableData
from backtester.definitions import DF_ADJ_CLOSE

# Create the data source and list the symbol names it provides.
yahoo = YahooData()
symbols = yahoo.get_symbol_names()

# Load the SPY table and pull out its adjusted-close column.
df = yahoo.get_symbol_all('SPY')
close = df[DF_ADJ_CLOSE]


@njit
def trough_volume(x: np.ndarray):
    current_max = x[0]
    current_max_loc = 0
    xlen = len(x)
    areas = np.zeros(xlen)

    # Record some information about each trough
    start_locs = np.zeros(xlen, dtype=np.int64)
    end_locs = np.zeros(xlen, dtype=np.int64)
    areas_final = np.zeros(xlen)
Ejemplo n.º 5
0
import matplotlib
import matplotlib.pyplot as plt

import backtester
from backtester.stockdata import YahooData
from backtester.definitions import (
    DF_ADJ_CLOSE, DF_CLOSE, DF_OPEN, DF_HIGH, DF_LOW, DF_VOLUME)
from backtester.indicators import TrailingStats

from scipy.signal import savgol_filter
from scipy.ndimage import gaussian_filter1d, uniform_filter1d


# Load the full table for the symbol under study.
symbol = 'APOL'
yd = YahooData()
df = yd.get_symbol_all(symbol)


def set_date_axes(dnum: int):
    ax = plt.gca()    
    plt.xticks(rotation=60, ha='right')
    plt.grid(which='major', color='k', lw=0.25, alpha=0.5)
    plt.grid(which='minor', color='k', lw=0.25, alpha=0.2)
    
    
    years = matplotlib.dates.YearLocator()
    months = matplotlib.dates.MonthLocator()
    days = matplotlib.dates.DayLocator()   
    
    if dnum / 12 > 20:
        ax.xaxis.set_major_locator(years)
Ejemplo n.º 6
0
            current_max = xi
            pratios[ii] = 0
        # Trough detected
        else:
            area += (current_max - xi)
            pratios[ii] = (current_max - xi) / current_max        
    return pratios





# Create the data source and list the symbol names it provides.
yahoo = YahooData()
symbols = yahoo.get_symbol_names()

# Load the GOOG table and take its adjusted-close column.
df = yahoo.get_symbol_all('GOOG')
close = df[DF_ADJ_CLOSE]
xlen = len(close)
# Integer positions 0..xlen-1, one per entry of the close series.
iarr = np.arange(xlen)
# Disabled trough-volume analysis, forward and on the negated reversed series.
# a, starts, ends, af = trough_volume(close.values)
# a2, starts2, ends2, af2 = trough_volume(-close.values[::-1])
# starts2 = iarr[::-1][starts2]

# Apply peak_ratio to the close values and re-attach the original index.
ratio = peak_ratio(close.values)
series = pd.Series(data=ratio, index=close.index)
# Trailing statistics of the ratio series with a window of 12.
ts = TrailingStats(series, 12)
# First element of the linear-regression result.
# NOTE(review): presumably the slope, as the variable name suggests -- confirm.
slope = ts.linear_regression[0]



# First panel of a 2x2 subplot grid.
plt.subplot(2,2,1)