Exemple #1
0
def get_data(day_range):
    """Build train/test frames for IBM and AAPL with a look-back overlap.

    Training data covers 2003-02-10 to 2004-09-12 and test data covers
    2004-09-13 to 2005-01-22.  Each test frame is prefixed with the last
    ``day_range - 1`` training rows so that the first test day already has a
    full window of history behind it.

    Args:
        day_range: Window length in rows; ``day_range - 1`` trailing training
            rows are carried over into the test frame.

    Returns:
        Tuple ``(full_ibm_train, full_ibm_test, full_apple_train,
        full_apple_test)``.
    """
    # Local import keeps this block self-contained; ``DataFrame.append`` was
    # removed in pandas 2.0, so the frames are joined with ``pd.concat``.
    import pandas as pd

    def _train_and_test(symbol):
        """Return (train, overlap-prefixed test) frames for one symbol."""
        train = get_stock_data(symbol, "2003-02-10", "2004-09-12")
        overlap = day_range - 1
        # ``train[-0:]`` would select the whole frame, so the "no overlap"
        # case has to be handled explicitly.
        carried = train.iloc[-overlap:] if overlap > 0 else train.iloc[:0]
        test_part = get_stock_data(symbol, "2004-09-13", "2005-01-22")
        return train, pd.concat([carried, test_part])

    full_apple_train, full_apple_test = _train_and_test("AAPL")
    full_ibm_train, full_ibm_test = _train_and_test("IBM")
    return full_ibm_train, full_ibm_test, full_apple_train, full_apple_test
def test_get_stock_data():
    """Check that each explicitly named source yields a pandas DataFrame."""
    requests = (
        (PHISIX_SYMBOL, "phisix"),
        (YAHOO_SYMBOL, "yahoo"),
    )
    for symbol, source_name in requests:
        frame = get_stock_data(symbol,
                               DATE_START,
                               DATE_END,
                               source=source_name)
        assert isinstance(frame, pd.DataFrame)
Exemple #3
0
def test_get_stock_data():
    """Check DataFrame results for explicit sources and for the default one."""
    # Explicitly selected sources
    from_phisix = get_stock_data(PHISIX_SYMBOL,
                                 DATE_START,
                                 DATE_END,
                                 source="phisix")
    assert isinstance(from_phisix, pd.DataFrame)

    from_yahoo = get_stock_data(YAHOO_SYMBOL,
                                DATE_START,
                                DATE_END,
                                source="yahoo")
    assert isinstance(from_yahoo, pd.DataFrame)

    # No source given: a non-PSE symbol must still come back as a DataFrame
    # (the default phisix lookup is expected to fall back to yahoo).
    from_default = get_stock_data(YAHOO_SYMBOL, DATE_START, DATE_END)
    assert isinstance(from_default, pd.DataFrame)
Exemple #4
0
    def get_ticker_data(self):
        """Fetch TSLA history and pick out the trailing ``self.period`` dates.

        Data is requested from 2015-01-01 up to yesterday.

        Returns:
            ``(result, arr_data, all_dates)`` where ``result`` holds the last
            ``self.period`` index labels in chronological order, ``arr_data``
            is the fetched frame and ``all_dates`` lists every index label.
            ``None`` when ``self.period`` is falsy.

        Raises:
            Exception: whatever ``get_stock_data`` raised, after it has been
                reported on stdout.
        """
        if not self.period:
            return None

        yesterday = date.today() - timedelta(days=1)
        try:
            arr_data = get_stock_data("TSLA", "2015-01-01", yesterday)
        except Exception as e:
            print('get stock data error, query misformed line 20')
            print(e)
            # Carrying on would only fail a few lines later with an unrelated
            # UnboundLocalError on ``arr_data``; surface the real cause.
            raise

        all_dates = list(arr_data.index)

        # Walk backwards until ``self.period`` labels are collected (or the
        # data runs out), then restore chronological order.
        result = []
        for label in reversed(all_dates):
            result.append(label)
            if len(result) == self.period:
                break
        result.reverse()

        return result, arr_data, all_dates
Exemple #5
0
    def predict(self):
        """Predict closing prices for ``self.ticker`` from average sentiment.

        Each observation is fed to the model to get a fractional change, which
        is applied to that day's opening price to obtain the predicted close.

        TODO: more data is needed.  For now the price range and the sentiment
        observations are the same ones used for training; once enough data
        exists, training and testing should use different date ranges and the
        observations should come from a dedicated test set.  Like the other
        models this one only does next-day prediction; with more data several
        previous days of sentiment could feed the next-day prediction.

        Returns:
            ``(predicted_closes, actual_closes)`` with the predictions
            flattened into a 1-D array.
        """
        # fastquant does not include the end date, hence the extra day
        query_end = self.end_date + timedelta(days=1)
        prices = get_stock_data(self.ticker, self.start_date, query_end)

        # the first day has no previous-day sentiment, so it is dropped
        prices = prices[1:]

        # True closes and opens.  Unlike the ghmm/rnn models, predicted
        # fractional changes cannot be fed back in as new observations here:
        # the model needs the average sentiment of a previous day.
        actual = prices['close'].values
        opens = prices['open'].values

        # observations should eventually come from a test set (see TODO)
        rows_for_ticker = self.train_data[self.train_data['ticker'] ==
                                          self.ticker]
        obs = rows_for_ticker['avgSent'].values

        # predicted close = open scaled by the predicted fractional change
        preds = []
        for day, open_price in enumerate(opens):
            frac_change = self.model.predict(obs[day].reshape(-1, 1))
            preds.append(frac_change[0] * open_price + open_price)

        return np.array(preds).flatten(), actual
Exemple #6
0
    def gen_data(self, ticker):
        """Build the fractional-change frame for ``ticker`` with trend labels.

        Args:
            ticker: Symbol whose prices and sentiment are combined.

        Returns:
            The result of ``self.data_prep`` on the ticker's price data, minus
            its first row, with a ``'SentPred'`` column ('UPTREND' or
            'DOWNTREND') and a ``'ticker'`` column added.
        """
        # tweets belonging to this ticker only
        ticker_data = self.tweets[self.tweets['ticker'] == ticker]

        # price data for the date range (end date pushed out by one day)
        query_end = self.end_date + timedelta(days=1)
        prices = get_stock_data(ticker, self.start_date, query_end)

        # Prices -> fractional change; the first entry is dropped because the
        # previous day's sentiment is what predicts the next day.
        frac_change = self.data_prep(prices)[1:]

        # average sentiment per unique date, mapped straight to a trend label
        trend_labels = [
            'UPTREND' if self.calc_avg_sentiment(ticker, day) > 0 else
            'DOWNTREND' for day in self.dates
        ]

        # attach the label and ticker columns
        frac_change['SentPred'] = trend_labels
        frac_change['ticker'] = ticker

        return frac_change
Exemple #7
0
    def rolling_window_test(self):
        """Run a 10-step rolling-window evaluation for every configured test.

        For each entry in ``self.tests`` the price window is fetched once.
        A fresh model is then trained on 1155 rows and scored on the following
        10 rows, sliding forward by 10 rows per step.  The mean of the
        per-step MAPE values is stored in ``self.results`` under
        ``"<model name>:<ticker>"``; finally all results are written as JSON
        to the path in ``self.f``.
        """
        # train on 1155 points, test on 10 points
        # slide window over by testing_size each time to get 10 tests
        training_size = 1155
        testing_size = 10
        n_windows = 10

        for test in self.tests.values():
            # running error total and number of windows evaluated
            error = 0
            test_n = 0

            # collect the data for the window
            window_params = test['window']
            ticker = window_params['ticker']

            window = get_stock_data(ticker, window_params['start'],
                                    window_params['end'])

            # step through the window in strides of testing_size
            for i in range(0, n_windows * testing_size, testing_size):
                split = i + training_size
                train_data = window.iloc[i:split]
                test_data = window.iloc[split:split + testing_size]

                print(f'window {i+1}')

                # make the model
                self.model = self.Model(params=self.params)

                # train and predict
                self.model.train(train_data=train_data)
                preds, actuals = self.model.predict(test_data=test_data)

                # accumulate the error for this window
                error += self.model.mean_abs_percent_error(y_pred=preds,
                                                           y_true=actuals)
                test_n += 1

                print('DONE')

            # only the last window's predictions are plotted
            if self.plot:
                self.model.plot_continuous(
                    preds=preds,
                    train=train_data,
                    actual=actuals,
                    title=
                    f'{self.model.name} {ticker} forecasted vs actual continuous stock price'
                )

            # store average MAPE error
            avg_error = error / test_n
            self.results[f'{self.model.name}:{ticker}'] = avg_error

        # Serialise first so a failure there does not truncate the file, then
        # write inside a context manager so the handle is always closed.
        dump = json.dumps(self.results)
        with open(self.f, 'w') as output_file:
            output_file.write(dump)
Exemple #8
0
def test_backtest():
    """
    Run ``backtest`` for every registered strategy with default parameter
    values and check that each run returns a result.
    """
    sample = pd.read_csv(SAMPLE_CSV, parse_dates=["dt"])
    # Simulate custom indicator
    sample["custom"] = np.random.random((sample.shape[0], )) * 100

    failure_template = "Backtest encountered error for strategy '{}'!"

    for strategy in STRATEGY_MAPPING.keys():
        if strategy != "sentiment":
            # Every other strategy runs straight off the sample CSV.
            cerebro = backtest(strategy, sample, plot=False)
            errmsg = failure_template.format(strategy)
            assert cerebro is not None, errmsg
            continue

        # --- sentiment strategy, news sentiments ---
        data = get_yahoo_data("TSLA",
                              "2020-01-01",
                              "2020-07-04",
                              dividends=True)
        # Cached sentiments are loaded instead of scraping during tests
        # (live call: get_bt_news_sentiment(keyword="tesla", page_nums=2)).
        with open(SENTI_PKL, "rb") as handle:
            sentiments = pickle.load(handle)
        cerebro = backtest(strategy,
                           data,
                           sentiments=sentiments,
                           senti=0.4,
                           plot=False)
        errmsg = failure_template.format(strategy)
        assert cerebro is not None, errmsg

        # --- sentiment strategy, disclosure sentiments ---
        data_disclosures = get_stock_data(
            "TSLA",
            "2020-01-01",
            "2020-09-30",
            dividends=True,  # source="phisix"
        )

        # Cached disclosure sentiments stand in for the live call
        # get_disclosure_sentiment(stock_code="JFC",
        #                          start_date="2020-07-01",
        #                          end_date="2020-09-30").
        with open(DISCLOSURE_PKL, "rb") as handle_disclosures:
            sentiments_disclosures = pickle.load(handle_disclosures)

        cerebro_disclosures = backtest(
            strategy,
            data_disclosures,
            sentiments=sentiments_disclosures,
            senti=0.2,
            plot=False,
        )
        errmsg_disclosures = failure_template.format(strategy)
        assert cerebro_disclosures is not None, errmsg_disclosures
Exemple #9
0
 def get_data(self):
     """Fetch every symbol in ``self.stock_list`` and join them column-wise.

     Each fetched frame has its columns replaced by the symbol itself, so
     the fetch is assumed to return exactly one column per symbol (TODO
     confirm).  The combined frame's index is named ``'DATE'``.
     """
     per_symbol = []
     for symbol in self.stock_list:
         frame = get_stock_data(symbol, self.start_date, self.end_date)
         frame.columns = [symbol]
         per_symbol.append(frame)
     combined = pd.concat(per_symbol, axis=1)
     combined.index.name = 'DATE'
     return combined
 def get_ticker_data(self):
     """Fetch price history for ``self.name`` from 2017-01-01 to yesterday.

     Returns:
         ``(np_data, pd_data)``: the frame's ``.values`` array and the frame
         itself, or ``None`` when ``self.name`` is falsy.

     Raises:
         Exception: whatever the fetch raised, after it has been reported
             on stdout.
     """
     if not self.name:
         return None

     yesterday = date.today() - timedelta(days=1)
     try:
         pd_data = get_stock_data(self.name, "2017-01-01", yesterday)
         np_data = pd_data.values
     except Exception as e:
         print('get stock data error, query misformed line 20')
         print(e)
         # Falling through would hit an unrelated UnboundLocalError on the
         # return below; surface the real failure instead.
         raise
     return np_data, pd_data
Exemple #11
0
 def __init__(self, ticker, period, fast_period, slow_period):
     """Load the last ``period`` days of ``ticker`` data and add indicators.

     Args:
         ticker: Symbol passed to ``get_stock_data``.
         period: Number of days before now at which the data starts.
         fast_period: Window passed to ``SMA`` for the ``SMA_fast`` column.
         slow_period: Window passed to ``SMA`` for the ``SMA_slow`` column.
     """
     now = datetime.datetime.now()
     window_start = now - datetime.timedelta(days=period)

     self.period = period
     self.slow_period = slow_period
     self.fast_period = fast_period
     self.end_date = now
     self.start_date = window_start
     self.tkr = ticker

     # Fetch the data, then run the indicator methods on the instance.
     self.df = get_stock_data(self.tkr, self.start_date, self.end_date)
     self.MACD()
     self.RSI()

     slow_average = self.SMA(period=self.slow_period)
     self.df['SMA_slow'] = slow_average
     fast_average = self.SMA(period=self.fast_period)
     self.df['SMA_fast'] = fast_average
Exemple #12
0
    def get_stock_data(self, format="ohlc"):
        """Fetch price data for ``self.symbol`` and keep it on the instance.

        Overrides get_stock_data.  The instance's start and end dates are
        converted from ``CALENDAR_FORMAT`` to YYYY-MM-DD, which is what the
        stock data fetch requires.  The result is stored in
        ``self.stock_data`` and returned.
        """
        iso_options = {"informat": CALENDAR_FORMAT, "outformat": "%Y-%m-%d"}
        start_date = format_date(self.start_date, **iso_options)
        end_date = format_date(self.end_date, **iso_options)

        if self.verbose:
            print("Pulling {} stock data...".format(self.symbol))

        # The bare name below is the module-level function, not this method.
        fetched = get_stock_data(self.symbol,
                                 start_date=start_date,
                                 end_date=end_date,
                                 format=format)
        self.stock_data = fetched
        return fetched
Exemple #13
0
    def predict(self):
        """Return how often the sentiment trend matched the price trend.

        Prices for ``self.ticker`` are fetched for the instance's date range
        (end date pushed out by one day) and the first day is dropped because
        it has no previous-day sentiment.  Each remaining day is labelled
        'UPTREND' when it closed above its open and 'DOWNTREND' otherwise,
        and compared with the ``'SentPred'`` labels stored in
        ``self.train_data`` for this ticker.

        Returns:
            Fraction of days whose labels agree, or ``0.0`` when there are no
            days to compare.
        """
        test_data = get_stock_data(self.ticker, self.start_date,
                                   self.end_date + timedelta(days=1))

        # again we ignore the first day, since we dont have the day before its sentiment
        test_data = test_data[1:]

        # A raw closing price is always positive, so labelling on
        # ``close > 0`` would mark every day an UPTREND.  Label on the move
        # from open to close instead.
        day_change = test_data['close'].values - test_data['open'].values
        actual = [
            'UPTREND' if change > 0 else 'DOWNTREND' for change in day_change
        ]

        # observations again should come from a test set when we have enough data
        obs = self.train_data[self.train_data['ticker'] ==
                              self.ticker]['SentPred'].values

        if not actual:
            # nothing to score; avoid dividing by zero
            return 0.0

        # count the days on which prediction and outcome agree
        correct = sum(1 for day, trend in enumerate(actual)
                      if obs[day] == trend)

        return correct / len(actual)
Exemple #14
0
    def get_stock_data(self, format="dohlc"):
        """Fetch price data for ``self.symbol`` indexed by its ``dt`` column.

        Overrides get_stock_data.  The instance's start and end dates are
        converted from ``CALENDAR_FORMAT`` to YYYY-MM-DD, which is what the
        stock data fetch requires.  The ``dt`` column is parsed to datetimes
        and becomes the index; the result is stored in ``self.stock_data``
        and returned.
        """
        iso_options = {"informat": CALENDAR_FORMAT, "outformat": "%Y-%m-%d"}
        start_date = format_date(self.start_date, **iso_options)
        end_date = format_date(self.end_date, **iso_options)

        if self.verbose:
            print("Pulling {} stock data...".format(self.symbol))

        # The bare name below is the module-level function, not this method.
        fetched = get_stock_data(self.symbol,
                                 start_date=start_date,
                                 end_date=end_date,
                                 format=format)

        # parse the date column in place, then promote it to the index
        fetched["dt"] = pd.to_datetime(fetched["dt"])
        indexed = fetched.set_index("dt")

        self.stock_data = indexed
        return indexed
def query_data():
    """Return the ``get_stock_data`` result for 'baba', 2020-01-28 to 2021-01-28."""
    symbol = 'baba'
    start, end = "2020-01-28", "2021-01-28"
    return get_stock_data(symbol, start, end)
Exemple #16
0
from fastquant import get_stock_data

# Fetch data for the "PTT.BK" symbol between the two given dates.
df = get_stock_data("PTT.BK", "2015-01-01", "2020-08-30")
Exemple #17
0
from fastquant import backtest, get_stock_data
# Fetch data for the "AAAA" symbol between the two given dates.
df = get_stock_data("AAAA", "2018-01-01", "2021-04-01")

# Backtest the 'macd' strategy on that data with 2000 in starting cash.
# With return_plot=True the call is unpacked into a result and a plot.
res,plot = backtest('macd', df, init_cash=2000, fast_period=12, slow_period=26, signal_period=9, sma_period=30, dir_period=10, return_plot=True)
#plot.savefig('AAAA_macd.png')
from fastquant import get_stock_data, backtest
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from datetime import date, timedelta
from tapy import Indicators

# NOTE(review): `arr` is not used anywhere in the visible part of this script.
arr = []

# Fetch data for 'tsla' and show the column names as delivered.
df = get_stock_data('tsla', "2017-01-01", "2020-01-01")
print(df.columns)
# Rename the columns to capitalised names; this assumes the frame has exactly
# these five columns in this order -- presumably what tapy expects, TODO confirm.
df.columns = ['Open', 'High', 'Low', 'Close', 'Volume']
# Compute the indicators; each call names the column(s) it writes to.
indicators = Indicators(df)
indicators.awesome_oscillator(column_name='AO')
indicators.sma(column_name='sma')
# Indicators.fractals(column_name_high='fractals_high', column_name_low='fractals_low')
indicators.fractals(column_name_high='fractal_highs',
                    column_name_low='fractal_lows')
# Take the frame back from the Indicators object and inspect its last rows.
df = indicators.df
print(df.tail())
# Draw all four indicator columns on the same axes.
plt.plot(df['sma'])
plt.plot(df['AO'])
plt.plot(df['fractal_highs'])
plt.plot(df['fractal_lows'])
plt.show()
Exemple #19
0
from fastquant import get_stock_data, backtest
from datetime import date

# Use the current date as the end of the requested range.
today = date.today()

# Fetch data for the "WFG.TO" symbol from 2015-09-01 up to today.
df = get_stock_data("WFG.TO", "2015-09-01", today)

# Backtest the 'smac' strategy with a 25-period fast and 52-period slow setting.
backtest('smac', df, fast_period=25, slow_period=52)

# res = backtest("smac", df, fast_period=range(10, 30, 3), slow_period=range(40, 55, 3), verbose=False)
# print(res[['fast_period', 'slow_period', 'final_value']].head())
#
 def get_data(self, ticker, start_date, end_date):
     """Return ``get_stock_data(ticker, start_date, end_date)`` unchanged."""
     fetched = get_stock_data(ticker, start_date, end_date)
     return fetched
from fastquant import get_crypto_data, get_stock_data, backtest
from fbprophet import Prophet
from matplotlib import pyplot as plt
from fbprophet.diagnostics import cross_validation
from sklearn.metrics import mean_squared_error
import numpy as np
import pandas as pd

# Source data for the rolling windows below.
# Note carried over from the crypto variant: max # of records is 500.
#outter_df = get_crypto_data("BTC/USDT", "2019-01-01", "2020-05-31")
outter_df = get_stock_data("JFC", "2018-01-01", "2019-05-31")

# Window length in rows (the name suggests "one year and one quarter").
#oneyroneqtr=456
oneyroneqtr = 315
# Number of rows in the first window; fewer than oneyroneqtr if the data is shorter.
length = len(outter_df[0:oneyroneqtr])

# Empty frame; not filled anywhere in the visible part of this script.
custompre = pd.DataFrame()


def MAPE(Y_actual, Y_Predicted):
    """Return the mean absolute percentage error of the predictions.

    The error of each prediction is taken relative to the actual value, and
    the mean of the absolute errors is scaled to a percentage.
    """
    relative_error = (Y_actual - Y_Predicted) / Y_actual
    return np.mean(np.abs(relative_error)) * 100


# Slide a window of `oneyroneqtr` rows over the data, one row at a time.
for x in range(0, len(outter_df)):
    # index labels of the rows in the window starting at position x
    frames = outter_df.index[x:(x + oneyroneqtr)]
    length = len(frames)
    # windows that run past the end of the data are shorter and are skipped
    if length == oneyroneqtr:
        #print(outter_df.loc[frames])

        # rows belonging to this full-length window
        df = outter_df.loc[frames]
from fastquant import get_stock_data
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, GRU
import tensorflow as tf
import config as cf

# NOTE(review): `config` is imported as `cf` above, but the statements below
# use literal values rather than anything read from it.

# 'high' column for IBM: 2006-2016 for training, 2017 for testing.
train_data = get_stock_data("IBM", "2006-01-01", "2016-12-31")['high'].values
test_data = get_stock_data("IBM", "2017-01-01", "2017-12-31")['high'].values

# Reshape each series into a single column, the 2-D layout the scaler works on.
train_data = train_data.reshape(-1, 1)
test_data = test_data.reshape(-1, 1)

# Fit the 0-1 scaling on the training data only ...
sc = MinMaxScaler(feature_range=(0, 1))
train_scale = sc.fit_transform(train_data)

# ... and apply that same fitted scaling to the test data.
test_scale = sc.transform(test_data)

X_train = []
Y_train = []

# Each sample is the previous 60 scaled values; its target is the value
# that immediately follows them.
for i in range(60, len(train_scale)):
    X_train.append(train_scale[i - 60:i, 0])
    Y_train.append(train_scale[i, 0])

# Convert the sample and target lists to arrays.
x_train, y_train = np.array(X_train), np.array(Y_train)
Exemple #23
0
                )[["cleaned_tweet"]].sample(n=1).iat[0, 0])
        else:
            st.markdown(
                f"No tweet for {random_stock} between {start_date} and {end_date}"
            )

    # Horizontal rule separating this section from the one above
    st.markdown(f"<hr>", unsafe_allow_html=True)

    # NOTE: Loading Stock Price data from fastquant
    st.markdown(
        f"<h2 style='color: black;'> Stock Price Action for <span style='font-weight: bold;'> {random_stock} </span></h2>",
        unsafe_allow_html=True)
    # Strip the leading '$' characters from the stock code before the lookup.
    stock_fq = random_stock.lstrip('$')
    # Prices from seven days before the selected start date up to today.
    stock_df = get_stock_data(stock_fq,
                              start_date - datetime.timedelta(days=7),
                              date.today())
    # stock_df = stock_df.join
    # Line chart of "close" against "dt".
    # NOTE(review): assumes "dt" is available as a column of stock_df -- confirm.
    fig1 = px.line(stock_df, x="dt", y="close")
    st.plotly_chart(fig1)

    # NOTE: DataFrame of Raw Data
    st.markdown(
        f"<h2 style='color: black;'> <span style='font-weight: bold;'> {random_stock} </span> Tweets based on date range </h2>",
        unsafe_allow_html=True)
    # Rows for the selected stock whose creation date lies inside the
    # inclusive [start_date, end_date] range, limited to three columns.
    modified_data = data.loc[(data['tweet_created_at'].dt.date >= start_date) &
                             (data['tweet_created_at'].dt.date <= end_date) &
                             (data['stock_code'] == random_stock),
                             ['username', 'tweet_created_at', 'cleaned_tweet']]
    # Report how many rows matched.
    st.markdown(
        f"{len(modified_data)} tweet/s between {start_date} and {end_date}")
Exemple #24
0
                self.model.plot_continuous(
                    preds=preds,
                    train=train_data,
                    actual=actuals,
                    title=
                    f'{self.model.name} {ticker} forecasted vs actual continuous stock price'
                )

            # store average MAPE error
            avg_error = error / test_n
            self.results[f'{self.model.name}:{ticker}'] = avg_error

        # write errors to file
        dump = json.dumps(self.results)
        output_file = open(self.f, 'w')
        output_file.write(dump)
        output_file.close()


if __name__ == "__main__":
    # Print the 'test6' window and each of its rolling train/test slices.
    window_cfg = rolling_window_tests['test6']['window']
    df = get_stock_data(window_cfg['ticker'], window_cfg['start'],
                        window_cfg['end'])
    print('df')
    print(df)

    train_len, test_len = 1155, 10
    for offset in range(0, 100, 10):
        boundary = offset + train_len
        print(offset)
        # training slice, then the test slice that follows it
        print(df.iloc[offset:boundary])
        print(df.iloc[boundary:boundary + test_len])
Exemple #25
0
from fastquant import backtest, get_stock_data
import pickle
from gaussian_hmm import *
import json
#from forecasting_train_predictor import *
#from overfit_train_predictor import *

# Data to train on; the model itself lives in the gaussian_hmm module
# imported above.
df = get_stock_data('AAPL', '2020-01-01', '2020-12-31')

# Model settings; 'name' is 'GHMM'.
# NOTE(review): the meaning of 'd' is not visible from here -- confirm against
# gaussian_hmm.
params = {
    'n_components': 2,
    'algorithm': 'map',
    'n_iter': 100,
    'd': 5,
    'name': 'GHMM'
}


def percentage_gain(init, end):
    """Return the change from ``init`` to ``end`` as a fraction of ``init``."""
    change = end - init
    return change / init


training = {
    'AAPL': {
        'start': '2017-08-01',
        'end': '2019-01-01'
    },
    'IBM': {
        'start': '2017-08-01',
        'end': '2019-01-01'
def test_get_stock_data():
    """Check that the default source returns a DataFrame for both symbols."""
    for symbol in (PHISIX_SYMBOL, YAHOO_SYMBOL):
        fetched = get_stock_data(symbol, DATE_START, DATE_END)
        assert isinstance(fetched, pd.DataFrame)
Exemple #27
0
    def rolling_window_test(self,
                            folder,
                            windows=10,
                            train_size=1300,
                            test_size=100):
        """Run a rolling-window evaluation for every configured test.

        For each entry in ``self.tests`` the price window is fetched once.
        For each of ``windows`` steps a fresh model is trained on
        ``train_size`` rows and scored on the following ``test_size`` rows,
        sliding forward by ``test_size`` rows per step.  The mean per-step
        error is stored in ``self.results`` under ``"<model name>:<ticker>"``
        and all results are written as JSON to
        ``<folder><model name>.json``.

        Args:
            folder: Output location prefix; it is concatenated directly with
                file names, so it should end with a path separator.
            windows: Number of train/test steps per test.
            train_size: Rows used for training in each step.
            test_size: Rows used for testing in each step, and the stride.

        Raises:
            ValueError: if ``self.params['loss']`` is not one of the
                supported loss names.
        """
        for test in self.tests.values():
            # running error total and number of windows evaluated
            error = 0
            test_n = 0

            # collect the data for the window
            window_params = test['window']
            ticker = window_params['ticker']

            window = get_stock_data(ticker, window_params['start'],
                                    window_params['end'])
            print('Window shape: ', window.shape)

            window_starts = range(0, windows * test_size, test_size)
            for index, i in enumerate(window_starts):
                train_end = i + train_size
                test_end = train_end + test_size
                print('Train range: ', i, '-', train_end)
                print('Test range: ', train_end, '-', test_end)
                train_data = window.iloc[i:train_end]
                test_data = window.iloc[train_end:test_end]
                print('Window train data shape: ', train_data.shape)
                print('Window test data shape: ', test_data.shape)

                # Make the model and train_predictor
                self.model_class = self.Model(params=self.params)
                self.model = self.model_class.gen_model()
                self.train_predictor = self.Train_Predictor(params=self.params)

                # train and predict
                self.model, model_history = self.train_predictor.train(
                    model=self.model,
                    train_data=self.model_class.preprocess_data(train_data),
                    label_column_index=self.model_class.label_column_index)
                preds, actuals = self.train_predictor.predict(
                    model=self.model,
                    test_data=self.model_class.preprocess_data(test_data),
                    label_column_index=self.model_class.label_column_index)

                # get error for this window; loss names follow keras
                loss_name = self.params['loss']
                if loss_name == "mean_absolute_percentage_error":
                    error += self.model_class.mean_abs_percent_error(
                        y_pred=preds, y_true=actuals)
                elif loss_name == "root_mean_squared_error":
                    error += self.model_class.root_mean_squared_error(
                        y_pred=preds, y_true=actuals)
                # NOTE(review): mean_squared_error is scored with MAPE here.
                # The original author flagged this as "a bit weird - ask
                # Rowan"; kept as is until confirmed.
                elif loss_name == "mean_squared_error":
                    error += self.model_class.mean_abs_percent_error(
                        y_pred=preds, y_true=actuals)
                else:
                    raise ValueError(
                        "Loss parameter isn't programmed or incorrect. Loss parameter: "
                        + self.params['loss'])
                test_n += 1

                print('DONE')

                # every window gets its own plots when plotting is enabled
                if self.plot:
                    self.model_class.plot_results(
                        preds=preds,
                        actual=actuals,
                        title=
                        f'{self.model_class.name} {ticker} Window {index+1} forcasted vs actual stock prices {window_params["start"]} to {window_params["end"]}',
                        folder=folder)
                    if model_history is not None:
                        self.model_class.plot_loss(
                            t_loss=model_history.history['loss'],
                            v_loss=model_history.history['val_loss'],
                            title=
                            f'{self.model_class.name} {ticker} Window {index+1} train vs validation loss',
                            folder=folder)

                print('DONE PLOTTING')

            # store the average error over all windows of this test
            avg_error = error / test_n
            self.results[f'{self.model_class.name}:{ticker}'] = avg_error

        # Serialise first so a failure there does not truncate the file, then
        # write inside a context manager so the handle is always closed.
        json_file = f'{folder}{self.model_class.name}.json'
        dump = json.dumps(self.results)
        with open(json_file, 'w') as output_file:
            output_file.write(dump)
from fastquant import backtest, get_stock_data
# Fetch data for the "JFC" symbol between the two given dates.
jfc = get_stock_data("JFC", "2018-01-01", "2019-01-01")
# Backtest the 'smac' strategy with a 15-period fast and 40-period slow setting.
backtest('smac', jfc, fast_period=15, slow_period=40)
Exemple #29
0
from fastquant import get_stock_data, backtest
import matplotlib.pyplot as plt
import numpy as np

# Fetch data for the "TSLA" symbol between the two given dates.
tsla = get_stock_data("TSLA", "2018-01-01", "2019-01-01")
# pandas access notes:
# .iloc[i] returns the complete row at position i
# .index[i] returns the index label (the date) of the row at position i
# .loc[...] selects by label, e.g. a range of columns
# .attrs returns a dict of metadata attached to the frame
print(tsla.iloc[0].high)
# Module-level accumulators; get_high_low() and get_med_price() below append
# to HIGHS, LOWS and MED_PRICE.  The others are not filled in the visible part
# of this script.
HIGHS = []
LOWS = []
MED_PRICE = []
FIVE_DAY = []
THIRTY_FOUR_DAY = []
AO = []
INDEX = []


def get_high_low():
    """Append one ``[index label, value]`` pair per ``tsla`` row to HIGHS and LOWS."""
    for position in range(len(tsla)):
        row = tsla.iloc[position]
        HIGHS.append([row.name, row.high])
        LOWS.append([row.name, row.low])


def get_med_price():
    """Append ``[label, [midpoint of high and low]]`` to MED_PRICE per HIGHS entry.

    The whole MED_PRICE list is printed after every append.
    """
    for idx, high_entry in enumerate(HIGHS):
        low_entry = LOWS[idx]
        midpoint = (high_entry[1] + low_entry[1]) / 2
        MED_PRICE.append([high_entry[0], [midpoint]])
        print(MED_PRICE)

Exemple #30
0
def load_data(in_inputs):
    # Fetch the training and test ranges for `ticker` and keep only the
    # `in_inputs` selection as arrays.  `ticker` and the four date names are
    # not parameters; they are read from the enclosing scope.
    train_data = get_stock_data(ticker, train_start_date, train_end_date)[in_inputs].values
    test_data = get_stock_data(ticker, test_start_date, test_end_date)[in_inputs].values

    # Flatten each array into a single column.
    # NOTE(review): if `in_inputs` selects several columns their values end up
    # stacked into that one column -- confirm this is intended.
    train_data = train_data.reshape(-1,1)
    test_data = test_data.reshape(-1,1)