Esempio n. 1
0
    def predict(self, ticker, d, n):
        """Forecast ``n`` future rows of price data for ``ticker``.

        Downloads Yahoo data ending at ``d`` (a ``'%Y-%m-%d'`` string), scales
        the most recent ``self.history_len`` rows to the range [0, 1] and feeds
        a sliding window of that history to ``self.model`` one step at a time.
        Every prediction is appended to the scaled history, so later steps are
        built on top of earlier predictions.

        Returns a DataFrame with one row per predicted step, transformed back
        with the fitted scaler and labelled with the downloaded data's columns.
        """
        # Request twice ``history_len`` calendar days; presumably the margin
        # absorbs weekends and holidays -- TODO confirm.
        end_day = datetime.strptime(d, '%Y-%m-%d')
        lookback = timedelta(days=self.history_len + self.history_len)
        data = DataReader(ticker, 'yahoo', start=end_day - lookback, end=d)
        n_columns = data.shape[1]

        # Only the last ``history_len`` rows are scaled and used as history.
        sc = MinMaxScaler(feature_range=(0, 1))
        window = sc.fit_transform(data.tail(self.history_len))

        out = []
        for step in range(n):
            # Model input is laid out as (columns, history_len, 1).
            latest = window[step:step + self.history_len, :]
            model_input = np.reshape(latest.T,
                                     (n_columns, self.history_len, 1))

            with self.graph.as_default(), self.session.as_default():
                pred = self.model.predict(model_input)

            # Undo the scaling before handing the values to the caller.
            out.append(sc.inverse_transform(pred.T))

            # Append the (still scaled) prediction to the history and restore
            # the layout the next iteration slices from.
            grown = np.append(window, [pred])
            grown = grown.reshape(window.shape[0] + 1, n_columns)
            window = np.expand_dims(grown, axis=1)

        # One row per step, same columns as the downloaded data.
        return pd.DataFrame(np.concatenate(out), columns=data.columns)
Esempio n. 2
0
def yahoo_prices(symbols, start_date, verbose=True):
    """Download adjusted close prices from Yahoo for every ticker in ``symbols``.

    Args:
        symbols: DataFrame with a ``Ticker`` column; one download per row.
        start_date: first date requested, passed straight to ``DataReader``.
        verbose: when true, print each ticker's total return over the
            downloaded period, or the error for tickers that failed.

    Returns:
        A ``(prices, start_dates)`` tuple. ``prices`` is the concatenated data
        pivoted on ``Ref`` so each ticker gets its own ``Price`` column;
        ``start_dates`` maps each ticker to the first date it has data for.
        When no ticker could be downloaded (or ``symbols`` is empty),
        ``prices`` is an empty DataFrame and ``start_dates`` is empty.
    """
    ticker_frames = []
    start_dates = {}
    for _, row in symbols.iterrows():
        ticker = row.Ticker
        try:
            data = DataReader(ticker, 'yahoo', start_date)
            data['Ref'] = ticker
            data = data.loc[:, ['Ref', 'Adj Close']]
            data = data.rename(columns={'Adj Close': 'Price'})
            # Raises IndexError for an empty download, which is treated like
            # any other failed ticker below.
            first_date = data.index[0]
            if verbose:
                perf = data['Price'].iloc[-1] / data['Price'].iloc[0] - 1
                print("{}: Historical Perf: {}".format(ticker, perf))
        except Exception as e:
            # Best effort: a ticker that cannot be fetched is skipped so the
            # remaining ones are still returned.
            if verbose:
                print("No data for ticker %s\n%s" % (ticker, str(e)))
            continue
        # Record the frame and its start date together so a ticker is either
        # fully present in the result or not at all.
        ticker_frames.append(data)
        start_dates[ticker] = first_date

    # pd.concat raises ValueError on an empty list.
    if not ticker_frames:
        return pd.DataFrame(), start_dates

    df = pd.concat(ticker_frames)
    return df[['Ref', 'Price']].pivot(columns='Ref'), start_dates
Esempio n. 3
0
# NOTE(review): `start`, `date`, `DataReader` and `plt` are not defined in this
# fragment; presumably they are set up earlier in the script -- confirm.
# The day is written as 6, not 06: a leading zero in a decimal literal is a
# syntax error in Python 3.
end = date(2017, 11, 6)

# DataReader imports data from one of several remote sources,
# such as Google Finance, Yahoo Finance, FRED or Oanda (for exchange rates).

# e.g. importing FB data from Google
# NOTE(review): the 'google' source has been dropped by later
# pandas-datareader releases -- confirm it is still available before running.
stockFb = DataReader('fb', 'google', start, end)
type(stockFb)
# DataReader returns a pandas DataFrame object

stockFb.head()
stockFb.info()

# from Yahoo
stockApl = DataReader('AAPL', 'yahoo', start, end)
stockApl.head()
stockApl.info()

# plotting
stockApl['Close'].plot(title='APPLE')
plt.show()

# SP500 from FRED up to now
sp500 = DataReader('SP500', 'fred', start)
# note: no `end` argument is passed here, so DataReader's default end applies
sp500.tail()
sp500.plot(title='SP500')

# saving locally (the file is named 'SP500', without an extension)
sp500.to_csv('SP500')
import pandas as pd
import matplotlib.pyplot as plt
from pandas_datareader.data import DataReader
from datetime import date

# Download the DGS10 series from FRED, show a summary, and plot it.
start = date(1900,1,1) # explicit start date (author's note: the default is Jan 1, 2010)
series_code = 'DGS10'  # 10-year Treasury Rate
data_source = 'fred'  # Federal Reserve Economic Data (FRED)

data = DataReader(series_code, data_source, start)
data.info()
# First and last three rows; as a bare expression the result is discarded
# when run as a script (it only displays in an interactive session).
pd.concat([data.head(3), data.tail(3)])

# Give the single column a readable name and reuse it as the plot title.
series_name = '10-year Treasury'
data = data.rename(columns={series_code: series_name})
data.plot(title=series_name)
plt.show()
    def __update(self):
        """Synchronise ``self.__date`` with the indexes of the stored datasets.

        Considers the prices, market values and risk-free rate attributes that
        are not ``None`` and:

        * sets ``self.__date`` to ``None`` when none of them is set;
        * uses the index of the only dataset when exactly one is set;
        * when several are set and the length check passes, uses the first
          dataset's index and, if the values differ, forces the others to
          match it;
        * when the length check fails, either prepends FRED's ``GS3M`` series
          to the risk-free rate (if that rate starts after the earliest
          dataset) or truncates the datasets via ``__truncate_rows``, and
          then calls itself again to re-validate the result.
        """
        p, mv, rf = self.__prices, self.__mv, self.__rf
        # Keep only the datasets that have actually been set.
        li = [x for x in (p, mv, rf) if x is not None]

        # Nothing is set: there is no date axis to expose.
        if not li:
            self.__date = None
            return

        # A single dataset defines the date axis on its own.
        if len(li) == 1:
            self.__date: np.ndarray = li[0].index.to_numpy()
            return

        # Two or more datasets: rows must agree in length and in values.
        if self.__check_index_length_match(li):
            values_match = self.__check_index_values_match(li)
            self.__date = li[0].index.to_numpy().copy()
            if not values_match:
                # Same length but different labels: force them to be equal.
                print("Lengths of rows match, but they have different values.")
                self.__make_indices_values_match()
                # NOTE(review): `li` was captured before the realignment, so
                # this check only sees the change if the helper modifies the
                # same objects -- confirm.
                assert self.__check_index_values_match(li)
            return

        # Lengths differ.
        # Get the oldest date among the list of DataFrames.
        min_date = min(df.index.min() for df in li)
        # In the case there is a risk-free rate and it begins after the other
        # series: try to complete it with the 3-month proxy.
        # `and` (not `&`) so the index is never read when the rate is None.
        if (self.__rf is not None) and (self.__rf.index[0] > min_date):
            # Initial date of the risk-free rate series
            end = rf.index[0]
            # 3-Month Treasury Constant Maturity Rate (GS3M)
            rf3m = DataReader('GS3M', 'fred', start=min_date,
                              end=end).resample('MS').mean()
            # Drop the last row to prevent overlapping with the existing
            # series. timedelta could not be used to go back one month, as
            # some months have 31 days while others have 30.
            rf3m.drop(rf3m.tail(1).index, inplace=True)
            rf3m.columns = rf.columns
            # Divide by 100 and by 12 (presumably annual percent to a monthly
            # decimal rate -- confirm).
            rf3m = rf3m.div(100).div(12)
            # Concatenate both risk-free rate datasets
            rf_concat = pd.concat([rf3m, self.__rf], sort=True)
            errmsg: str = f"Got {rf_concat.shape} shape, but ({len(li[0].index)}, 1) expected."
            assert rf_concat.shape[1] == 1, errmsg
            self.__rf = rf_concat
            # Join both series in a sole one
            # self.__rf = rf_concat.iloc[:, 0].add(rf_concat.iloc[:, 1], fill_value=0)
        else:
            # Truncate rows of different length according to their dates
            self.__truncate_rows()
            # Verify that the rows were correctly truncated
            not_none_attributes_list = self.__among_not_none_attributes()
            err_message = "Rows were not correctly truncated"
            assert self.__check_index_length_match(
                not_none_attributes_list), err_message
            # Update the 'self.__date' attribute with the first item
            self.__date = not_none_attributes_list[0].index.to_numpy().copy()
            # Propagate the same index to the other datasets to force a
            # perfect match
            self.__make_indices_values_match()

            # Verify that the indices now hold the same values
            err_message = "Values do not match among not 'None' attributes."
            assert self.__check_index_values_match(
                self.__among_not_none_attributes()), err_message
        # Re-run so the repaired datasets go through the checks above.
        self.__update()
Esempio n. 6
0
# Let's get an overall look at the average daily return using a histogram.
# We'll use seaborn to create both a histogram and kde plot on the same figure.

# Note the use of dropna() here, otherwise the NaN values can't be read by seaborn
#sns.distplot(Stocks[stock1]['Daily Return'].dropna(),bins=100,color='purple')
#plt.show()

#******************************************************************
# Section 3: Stock analysis for the whole stock list
#******************************************************************
# Grab all the closing prices for the tech stock list into one DataFrame
# NOTE(review): `tech_list`, `start` and `end` are not defined in this
# fragment; presumably they come from earlier in the script -- confirm.
closing_df = DataReader(tech_list, 'google', start, end)['Close']

# Let's take a quick look at the last rows
print(closing_df.tail())

# Make a new tech returns DataFrame for all stocks (change between
# consecutive rows, as computed by pct_change)
tech_rets = closing_df.pct_change()

# Comparing Google to itself should show a perfectly linear relationship
#sns.jointplot(stock1,stock1,tech_rets,kind='scatter',color='seagreen')
#plt.show()

# Use seaborn for multiple comparison analysis
# Set up our figure by naming it returns_fig, call PairGrid on the DataFrame
#returns_fig = sns.PairGrid(tech_rets.dropna())

# Using map_upper we can specify what the upper triangle will look like.
#returns_fig.map_upper(plt.scatter,color='purple')
class RLModel():
    """Combine price history, sentiment and price predictions for an RL model.

    An observation is made of the most recent ``obs_len // 2`` rows of Yahoo
    price data merged with sentiment, followed by ``obs_len // 2`` rows
    produced by ``pred_model``. The observation is scaled and handed to
    ``rl_model.predict``, whose action is returned.
    """

    def __init__(self, rl_model, pred_model, sent_crawler):
        """Store the collaborators and start with an empty history cache.

        Args:
            rl_model: object exposing ``predict(observation)`` that returns
                an ``(action, states)`` pair.
            pred_model: object exposing ``predict(ticker, day, n)`` that
                returns a DataFrame of predicted rows.
            sent_crawler: object exposing ``get_sentiment(ticker, index)``.
        """
        self.rl_model = rl_model
        # length of observation (half history rows, half predicted rows)
        self.obs_len = 14
        # first date of the data used to fit the scaling
        self.scaling_start_date = '2016-01-01'

        self.pred_model = pred_model
        self.sent_crawler = sent_crawler

        self.scaling_df = pd.DataFrame()
        self.history_df = pd.DataFrame()

        # ticker/day the cached history was built for
        self.last_ticker = ''
        self.last_day = ''

    def _scale_df(self, target_df):
        """Scale ``target_df`` in place using ``self.scaling_df`` as reference.

        All price columns share one scale, running from the minimum
        'Adj Close' to the maximum 'High' of the reference data. 'Volume' is
        min-max scaled against the reference volume. Returns ``target_df``.
        """
        price_columns = ['Open', 'High', 'Low', 'Close', 'Adj Close']
        high = self.scaling_df['High'].max()
        low = self.scaling_df['Adj Close'].min()
        diff = high - low

        # scale all price columns by the same scale (vectorized; replaces the
        # element-wise applymap, which newer pandas deprecates)
        target_df[price_columns] = (target_df[price_columns] - low) / diff

        # scale volume by itself
        scaler = MinMaxScaler()
        scaler.fit(self.scaling_df['Volume'].to_numpy().reshape(-1, 1))
        target_df['Volume'] = scaler.transform(
            target_df['Volume'].to_numpy().reshape(-1, 1))

        return target_df

    def _ensure_history(self, ticker, day, message):
        """Rebuild the cached history unless it already matches ticker/day."""
        if (self.last_ticker != ticker) or (self.last_day != day):
            print(message)
            self.build_history(ticker, day)

    def get_action_from_sent(self, ticker, day, sentiment_df):
        """Return the RL action for ``ticker`` on ``day`` given sentiment data.

        Args:
            ticker: symbol passed to the data reader and prediction model.
            day: date string in ``'%Y-%m-%d'`` format.
            sentiment_df: DataFrame merged with the price history on 'Date'.

        Returns:
            The action returned by ``rl_model.predict``.
        """
        day = datetime.strptime(day, '%Y-%m-%d').strftime('%Y-%m-%d')

        # rebuild if the historical data doesn't match the ticker and day
        self._ensure_history(ticker, day, 'building historical data in sent')

        # merge historical data and sentiment data
        obs_df = pd.merge(self.history_df, sentiment_df, on='Date')
        obs_df = obs_df.fillna(0)

        # make lstm prediction data
        pred_df = self.pred_model.predict(ticker, day, self.obs_len // 2)

        # combine historical data and prediction data
        # (pd.concat: DataFrame.append no longer exists in pandas 2.x)
        obs_df = pd.concat([obs_df, pred_df], ignore_index=True)
        # set sentiment for the predicted rows to 0
        obs_df = obs_df.fillna(0)

        # scale data
        obs_df = self._scale_df(obs_df)
        print(obs_df)

        # rearrange input to match the training data
        col_order = [
            'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume',
            'Sentiment Score'
        ]
        obs_df = obs_df[col_order]

        # make rl prediction
        action, _states = self.rl_model.predict(obs_df)

        return action

    # this method wraps data collection and prediction into one method
    def get_action(self, ticker, day):
        """Fetch sentiment for the cached history and return the RL action."""
        day = datetime.strptime(day, '%Y-%m-%d').strftime('%Y-%m-%d')

        # rebuild if the historical data doesn't match the ticker and day
        self._ensure_history(ticker, day, 'building historical data')

        # get sentiment
        sentiment_df = self.sent_crawler.get_sentiment(ticker,
                                                       self.history_df.index)

        return self.get_action_from_sent(ticker, day, sentiment_df)

    # builds the historical portion of the stock data using yahoo datareader
    def build_history(self, ticker, day):
        """Download scaling and history data for ``ticker`` ending on ``day``.

        The cache markers (``last_ticker``/``last_day``) and both frames are
        only updated after every step succeeded, so a bad date or a failed
        download cannot leave stale data marked as current.

        Returns:
            The index of the history rows, for looking up matching sentiment.
        """
        end_day = datetime.strptime(day, '%Y-%m-%d')
        history_start_date = end_day - timedelta(days=self.obs_len)

        # get data
        scaling_df = DataReader(ticker,
                                'yahoo',
                                start=self.scaling_start_date)
        history_df = DataReader(ticker,
                                'yahoo',
                                start=history_start_date,
                                end=day)

        # commit the new state in one go
        self.scaling_df = scaling_df
        self.history_df = history_df.tail(self.obs_len // 2)
        self.last_ticker = ticker
        self.last_day = end_day.strftime('%Y-%m-%d')

        # returns index to get the matching sentiment
        return self.history_df.index


# test_model = RLModel(cycle_base_model, lstm)
# test_model.get_action('AAPL', '2019-01-01')
Esempio n. 8
0
          )  # secondary_y: column on tight axis with different scale
plt.tight_layout()  # improving layout by reducing white spaces
plt.show()

####################
import pandas as pd
from pandas_datareader.data import DataReader
from datetime import date

# Download GOOG prices for 2015-2016, show a summary, and plot the close.
start = date(2015, 1, 1)  # explicit start (author's note: the default is Jan 1, 2010)
end = date(2016, 12, 31)  # explicit end (author's note: the default is today)
ticker = 'GOOG'
# NOTE(review): the 'google' source has been dropped by later
# pandas-datareader releases -- confirm it is still available before running.
data_source = 'google'
stock_data = DataReader(ticker, data_source, start, end)
stock_data.info()
# First and last three rows; as bare expressions these results are discarded
# when run as a script (they only display in an interactive session).
pd.concat([stock_data.head(3), stock_data.tail(3)])
stock_data.tail(3)
import matplotlib.pyplot as plt

# Plot the closing price, titled with the ticker symbol.
stock_data['Close'].plot(title=ticker)
plt.show()

# ------------------------ FRED

import pandas as pd
from pandas_datareader.data import DataReader
from datetime import date

# Parameters for a FRED download; the call that uses them is not part of this
# fragment.
start = date(1962, 1, 1)  # explicit start (author's note: the default is Jan 1, 2010)
series_code = 'DGS10'  # 10-year Treasury Rate
data_source = 'fred'  # Federal Reserve Economic Data (FRED)
Esempio n. 9
0
port_trend = []
port_close = []
port = []
#def port_perf(portfolio_equities, start_date, end_date):
#stocks_list = 'sp500'
#tickers = retrieve_tickers.retrieve_tickers(stocks_list)
#td = datetime.datetime.now().strftime('%Y-%m-%d')


# Collect the most recent row of Yahoo data for every equity.
# NOTE(review): `portfolio_equities`, `start_date` and `end_date` are not
# defined in this fragment; presumably they are set earlier -- confirm.
for i in portfolio_equities:
    port = DataReader(i, "yahoo", start_date, end_date)
    #port['Date'] = port.index
    port['Symbol'] = i
    port_close.append(port.tail(1))
# port_close is a list of one-row DataFrames, so it cannot be indexed with a
# column name; gather the symbols from each frame instead. print() is called
# as a function so the line is valid on Python 3 as well.
print([symbol for frame in port_close for symbol in frame['Symbol']])

#port = DataReader(portfolio_equities, "yahoo", start_date, end_date)	
#print port.to_frame()


# 2. Retrieve stock data







Esempio n. 10
0
import gym
import custom_anytrading

from stable_baselines.common.policies import MlpPolicy
from stable_baselines.common import make_vec_env
from stable_baselines import PPO2

from sklearn.preprocessing import MinMaxScaler

print('loading data')
data = DataReader('AAPL', 'yahoo', start='2000-01-01', end='2019-01-01')

# Call head(): printing the bound method itself does not show the first rows.
print(data.head())

# Test on the last 200 rows and train on everything except the last 500, so
# the 300 rows in between are used by neither set.
test_data = data.tail(200)
train_data = data.head(-500)

# Training environment; the same frame is passed as stock and prediction data.
env = gym.make('custom_stocks-v0',
               stock_df=train_data,
               pred_df=train_data,
               window_size=14,
               initial_balance=5000,
               min_percent_loss=.25,
               with_pred=False)

test_env = gym.make('custom_stocks-v0',
                    stock_df=test_data,
                    pred_df=test_data,
                    window_size=14,
                    initial_balance=5000,
Esempio n. 11
0
import pandas as pd
from pandas import Series, DataFrame
import numpy as np

app = Flask(__name__)

import matplotlib.pyplot as plt

from pandas_datareader.data import DataReader  # changed because the module moved
from datetime import datetime

# Fetch one year of Toyota (TM) prices ending now.
end = datetime.now()
try:
    start = datetime(end.year - 1, end.month, end.day)
except ValueError:
    # Only Feb 29 has no counterpart in the previous year; use Feb 28.
    start = datetime(end.year - 1, end.month, 28)
toyota = DataReader('TM', 'yahoo', start, end)


print(toyota.tail())

# The last rows are what the page shows.
dfs = toyota.tail()

# `header` is the frame itself, `record` the same rows as plain lists.
header = pd.DataFrame(dfs)
record = header.values.tolist()

@app.route('/')
def index():
  """Render ``index.html`` with the module-level ``header`` and ``record``."""
  return render_template('index.html', header=header, record=record)

# Start the Flask application only when this file is run as a script.
if __name__ == '__main__':
  app.run()

Esempio n. 12
0
# NOTE(review): `df` is not defined in this fragment; the code below reads its
# 'close', 'high' and 'low' columns and a '%Y-%m-%d' index -- confirm.
df.index = pd.to_datetime(df.index, format='%Y-%m-%d')
df['year'] = df.index.year.values
df['month'] = df.index.month.values
df['day'] = df.index.day.values

df.head()

# Build a 'year-month-day' text column from the parts (month and day are not
# zero-padded).
df['date'] = df['year'].astype(str) + '-' + df['month'].astype(
    str) + '-' + df['day'].astype(str)

# df['date'] = pd.to_datetime(df[['year', 'month','day']])
df.date.values

df.tail()

df.close.plot(figsize=(12, 8), title='MU')

# for i, (index, row) in enumerate(df.iterrows()):
# print (row)
# print( df.loc[df.index[ i - 4 ], 'close'])
# df.at[index,'Momentum_function']

# Selected technical indicators and their formulas (Type 1).
# Stochastic %K over a 4-row window:
#   (close - lowest low) / (highest high - lowest low)
# The highest high is taken over the same window as the lowest low, rather
# than using only the current row's high. The value is kept as a ratio (not
# multiplied by 100), as before.
lowest = df['low'].rolling(window=4).min()
highest = df['high'].rolling(window=4).max()
df['Stochastic_k'] = (df['close'] - lowest) / (highest - lowest)


# Calculating in two different ways