def predict(self, ticker, d, n):
    # get the previous 2 * history_len calendar days of stock data (extra days cover weekends/holidays)
    startDate = datetime.strptime(d, '%Y-%m-%d') - timedelta(
        days=self.history_len + self.history_len)
    # prepare data for prediction
    data = DataReader(ticker, 'yahoo', start=startDate, end=d)
    # use the last history_len rows for the LSTM model
    inputClosing = data.tail(self.history_len)
    sc = MinMaxScaler(feature_range=(0, 1))
    # scale the data
    inputClosing_scaled = sc.fit_transform(inputClosing)
    # output list
    out = []
    # for n days, make predictions and append the results
    for i in range(n):
        # reshape input for the model
        model_input = np.reshape(
            inputClosing_scaled[i:i + self.history_len, :].T,
            (data.shape[1], self.history_len, 1))
        # predict
        with self.graph.as_default():
            with self.session.as_default():
                pred = self.model.predict(model_input)
        # flip back and inverse-transform to price scale
        predicted_price = sc.inverse_transform(pred.T)
        # append the prediction to the end of the history array and fix dims
        inputClosing_scaled = np.append(
            inputClosing_scaled,
            [pred]).reshape(inputClosing_scaled.shape[0] + 1, data.shape[1])
        inputClosing_scaled = np.expand_dims(inputClosing_scaled, axis=1)
        # collect the result
        out.append(predicted_price)
    # convert the list to a DataFrame, reusing the columns from data
    out = pd.DataFrame(np.concatenate(out), columns=data.columns)
    return out
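# Hedged usage sketch for the predict() method above. It assumes a hypothetical
# wrapper class (here called `LSTMPredictor`) holding the trained Keras model, its
# TensorFlow graph/session, and history_len (e.g. 60); none of these names are
# confirmed by the snippet itself.
predictor = LSTMPredictor()                        # hypothetical wrapper exposing predict()
forecast = predictor.predict('AAPL', '2019-01-01', n=5)
print(forecast.head())                             # one row of predicted columns per future day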
def yahoo_prices(symbols, start_date, verbose=True):
    ticker_df_list = []
    start_dates = {}
    for index, row in symbols.iterrows():
        try:
            data = DataReader(row.Ticker, 'yahoo', start_date)
            data['Ref'] = row.Ticker
            data = data.loc[:, ['Ref', 'Adj Close']]
            data.rename(columns={'Adj Close': 'Price'}, inplace=True)
            if verbose:
                print("{}: Historical Perf: {}".format(
                    row.Ticker,
                    data.tail(1).iloc[0]['Price'] / data.head(1).iloc[0]['Price'] - 1))
            ticker_df_list.append(data)
            start_dates[row.Ticker] = data.head(1).index[0]
        except Exception as e:
            if verbose:
                print("No data for ticker %s\n%s" % (row.Ticker, str(e)))
    df = pd.concat(ticker_df_list)
    cell = df[['Ref', 'Price']]
    return cell.pivot(columns='Ref'), start_dates
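# Minimal usage sketch for yahoo_prices(), assuming a symbols DataFrame with a
# 'Ticker' column as iterated above (the tickers and start date are illustrative).
symbols = pd.DataFrame({'Ticker': ['AAPL', 'MSFT', 'GOOG']})
prices, start_dates = yahoo_prices(symbols, '2015-01-01', verbose=False)
print(prices.tail())    # pivoted 'Price' columns, one per ticker
print(start_dates)      # first available date for each ticker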
end = date(2017, 11, 6)

# DataReader is a function for importing data; several sources are available,
# such as Google Finance, Yahoo Finance, FRED and OANDA (for exchange rates).
# e.g. importing FB data from Google
stockFb = DataReader('fb', 'google', start, end)
type(stockFb)  # DataReader returns a pandas DataFrame object
stockFb.head()
stockFb.info()

# from Yahoo
stockApl = DataReader('AAPL', 'yahoo', start, end)
stockApl.head()
stockApl.info()

# plotting
stockApl['Close'].plot(title='APPLE')
plt.show()

# S&P 500 from FRED up to now
sp500 = DataReader('SP500', 'fred', start)  # note: today's date is the default for the end argument
sp500.tail()
sp500.plot(title='SP500')

# saving locally
sp500.to_csv('SP500')
import pandas as pd
import matplotlib.pyplot as plt
from pandas_datareader.data import DataReader
from datetime import date

start = date(1900, 1, 1)  # default: Jan 1, 2010
series_code = 'DGS10'     # 10-year Treasury Rate
data_source = 'fred'      # Federal Reserve Economic Data (FRED) service

data = DataReader(series_code, data_source, start)
data.info()
pd.concat([data.head(3), data.tail(3)])

series_name = '10-year Treasury'
data = data.rename(columns={series_code: series_name})
data.plot(title=series_name)
plt.show()
def __update(self):
    """Updates class attributes."""
    p, mv, rf = self.__prices, self.__mv, self.__rf
    # Select attributes different from 'None'
    li = [x for x in (p, mv, rf) if x is not None]
    # If there is no element in the list, i.e. all attributes are 'None'
    if len(li) == 0:
        self.__date = None
    # If only one element is not 'None', 'self.__date' should equal its index
    elif len(li) == 1:
        self.__date: np.ndarray = li[0].index.to_numpy()
    # If at least two attributes are not 'None', verify that rows match in length and in values
    else:
        # If lengths match (to prevent ValueError)
        if self.__check_index_length_match(li):
            # If lengths and values are the same
            if self.__check_index_values_match(li):
                self.__date = li[0].index.to_numpy().copy()
            # If lengths are equal among the dataset indexes, but not the values
            else:
                # Values do not match, so force them to be the same
                print("Lengths of rows match, but their values differ.")
                self.__date = li[0].index.to_numpy().copy()
                self.__make_indices_values_match()
                assert self.__check_index_values_match(li)
        # If lengths mismatch, truncate all DataFrames or Series
        else:
            # Get the oldest date among the list of DataFrames
            min_date = min([df.index.min() for df in li])
            # If there is a risk-free rate series that begins after the other series,
            # try to complete it with the 3-month proxy
            if (self.__rf is not None) & (self.__rf.index[0] > min_date):
                # Get the initial date of the risk-free rate series
                end = rf.index[0]
                # 3-Month Treasury Constant Maturity Rate (GS3M)
                rf3m = DataReader('GS3M', 'fred', start=min_date,
                                  end=end).resample('MS').mean()
                # Drop the last row to prevent overlapping.
                # We couldn't use a timedelta to go back 1 month as some months have 31 days and others 30.
                rf3m.drop(rf3m.tail(1).index, inplace=True)
                rf3m.columns = rf.columns
                rf3m = rf3m.div(100).div(12)
                # Concatenate both risk-free rate series
                rf_concat = pd.concat([rf3m, self.__rf], sort=True)
                errmsg: str = f"Got {rf_concat.shape} shape, but ({len(li[0].index)}, 1) expected."
                assert rf_concat.shape[1] == 1, errmsg
                self.__rf = rf_concat
                # Join both series into a single one
                # self.__rf = rf_concat.iloc[:, 0].add(rf_concat.iloc[:, 1], fill_value=0)
            else:
                # Truncate rows of different length according to their dates
                self.__truncate_rows()
                # Verify that the rows were correctly truncated
                not_none_attributes_list = self.__among_not_none_attributes()
                err_message = "Rows were not correctly truncated"
                assert self.__check_index_length_match(
                    not_none_attributes_list), err_message
                # Update the 'self.__date' attribute with the first item
                self.__date = not_none_attributes_list[0].index.to_numpy().copy()
                # Propagate the same index to the other datasets to force a perfect match
                self.__make_indices_values_match()
                # Verify that the indexes match
                err_message = "Values do not match among not 'None' attributes."
                assert self.__check_index_values_match(
                    self.__among_not_none_attributes()), err_message
            # Re-run the update after the series have been extended or truncated
            self.__update()
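# Sketch of the rf3m.div(100).div(12) conversion used above: GS3M is quoted as an
# annualized percentage, so dividing by 100 gives a decimal annual rate and dividing
# by 12 approximates a simple monthly rate. Values below are illustrative only.
import pandas as pd

annual_pct = pd.Series([4.8, 5.0])            # e.g. 4.8% and 5.0% annualized
monthly_rate = annual_pct.div(100).div(12)    # -> 0.0040 and ~0.004167 per month
print(monthly_rate)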
# Let's get an overall look at the average daily return using a histogram.
# We'll use seaborn to create both a histogram and a KDE plot on the same figure.
# Note the use of dropna() here, otherwise the NaN values can't be read by seaborn.
# sns.distplot(Stocks[stock1]['Daily Return'].dropna(), bins=100, color='purple')
# plt.show()

# ******************************************************************
# Section 3: Stock analysis for the whole stock list
# ******************************************************************

# Grab all the closing prices for the tech stock list into one DataFrame
closing_df = DataReader(tech_list, 'google', start, end)['Close']

# Let's take a quick look
print(closing_df.tail())

# Make a new tech returns DataFrame for all stocks
tech_rets = closing_df.pct_change()

# Comparing Google to itself should show a perfectly linear relationship
# sns.jointplot(stock1, stock1, tech_rets, kind='scatter', color='seagreen')
# plt.show()

# Use seaborn for multiple comparison analysis.
# Set up our figure by naming it returns_fig, and call PairGrid on the DataFrame
# returns_fig = sns.PairGrid(tech_rets.dropna())

# Using map_upper we can specify what the upper triangle will look like.
# returns_fig.map_upper(plt.scatter, color='purple')
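# Hedged sketch of the full PairGrid setup described in the comments above (assumes
# seaborn is imported as sns and matplotlib.pyplot as plt; the colormap and bin
# count are illustrative choices, not from the original).
returns_fig = sns.PairGrid(tech_rets.dropna())
returns_fig.map_upper(plt.scatter, color='purple')   # scatter plots in the upper triangle
returns_fig.map_lower(sns.kdeplot, cmap='Blues')     # 2-D KDE plots in the lower triangle
returns_fig.map_diag(plt.hist, bins=30)              # histograms on the diagonal
plt.show()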
class RLModel():
    def __init__(self, rl_model, pred_model, sent_crawler):
        self.rl_model = rl_model
        # length of observation
        self.obs_len = 14
        # start date for scaling historical data
        self.scaling_start_date = '2016-01-01'
        self.pred_model = pred_model
        self.sent_crawler = sent_crawler
        self.scaling_df = pd.DataFrame()
        self.history_df = pd.DataFrame()
        self.last_ticker = ''
        self.last_day = ''

    def _scale_df(self, target_df):
        price_columns = ['Open', 'High', 'Low', 'Close', 'Adj Close']
        high = self.scaling_df['High'].max()
        low = self.scaling_df['Adj Close'].min()
        diff = high - low
        # scale the price columns by the same factor
        target_df[price_columns] = target_df[price_columns].applymap(
            lambda x: ((x - low) / diff))
        # scale volume by itself
        scaler = MinMaxScaler()
        scaler.fit(self.scaling_df['Volume'].to_numpy().reshape(-1, 1))
        target_df['Volume'] = scaler.transform(
            target_df['Volume'].to_numpy().reshape(-1, 1))
        return target_df

    def get_action_from_sent(self, ticker, day, sentiment_df):
        day = datetime.strptime(day, '%Y-%m-%d').strftime('%Y-%m-%d')
        # if the historical data doesn't match the ticker and day
        if not ((self.last_ticker == ticker) and (self.last_day == day)):
            print('building historical data in sent')
            self.build_history(ticker, day)
        # merge historical data and sentiment data
        obs_df = pd.merge(self.history_df, sentiment_df, on='Date')
        obs_df.fillna(inplace=True, value=0)
        # make LSTM prediction data
        pred_df = self.pred_model.predict(ticker, day, self.obs_len // 2)
        # combine historical data and prediction data
        obs_df = obs_df.append(pred_df, ignore_index=True)
        # set sentiment for the future predictions to 0
        obs_df.fillna(inplace=True, value=0)
        # scale data
        obs_df = self._scale_df(obs_df)
        print(obs_df)
        # rearrange input to match the training data
        col_order = [
            'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume',
            'Sentiment Score'
        ]
        obs_df = obs_df[col_order]
        # make RL prediction
        action, _states = self.rl_model.predict(obs_df)
        return action

    # this method wraps data collection and prediction into one call
    def get_action(self, ticker, day):
        day = datetime.strptime(day, '%Y-%m-%d').strftime('%Y-%m-%d')
        # if the historical data doesn't match the ticker and day
        if not ((self.last_ticker == ticker) and (self.last_day == day)):
            print('building historical data')
            self.build_history(ticker, day)
        # get sentiment
        sentiment_df = self.sent_crawler.get_sentiment(ticker,
                                                       self.history_df.index)
        return self.get_action_from_sent(ticker, day, sentiment_df)

    # builds the historical portion of the stock data using the Yahoo DataReader.
    # This call should be made before calling any of the get_action variants, but those
    # methods will rebuild the history themselves if the cached ticker/day does not match.
    # returns - datetime index used to fetch the matching sentiment from Firebase
    def build_history(self, ticker, day):
        self.last_ticker = ticker
        self.last_day = datetime.strptime(day, '%Y-%m-%d').strftime('%Y-%m-%d')
        # get data
        history_start_date = datetime.strptime(
            day, '%Y-%m-%d') - timedelta(days=self.obs_len)
        self.scaling_df = DataReader(ticker, 'yahoo',
                                     start=self.scaling_start_date)
        self.history_df = DataReader(ticker, 'yahoo',
                                     start=history_start_date, end=day)
        self.history_df = self.history_df.tail(self.obs_len // 2)
        # print(self.history_df)
        # print(sent_df)
        # return the index to get the correct sentiment from Firebase
        return self.history_df.index


# test_model = RLModel(cycle_base_model, lstm)
# test_model.get_action('AAPL', '2019-01-01')
)  # secondary_y: column plotted on the right-hand axis with a different scale
plt.tight_layout()  # improve the layout by reducing white space
plt.show()

####################

import pandas as pd
from pandas_datareader.data import DataReader
from datetime import date

start = date(2015, 1, 1)   # default: Jan 1, 2010
end = date(2016, 12, 31)   # default: today
ticker = 'GOOG'
data_source = 'google'

stock_data = DataReader(ticker, data_source, start, end)
stock_data.info()
pd.concat([stock_data.head(3), stock_data.tail(3)])
stock_data.tail(3)

import matplotlib.pyplot as plt

stock_data['Close'].plot(title=ticker)
plt.show()

# ------------------------ FRED
import pandas as pd
from pandas_datareader.data import DataReader
from datetime import date

start = date(1962, 1, 1)  # default: Jan 1, 2010
series_code = 'DGS10'     # 10-year Treasury Rate
data_source = 'fred'      # Federal Reserve Economic Data (FRED) service
port_trend = []
port_close = []
port = []

# def port_perf(portfolio_equities, start_date, end_date):
# stocks_list = 'sp500'
# tickers = retrieve_tickers.retrieve_tickers(stocks_list)
# td = datetime.datetime.now().strftime('%Y-%m-%d')

# Write 2 DataFrames
for i in portfolio_equities:
    port = DataReader(i, "yahoo", start_date, end_date)
    # port['Date'] = port.index
    port['Symbol'] = i
    port_close.append(port.tail(1))
    print(port_close[-1]['Symbol'])

# port = DataReader(portfolio_equities, "yahoo", start_date, end_date)
# print(port.to_frame())

# 2. Retrieve stock data
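# Sketch: the per-ticker last rows collected in port_close can be combined into a
# single DataFrame (assumes pandas is imported as pd and the loop above has run).
latest_closes = pd.concat(port_close)
print(latest_closes[['Symbol', 'Close']])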
import gym
import custom_anytrading
from stable_baselines.common.policies import MlpPolicy
from stable_baselines.common import make_vec_env
from stable_baselines import PPO2
from sklearn.preprocessing import MinMaxScaler
from pandas_datareader.data import DataReader

print('loading data')
data = DataReader('AAPL', 'yahoo', start='2000-01-01', end='2019-01-01')
print(data.head())

test_data = data.tail(200)
train_data = data.head(-500)

env = gym.make('custom_stocks-v0',
               stock_df=train_data,
               pred_df=train_data,
               window_size=14,
               initial_balance=5000,
               min_percent_loss=.25,
               with_pred=False)

test_env = gym.make('custom_stocks-v0',
                    stock_df=test_data,
                    pred_df=test_data,
                    window_size=14,
                    initial_balance=5000,
                    # remaining kwargs assumed to mirror the training env
                    min_percent_loss=.25,
                    with_pred=False)
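# Hedged sketch of training and evaluating PPO2 on the environments built above
# (the timestep count is illustrative; the observation/reward details come from the
# custom_stocks-v0 environment, which is not shown here).
from stable_baselines.common.vec_env import DummyVecEnv

vec_env = DummyVecEnv([lambda: env])        # PPO2 expects a vectorized environment
model = PPO2(MlpPolicy, vec_env, verbose=1)
model.learn(total_timesteps=100000)

# roll the trained policy through the held-out test environment
obs = test_env.reset()
done = False
while not done:
    action, _states = model.predict(obs)
    obs, reward, done, info = test_env.step(action)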
import pandas as pd
from pandas import Series, DataFrame
import numpy as np
from flask import Flask, render_template

app = Flask(__name__)

import matplotlib.pyplot as plt
from pandas_datareader.data import DataReader  # changed because the module was moved
from datetime import datetime

end = datetime.now()
start = datetime(end.year - 1, end.month, end.day)

toyota = DataReader('TM', 'yahoo', start, end)
print(toyota.tail())

dfs = toyota.tail()
header = pd.DataFrame(dfs)
record = header.values.tolist()


@app.route('/')
def index():
    return render_template('index.html', header=header, record=record)


if __name__ == '__main__':
    app.run()
df.index = pd.to_datetime(df.index, format='%Y-%m-%d')
df['year'] = df.index.year.values
df['month'] = df.index.month.values
df['day'] = df.index.day.values
df.head()
# print(year)

df['date'] = df['year'].astype(str) + '-' + df['month'].astype(
    str) + '-' + df['day'].astype(str)
# df['date'] = pd.to_datetime(df[['year', 'month', 'day']])
df.date.values
df.tail()

df.close.plot(figsize=(12, 8), title='MU')

# for i, (index, row) in enumerate(df.iterrows()):
#     print(row)
#     print(df.loc[df.index[i - 4], 'close'])
#     df.at[index, 'Momentum_function']

# Selected technical indicators and their formulas (Type 1).

# Stochastic %K
lowest = df['low'].rolling(window=4).min()
df['Stochastic_k'] = pd.Series((df['close'] - lowest)) / (df['high'] - lowest)

# Calculating in two different ways
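# For comparison, a sketch of the textbook Stochastic Oscillator, which uses the
# rolling highest high rather than the current bar's high (the window of 4 is kept
# from above; 14 is the more common default), plus the %D signal line as a 3-period
# moving average of %K. Column names 'Stochastic_k_alt' and 'Stochastic_d' are
# illustrative additions.
highest = df['high'].rolling(window=4).max()
df['Stochastic_k_alt'] = (df['close'] - lowest) / (highest - lowest)
df['Stochastic_d'] = df['Stochastic_k_alt'].rolling(window=3).mean()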