def stock_autocorr(ticker):
    """Print the auto-correlation of a ticker's daily percentage change.

    Downloads the price history for ``ticker`` with ``DataReader`` (using the
    module-level ``data_source`` and ``start_date``), adds a ``'mean'`` column
    holding the average of Open/High/Low/Close, drops the columns listed in
    the module-level ``drop_col`` and reports the auto-correlation of the
    percentage change of what remains.

    Args:
        ticker: Symbol passed straight to ``DataReader`` and echoed in the
            printed message.

    Returns:
        None. The result is only printed.
    """
    prices = DataReader(ticker, data_source, start_date)

    # Average of the four daily quotes.
    ohlc_sum = prices['Open'] + prices['High'] + prices['Low'] + prices['Close']
    prices['mean'] = ohlc_sum / 4

    reduced = prices.drop(drop_col, axis=1)
    # NOTE(review): assigning a DataFrame to a single column only works when
    # one column survives the drop -- presumably just 'mean'; confirm against
    # the definition of drop_col.
    reduced['pct_change'] = reduced.pct_change()
    autocorr_value = reduced['pct_change'].autocorr()

    template = 'Over the past {0} years, the auto-correlation of {1} daily point change is:{2}'
    print(template.format(years, ticker, autocorr_value))
pd.set_option('display.width', 400)
pd.set_option('display.max_columns', 10)
import numpy as np
import matplotlib.pyplot as plt
from pandas_datareader.data import DataReader
from sklearn.metrics import mean_absolute_error
from sklearn.linear_model import LinearRegression
import pprint

# Sample window and symbol for the download below.
beginDate = '2011-03-04'
endDate = '2021-03-04'
stock = 'AAPL'
df = DataReader(stock, 'yahoo', beginDate, endDate)
# Offline alternative to the download above:
#df = pd.read_csv('MSFT.csv',index_col=0)

# Expose the adjusted close under the plain 'Close' name.
df['Close'] = df['Adj Close']
df = df.drop('Adj Close', axis=1)

# Moving averages with periods 5,10,20,50,100,200 days
for ma_period in [5, 10, 20, 50, 100, 200]:
    indicator_name = 'ma_{:d}'.format(ma_period)
    df[indicator_name] = df['Close'].rolling(ma_period).mean()

# Bollinger bands (the moving average plus and minus 1 and 2 standard
# deviations), all built from the same 20-row rolling window.
close_roll_20 = df['Close'].rolling(20)
boll_mid_20 = close_roll_20.mean()
boll_std_20 = close_roll_20.std()
df['Boll_Up_20_2'] = boll_mid_20 + 2 * boll_std_20
df['Boll_Down_20_2'] = boll_mid_20 - 2 * boll_std_20
df['Boll_Up_20_1'] = boll_mid_20 + boll_std_20
df['Boll_Down_20_1'] = boll_mid_20 - boll_std_20
# note: if using line above, loading from csv, comment out section below
#
# User input and load ticker data from yahoo finance
while True:
    # input() is deliberately outside the try block: end-of-input (EOFError)
    # or Ctrl-C must stop the script instead of being reported as an unknown
    # ticker and retried forever.
    ticker = input('Enter Stock Ticker: ').upper()
    try:
        df = DataReader(ticker, 'yahoo', beginDate, endDate)
        #df = yf.Ticker(ticker, start=beginDate, end=endDate)
    except Exception:
        # Any download failure is treated as "unknown symbol"; ask again.
        print('\nStock Ticker Symbol does not exist!\n')
        continue
    break

#
# Use Adj Close instead of Close
df['Close'] = df['Adj Close']
df = df.drop(columns=['Adj Close', 'High', 'Low', 'Open'])
print(f'\n{df.describe()}\n')

# 50-row moving average; min_periods=0 lets the first rows average over
# whatever history is available instead of yielding NaN.
df['Moving_av'] = df['Close'].rolling(window=50, min_periods=0).mean()
df['Moving_av'].plot()

# Row-over-row change in volume and close. The first row has no predecessor,
# so its change is set to 0; the slice assignment is a no-op on an empty frame.
rate_increase_in_vol = df['Volume'].diff()
rate_increase_in_vol.iloc[:1] = 0
rate_increase_in_close = df['Close'].diff()
rate_increase_in_close.iloc[:1] = 0

df['Increase_in_vol'] = rate_increase_in_vol
df['Increase_in_close'] = rate_increase_in_close
df['Increase_in_vol'].plot()
df['Increase_in_close'].plot()
def __update(self):
    """Updates class attributes.

    Re-derives ``self.__date`` from the indices of the datasets that are
    currently set (prices, market values, risk-free rate):

    * none set: ``self.__date`` becomes ``None``;
    * exactly one set: ``self.__date`` is that dataset's index;
    * several set with indices of equal length: the first index is used and,
      if the values differ, it is propagated to the other datasets;
    * several set with indices of different length: either the risk-free
      rate is extended backwards with the FRED ``GS3M`` series, or all
      datasets are truncated, and the update is then run again.

    Raises:
        AssertionError: If the indices still disagree after they were forced
            to match, or if the extended risk-free rate is not a single
            column.
    """
    p, mv, rf = self.__prices, self.__mv, self.__rf
    # Select attributes different from 'None'
    li = [x for x in (p, mv, rf) if x is not None]

    # All attributes are 'None': nothing to synchronise.
    if not li:
        self.__date = None
        return

    # Only one attribute is set: 'self.__date' is simply its index.
    if len(li) == 1:
        self.__date: np.ndarray = li[0].index.to_numpy()
        return

    # At least two attributes are set: rows must match in length and values.
    if self.__check_index_length_match(li):
        if self.__check_index_values_match(li):
            self.__date = li[0].index.to_numpy().copy()
        else:
            # Same length but different values: force every dataset to take
            # the index of the first one.
            print(
                "Lengths of rows match, but not they have different values."
            )
            self.__date = li[0].index.to_numpy().copy()
            self.__make_indices_values_match()
            assert self.__check_index_values_match(li)
        return

    # Length mismatch. Get the oldest date among the list of datasets.
    min_date = min(df.index.min() for df in li)

    # In the case there is a risk-free rate and that it begins after the other
    # series: try to complete it with the 3 month proxy.
    # 'and' (not '&') so the index is never read when there is no risk-free
    # rate; '&' evaluated both operands and raised AttributeError on None.
    if self.__rf is not None and self.__rf.index[0] > min_date:
        # Get initial date of the risk-free rate series
        end = rf.index[0]
        # 3-Month Treasury Constant Maturity Rate (GS3M)
        rf3m = DataReader('GS3M', 'fred', start=min_date,
                          end=end).resample('MS').mean()
        # We have to drop the last row to prevent overlapping
        # We couldn't have used timedelta to go back 1 month as some have
        # 31 days while others 30
        rf3m.drop(rf3m.tail(1).index, inplace=True)
        rf3m.columns = rf.columns
        # NOTE(review): looks like an annual percentage converted to a
        # monthly decimal rate -- confirm the units of the existing series.
        rf3m = rf3m.div(100).div(12)
        # Concatenate both risk-free rates
        rf_concat = pd.concat([rf3m, self.__rf], sort=True)
        errmsg: str = f"Got {rf_concat.shape} shape, but ({len(li[0].index)}, 1) expected."
        assert rf_concat.shape[1] == 1, errmsg
        self.__rf = rf_concat
    else:
        # Truncate rows of different length according to their dates
        self.__truncate_rows()
        # Verify if the rows were correctly truncated
        not_none_attributes_list = self.__among_not_none_attributes()
        err_message = "Rows were not correctly truncated"
        assert self.__check_index_length_match(
            not_none_attributes_list), err_message
        # Update the 'self.__date' attribute with the first item
        self.__date = not_none_attributes_list[0].index.to_numpy().copy()
        # Propagate same indexes to the other datasets to force a perfect
        # match
        self.__make_indices_values_match()
        # Verify that indices have same indexes
        err_message = "Values do not match among not 'None' attributes."
        assert self.__check_index_values_match(
            self.__among_not_none_attributes()), err_message

    # Run the update again on the adjusted datasets.
    # NOTE(review): the nesting of this call was ambiguous in the reviewed
    # copy; it is placed so that it runs after either branch above -- confirm
    # against the original layout.
    self.__update()
#df = pd.read_csv('XXXXXX.csv',index_col=0)
# note: if using line above, loading from csv, comment out section below
#
# User input and load ticker data from yahoo finance
while True:
    # input() is deliberately outside the try block: end-of-input (EOFError)
    # or Ctrl-C must stop the script instead of being reported as an unknown
    # ticker and retried forever.
    ticker = input('Enter Stock Ticker: ').upper()
    try:
        df = DataReader(ticker, 'yahoo', beginDate, endDate)
    except Exception:
        # Any download failure is treated as "unknown symbol"; ask again.
        print('\nStock Ticker Symbol does not exist!\n')
        continue
    break

#
# Use Adj Close instead of Close
df['Close'] = df['Adj Close']
df = df.drop(columns=['Adj Close', 'High', 'Low', 'Open'])

# Future Days - number of days to predict
modelData = df['Close'].to_frame()
futureDays = 30
# 'Predict' holds the close observed futureDays rows later; the last
# futureDays rows have no such value and are removed by dropna().
modelData['Predict'] = modelData['Close'].shift(-futureDays)
modelData = modelData.dropna()
print(modelData)

# axis is passed by keyword (columns=...): pandas 2.x no longer accepts it
# positionally.
x = np.array(modelData.drop(columns=['Predict']))[:-futureDays]
#print(x)
y = np.array(modelData['Predict'])[:-futureDays]
#print(y)
#
def stock_info(ticker):
    """Build price-forecast and Reddit-activity tables for ``ticker``.

    Reading the body top to bottom, the function:

    1. requests a Reddit OAuth token and loads ``trimmed_posts.csv``;
    2. downloads prices with ``DataReader`` and fits an ARIMA model, a
       ``LinearRegression`` and a ``DecisionTreeRegressor`` on the close;
    3. reads quote details through ``yf.Ticker(ticker).info``;
    4. aggregates the stored posts mentioning ``ticker`` per ``created_utc``
       and merges them with ``TrendReq`` interest data and ``yf.download``
       prices;
    5. searches r/WallStreetBets for live posts and aggregates them per date;
    6. converts three DataFrames to dicts and returns them.

    NOTE(review): the reviewed copy of this function had lost its
    indentation; the nesting of the helper, loops and try/except blocks below
    is a reconstruction and should be confirmed against the original layout.

    Args:
        ticker: Symbol used for the price downloads, the post filter and the
            Reddit search query.

    Returns:
        See the review note on the final ``return`` statement.
    """
    today = datetime.today()
    beginDate = '2020-01-01'
    # NOTE(review): `datetime.today()` above and `datetime.datetime.now()`
    # here cannot both resolve against the same `datetime` name -- confirm
    # which import the file uses.
    endDate = datetime.datetime.now().date()
    # NOTE(review): API credentials are hard-coded in source; presumably these
    # should come from configuration or the environment.
    script = "9ho5HG7o00PT-g"
    secret = "2CQTFbYyYp5aLEN7bHkKGO8X4E3YHQ"
    # These two assignments override the beginDate/endDate set above.
    beginDate = '2020-12-01'
    endDate = datetime.today().strftime('%Y-%m-%d')

    def df_from_response(res):
        # Flatten a Reddit listing response into one DataFrame row per post.
        df = pd.DataFrame()
        for post in res.json()['data']['children']:
            df = df.append(
                {
                    'subreddit': post['data']['subreddit'],
                    'title': post['data']['title'],
                    'selftext': post['data']['selftext'],
                    'num_comments': post['data']['num_comments'],
                    'upvote_ratio': post['data']['upvote_ratio'],
                    'date': datetime.fromtimestamp(
                        post['data']['created_utc']).strftime('%Y-%m-%d'),
                    'ups': post['data']['ups'],
                    'downs': post['data']['downs'],
                    'score': post['data']['score'],
                    'kind': post['kind'],
                    'id': post['data']['id'],
                },
                ignore_index=True)
        return df

    # --- Reddit OAuth token (password grant) ---
    auth = requests.auth.HTTPBasicAuth(script, secret)
    data = {
        'grant_type': 'password',
        'username': '******',
        'password': '******'
    }
    headers = {'User-Agent': 'Final_Project/0.0.1'}
    request = requests.post('https://www.reddit.com/api/v1/access_token',
                            auth=auth,
                            data=data,
                            headers=headers)
    token = f"bearer {request.json()['access_token']}"
    headers = {**headers, **{'Authorization': token}}

    # --- Stored posts and price history ---
    posts = pd.read_csv("trimmed_posts.csv")
    selected_cols = ['title', 'selftext']
    df = DataReader(ticker, 'yahoo', beginDate, endDate)
    df['Close'] = df['Adj Close']
    df = df.drop(columns=['Adj Close', 'High', 'Low', 'Open'], axis=1)
    modelData = df['Close'].to_frame()
    # The rolling means below are computed but not read again in this function.
    five_rolling = modelData.rolling(window=5).mean()
    ten_rolling = modelData.rolling(window=10).mean()
    twenty_rolling = modelData.rolling(window=20).mean()
    fifty_rolling = modelData.rolling(window=50).mean()
    hundred_rolling = modelData.rolling(window=100).mean()

    # --- Supervised data: 'Target' is the close futureDays rows ahead ---
    futureDays = 10
    modelData['Target'] = modelData['Close'].shift(-futureDays)
    # NOTE(review): `drop(labels, 1)` passes axis positionally; confirm the
    # installed pandas version still accepts that.
    X = np.array(modelData.drop(['Target'], 1))[:-futureDays]
    y = np.array(modelData['Target'])[:-futureDays]
    Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.25)
    Xfuture = modelData.drop(['Target'], 1)[:-futureDays]
    Xfuture = Xfuture.tail(futureDays)
    Xfuture = np.array(Xfuture)

    # --- Walk-forward ARIMA: refit on the growing history for each test point ---
    # The training slice ends at 70% of the rows, the test slice starts at 75%.
    train_data, test_data = df[0:int(len(df) * 0.7)], df[int(len(df) * 0.75):]
    training_data = train_data['Close'].values
    test_data = test_data['Close'].values
    history = [x for x in training_data]
    model_predictions = []
    N_test_observations = len(test_data)
    for time_point in range(N_test_observations):
        model = ARIMA(history, order=(4, 1, 0))
        model_fit = model.fit(disp=0)
        output = model_fit.forecast()
        yhat = output[0]
        model_predictions.append(yhat)
        true_test_value = test_data[time_point]
        history.append(true_test_value)
    MSE_error = mean_squared_error(test_data, model_predictions)

    # --- Linear and decision-tree regressors on the same features ---
    linear = LinearRegression().fit(Xtrain, ytrain)
    linearPrediction = linear.predict(Xfuture)
    linearResult = linear.score(Xtrain, ytrain)
    valid = modelData[X.shape[0]:]
    # NOTE(review): `predictions` is read here but only assigned a few lines
    # below; unless it is also defined elsewhere this line fails.
    valid['Target'] = predictions
    tree = DecisionTreeRegressor().fit(Xtrain, ytrain)
    treePrediction = tree.predict(Xfuture)
    treeResult = tree.score(Xtrain, ytrain)
    predictions = treePrediction
    valid = modelData[X.shape[0]:]
    valid['Predict'] = predictions

    # --- Future business-day frame filled with the ARIMA forecast ---
    todaysDate = datetime.datetime.now().date()
    futureDays = 10
    us_bd = CustomBusinessDay(calendar=USFederalHolidayCalendar())
    futureDates = pd.date_range(todaysDate, periods=futureDays, freq=us_bd)
    combinedDFcol = ['Close', 'Predict', 'SM1', 'SM2', 'SM3', 'SM4']
    futureDF = pd.DataFrame(index=futureDates, columns=combinedDFcol)
    futureDF['Predict'] = model_fit.forecast(steps=futureDays)[0]
    combinedDF = df.append(futureDF, ignore_index=False)
    combinedDF.index.names = ['Date']

    # --- Current quote details (infoDict is not read again in this function) ---
    currInfo = yf.Ticker(ticker).info
    currInfo = yf.Ticker(ticker).info
    infoDict = {
        'longName: ': currInfo['symbol'],
        'Current Ask/Bid: ': str(currInfo['ask']) + '/' + str(currInfo['bid']),
        'Open Price: ': str(round(currInfo['open'], 2)),
        'High/Low Price: ': str(currInfo['dayHigh']) + '/' + str(currInfo['dayLow']),
        'Avg Volume: ': str(currInfo['averageVolume']),
        'Volume: ': str(currInfo['volume']),
        '52w High: ': str(round(currInfo['fiftyTwoWeekHigh'], 2)),
        '52w Low: ': str(round(currInfo['fiftyTwoWeekLow'], 2)),
        'MorningStar Rating: ': str(currInfo['morningStarOverallRating']),
        'Short Ratio: ': str(currInfo['shortRatio'])
    }

    # --- Stored posts whose title AND selftext both contain the ticker ---
    try:
        new_df = posts[posts[selected_cols].apply(
            lambda x: x.str.contains(ticker)).all(axis=1)]
        data = new_df
        data['Ticker'] = ticker
        group_df = new_df.groupby('created_utc') \
            .agg({'id':'count', 'num_comments':'mean', 'score':'mean'}) \
            .rename(columns={'id':'post_count','num_comments':'avg_comments', 'score':'avg_score'}) \
            .reset_index()
        group_df['Ticker'] = ticker
    except:
        # Fallback: a zero-filled placeholder.
        # NOTE(review): the key here is 'ticker' while later merges use
        # 'Ticker'; the bare except also hides the original error.
        places = {
            'created_utc': 0,
            'post_count': 0,
            'avg_comments': 0,
            'avg_score': 0,
            'ticker': ticker
        }
        group_df = pd.DataFrame.from_dict(places)

    # Posts scoring above 300, counted per (created_utc, Ticker).
    super_posts_df = data.loc[data['score'] > 300]
    if not super_posts_df.empty:
        super_posts_df = super_posts_df.groupby(
            ['created_utc', 'Ticker'])['score'].apply(lambda x:
                                                      (x >= 300).sum()).reset_index(
                                                          name='Count_Score_300')

    # --- Search-interest ("Noise") series from TrendReq ---
    try:
        pytrend = TrendReq(hl='en-US', tz=360)
        pytrend.build_payload(kw_list=[ticker],
                              timeframe=beginDate + ' ' + endDate,
                              geo='US')
        # Note: this rebinds `df`, which held the price history until now.
        df = pytrend.interest_over_time()
        df['Noise'] = df[ticker]
        df[ticker] = ticker
        df.index.names = ['Date']
        df.columns = ['Ticker', 'isPartial', 'Noise']
        mergedNoise = df
    except:
        noise = {'Ticker': ticker, 'isPartial': 'No', 'Noise': 0}
        mergedNoise = pd.DataFrame.from_dict(noise)

    # --- Merge post aggregates, noise, high-score counts and prices ---
    group_df['created_utc'] = pd.to_datetime(group_df['created_utc'])
    if not super_posts_df.empty:
        super_posts_df['created_utc'] = pd.to_datetime(
            super_posts_df['created_utc'])
    merged = group_df.merge(mergedNoise,
                            left_on=['created_utc', 'Ticker'],
                            right_on=['Date', 'Ticker'],
                            how='left')
    if not super_posts_df.empty:
        merged = merged.merge(super_posts_df,
                              left_on=['created_utc', 'Ticker'],
                              right_on=['created_utc', 'Ticker'],
                              how="left")
    else:
        merged['Count_Score_300'] = 0
    # NOTE(review): the result of this drop is discarded, so 'isPartial'
    # stays in `merged`.
    merged.drop(columns=['isPartial'])
    stockData = yf.download(ticker, start=beginDate, end=endDate)
    stockData['Ticker'] = ticker
    stockReport = pd.DataFrame(stockData, columns=['Ticker', 'Adj Close', 'Volume'])
    merged = merged.merge(stockReport,
                          left_on=['created_utc', 'Ticker'],
                          right_on=['Date', 'Ticker'],
                          how='left')
    merged['post_count_change'] = merged['post_count'].pct_change()
    merged['avg_score_change'] = merged['avg_score'].pct_change()
    merged['Adj Close_change'] = merged['Adj Close'].pct_change()
    merged['Volume_change'] = merged['Volume'].pct_change()
    # Infinite ratios become NaN in place ...
    merged.replace([np.inf, -np.inf], np.nan, inplace=True)
    # ... NOTE(review): the result of this second replace/dropna chain is
    # discarded; only the plain dropna() on the next line takes effect.
    merged.replace([np.inf, -np.inf], np.nan).dropna(
        subset=['post_count_change', 'avg_score_change', 'Volume_change'],
        how="all")
    merged = merged.dropna()
    # The expected_* means are computed but not read again in this function.
    expected_posts = merged['post_count'].mean()
    expected_avg_comments = merged['avg_comments'].mean()
    expected_volume_change = merged['Volume'].mean()
    expected_300_count = merged['Count_Score_300'].mean()

    # --- Live search of r/WallStreetBets (a single page of up to 100 posts) ---
    data = pd.DataFrame()
    length = 0
    try:
        params = {'limit': 100, 'q': ticker, 'restrict_sr': True}
        res = requests.get("https://oauth.reddit.com/r/WallStreetBets/search",
                           headers=headers,
                           params=params)
        new_df = df_from_response(res)
        new_df.sort_values(by=['date'], inplace=True, ascending=False, axis=0)
        row = new_df.iloc[len(new_df) - 1]
        fullname = row['kind'] + '_' + row['id']
        # 'after' is stored but no further request is issued in this function.
        params['after'] = fullname
        data = data.append(new_df, ignore_index=True)
    except:
        # Bare expression: a no-op, `data` stays the empty frame.
        data
    data['date'] = pd.to_datetime(data['date'])
    super_posts_live = data.groupby('date')['score'].apply(
        lambda x: (x >= 300).sum()).reset_index(name='Count_Score_300')
    if not super_posts_live.empty:
        super_posts_live = super_posts_live
    else:
        super_posts_live['Count_Score_300'] = 0
    try:
        data['Ticker'] = ticker
        live_group = data.groupby('date') \
            .agg({'id':'count', 'num_comments':'mean', 'score':'mean'}) \
            .rename(columns={'id':'post_count','num_comments':'avg_comments', 'score':'avg_score'}) \
            .reset_index()
        live_group['Ticker'] = ticker
    except:
        places = {
            'created_utc': 0,
            'post_count': 0,
            'avg_comments': 0,
            'avg_score': 0,
            'ticker': ticker
        }
        live_group = pd.DataFrame.from_dict(places)
    live_group = live_group.merge(super_posts_live,
                                  left_on=['date'],
                                  right_on=['date'],
                                  how="left")
    live_group = live_group.merge(stockReport,
                                  left_on=['date', 'Ticker'],
                                  right_on=['Date', 'Ticker'],
                                  how='left')
    live_group['Adj Close_change'] = live_group['Adj Close'].pct_change()
    live_group['Volume_change'] = live_group['Volume'].pct_change()
    live_group['post_count_change'] = live_group['post_count'].pct_change()
    live_group['avg_score_change'] = live_group['avg_score'].pct_change()

    # --- Score the last three days of live activity ---
    xfits = live_group[live_group.date > datetime.now() - pd.to_timedelta("3day")]
    xfits_dates = xfits['date']
    xfittings = xfits[[
        'post_count_change', 'avg_score_change', 'Volume_change',
        'Count_Score_300'
    ]]
    # NOTE(review): `X_scaler` is not defined in this function, and `model`
    # is the ARIMA object left over from the loop above -- confirm which
    # fitted scaler/model is meant here.
    X_fits_scaled = X_scaler.transform(xfittings)
    social_predictions = model.predict(X_fits_scaled)
    social_predictions = pd.DataFrame(social_predictions.reshape(-1, 1))
    # NOTE(review): these "future" predictions are made on synthetic
    # make_regression features, not on observed data.
    Xnew, _ = make_regression(n_samples=10,
                              n_features=4,
                              noise=0.01,
                              random_state=1)
    ynew = model.predict(Xnew)
    future_predict_df = pd.DataFrame(ynew.reshape(-1, 1))
    live_group.sort_values(by=['date'], inplace=True, ascending=False)
    future_dates = pd.date_range(start=today, periods=10).strftime('%Y-%m-%d')
    futureDates = pd.date_range(todaysDate, periods=futureDays, freq=us_bd)
    # NOTE(review): `futureDates` is the result of pd.date_range; item
    # assignment on it looks unintended -- confirm.
    futureDates['SMPredict'] = df.append(future_predict_df, ignore_index=False)
    SMPredict = pd.DataFrame(index=future_dates, columns=combinedDFcol)
    future_predict_df = pd.DataFrame(ynew.reshape(-1, 1), future_dates)
    # NOTE(review): the result of this rename is discarded.
    future_predict_df.rename(columns={0: "SMPredict"})
    SMPredict = SMPredict.merge(future_predict_df,
                                left_index=True,
                                right_index=True,
                                how="left")
    xfits = xfits.drop(columns=[
        'Ticker', 'Adj Close_change', 'Volume_change', 'post_count_change',
        'avg_score_change'
    ])
    xfits['SMPredictions'] = social_predictions
    xfits = xfits.set_index('date')
    # This rebinds `xfits`, replacing the frame prepared in the lines above.
    xfits = df.append(SMPredict, ignore_index=False)
    xfits = xfits.drop(columns=['Ticker', 'isPartial', 'Noise'])
    xfits.index.name = "Date"

    # --- Rebuild the combined price/forecast frame ---
    combinedDFcol = ['Close', 'Predict', 'SM1', 'SM2', 'SM3', 'SM4']
    futureDF = pd.DataFrame(index=futureDates, columns=combinedDFcol)
    futureDF['Predict'] = model_fit.forecast(steps=futureDays)[0]
    combinedDF = df.append(futureDF, ignore_index=False)
    # NOTE(review): a list is assigned to `.index.name` here, whereas the
    # earlier block used `.index.names` -- confirm which is intended.
    combinedDF.index.name = ['Date']

    today_dict = xfits.to_dict()
    futureSM_prediction = SMPredict.to_dict()
    carls_dict = combinedDF.to_dict()
    # NOTE(review): the braces build a set literal from three dicts; dicts are
    # unhashable, so this presumably needs to be a tuple or list -- confirm
    # what callers expect.
    return ({today_dict, futureSM_prediction, carls_dict})
import unittest

import pandas as pd
import numpy as np
from numpy.testing import assert_almost_equal
from pandas_datareader.data import DataReader
import penfmb as pn

# French Data library
kwds = {'data_source': 'famafrench', 'start': '1972-1', 'end': '2013-12'}
# Position of the table used from each downloaded dataset.
MONTHLY = 0

ff = DataReader("F-F_Research_Data_Factors", **kwds)[MONTHLY]
twentyfive = DataReader("25_Portfolios_5x5", **kwds)[MONTHLY]
mom = DataReader("F-F_Momentum_Factor", **kwds)[MONTHLY]

# Portfolio returns in excess of the 'RF' column.
twentyfive = twentyfive.subtract(ff['RF'], axis=0)
# Factor set: the research factors without 'RF', plus the momentum factor.
carhart = pd.concat([ff.drop('RF', axis=1), mom], axis=1)


class TestMain(unittest.TestCase):
    """Regression tests for ``penfmb`` against the Fama-French data above."""

    def test_fmb(self):
        """Check ``pn._fmb`` parameters against stored reference values."""
        # Single factor: market excess return only.
        _, b, _ = pn._fmb(twentyfive, ff['Mkt-RF'])
        assert_almost_equal(b.params,
                            np.array([1.4741268, -0.6969725]),
                            decimal=4)

        # All research factors except 'RF'.
        _, b, _ = pn._fmb(twentyfive, ff.drop('RF', axis=1))
        assert_almost_equal(b.params,
                            np.array([1.3495192, -0.7947864,
                                      0.1414998, 0.4257349]),
                            decimal=4)

    def test_PenFMB(self):
        """Smoke test: fitting ``PenFMB`` with one bootstrap draw must not raise."""
        pn.PenFMB(nboot=1).fit(twentyfive, carhart)