# Local import: this fragment builds its output paths with os.path.join
# instead of hand-written '.\output\...' literals, whose backslashes were
# invalid escape sequences in non-raw strings.
import os

output_dir = os.path.join('.', 'output')
os.makedirs(output_dir, exist_ok=True)

# Plot the closing-price history together with the validation window and
# save the figure twice: once under a ticker/date-specific name and once
# under a fixed name.
plt.plot(modelData['Close'])
plt.plot(valid[['Close', 'Predict']])
plt.legend(['Original', 'Actual', 'Predicted'])
plt.savefig(
    os.path.join(output_dir,
                 f'{ticker}-DecisionTree_{beginDate}_{endDate}'))
plt.savefig(os.path.join(output_dir, 'DecisionTree'))
plt.show()

#
# Create new DataFrame with future business days-closing price populated
todaysDate = datetime.datetime.now().date()
futureDays = 10
us_bd = CustomBusinessDay(calendar=USFederalHolidayCalendar())
futureDates = pd.date_range(todaysDate, periods=futureDays, freq=us_bd)
combinedDFcol = ['Close', 'Predict', 'SM1', 'SM2', 'SM3', 'SM4']
futureDF = pd.DataFrame(index=futureDates, columns=combinedDFcol)
futureDF['Predict'] = model_fit.forecast(steps=futureDays)[0]

# DataFrame.append was removed in pandas 2.0; pd.concat keeps both indexes.
combinedDF = pd.concat([df, futureDF], sort=False)
combinedDF.index.names = ['Date']
currInfo = yf.Ticker(ticker).info
#X_new = np.array(futureDates).astype(float).reshape(-1, 1)
#finalPredict = linear.predict(X_new)
#print(finalPredict)
print(combinedDF)

# Write to CSV
combinedDF.to_csv(
    os.path.join(output_dir,
                 f'{ticker}-CombinedDF_{beginDate}_{endDate}.csv'))
combinedDF.to_csv(os.path.join(output_dir, 'combinedDF.csv'))

# Output future days to text file
futureDF = futureDF.drop(columns=['Close', 'SM1', 'SM2', 'SM3', 'SM4'])
futureDF.index.names = ['Date']
# The file name (including its spelling) is kept as-is because other code
# may read it. The ``with`` block closes the file, so no explicit close().
with open(os.path.join(output_dir, 'Prediced_Stock_Price.txt'), 'w') as f:
    print(futureDF, file=f)
# Local import so this fragment can use pd.concat (DataFrame.append was
# removed in pandas 2.0) regardless of what the file header imports.
import pandas as pd

# Download the same date range for each symbol and tag every row with the
# symbol it belongs to.
data_source = 'yahoo'
frames = {}
for ticker in ('PENN', 'MGM', 'WYNN'):
    frame = DataReader(ticker, data_source, start, end).reset_index()
    frame['Symbol'] = ticker
    frames[ticker] = frame

# Keep the per-symbol module-level names the original script defined;
# ``ticker`` is left at 'WYNN', as before.
penn, mgm, wynn = frames['PENN'], frames['MGM'], frames['WYNN']
combined = pd.concat([penn, mgm, wynn])

# One line per symbol: traded volume over time.
sns.lineplot(x="Date", y="Volume", data=combined, hue="Symbol")
plt.show()

print(f"--- {round((time.time() - start_time) / 60, 3)} minutes ---")
def stock_info(ticker):
    """Collect price, Reddit and search-trend data for ``ticker`` and
    return three prediction tables as plain dictionaries.

    Steps, in order:

    1. Request a Reddit OAuth token and load ``trimmed_posts.csv``.
    2. Download price history via ``DataReader`` and fit a walk-forward
       ARIMA(4, 1, 0), a ``LinearRegression`` and a
       ``DecisionTreeRegressor`` on the closing price.
    3. Aggregate the archived posts per ``created_utc`` and merge them with
       Google Trends interest and ``yf.download`` price/volume data.
    4. Fetch live r/WallStreetBets search results, aggregate them per day
       and score the last three days with the module-level ``model`` and
       ``X_scaler``.
    5. Build the future-date frames and convert them to dictionaries.

    Args:
        ticker: Stock symbol, used for every download and as the search
            term for posts and trends.

    Returns:
        tuple: ``(today_dict, futureSM_prediction, carls_dict)``, each the
        result of ``DataFrame.to_dict()``. (The original returned a ``set``
        containing the three dicts, which raises ``TypeError`` because
        dicts are unhashable.)
    """
    # Function-scope import under a private alias: the original body mixed
    # ``datetime.today()`` (class imported) with ``datetime.datetime.now()``
    # (module imported), so one of the two always failed. Using the module
    # through ``_dt`` works whichever form the file header imports.
    import datetime as _dt

    today = _dt.datetime.today()

    # NOTE(review): API credentials are hard-coded in source; they should
    # be moved to configuration/environment and rotated.
    script = "9ho5HG7o00PT-g"
    secret = "2CQTFbYyYp5aLEN7bHkKGO8X4E3YHQ"
    beginDate = '2020-12-01'
    endDate = today.strftime('%Y-%m-%d')

    def df_from_response(res):
        """Flatten a Reddit listing response into one row per post."""
        # Build all rows first, then one DataFrame: row-by-row
        # DataFrame.append was quadratic and is removed in pandas 2.0.
        rows = [
            {
                'subreddit': post['data']['subreddit'],
                'title': post['data']['title'],
                'selftext': post['data']['selftext'],
                'num_comments': post['data']['num_comments'],
                'upvote_ratio': post['data']['upvote_ratio'],
                'date': _dt.datetime.fromtimestamp(
                    post['data']['created_utc']).strftime('%Y-%m-%d'),
                'ups': post['data']['ups'],
                'downs': post['data']['downs'],
                'score': post['data']['score'],
                'kind': post['kind'],
                'id': post['data']['id'],
            }
            for post in res.json()['data']['children']
        ]
        return pd.DataFrame(rows)

    # ------------------------------------------------------------------
    # Reddit OAuth token
    # ------------------------------------------------------------------
    auth = requests.auth.HTTPBasicAuth(script, secret)
    token_payload = {
        'grant_type': 'password',
        'username': '******',
        'password': '******'
    }
    headers = {'User-Agent': 'Final_Project/0.0.1'}
    token_response = requests.post(
        'https://www.reddit.com/api/v1/access_token',
        auth=auth,
        data=token_payload,
        headers=headers)
    token = f"bearer {token_response.json()['access_token']}"
    headers = {**headers, 'Authorization': token}

    posts = pd.read_csv("trimmed_posts.csv")
    selected_cols = ['title', 'selftext']

    # ------------------------------------------------------------------
    # Price history and price-only models
    # ------------------------------------------------------------------
    df = DataReader(ticker, 'yahoo', beginDate, endDate)
    df['Close'] = df['Adj Close']
    df = df.drop(columns=['Adj Close', 'High', 'Low', 'Open'])
    modelData = df['Close'].to_frame()

    futureDays = 10
    # Target is the close ``futureDays`` rows ahead; the last ``futureDays``
    # rows therefore have no target and are cut from X / y.
    modelData['Target'] = modelData['Close'].shift(-futureDays)
    features = modelData.drop(columns=['Target'])
    X = np.array(features)[:-futureDays]
    y = np.array(modelData['Target'])[:-futureDays]
    Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.25)
    Xfuture = np.array(features[:-futureDays].tail(futureDays))

    # NOTE(review): training stops at 70% of the rows but testing starts at
    # 75%, so 5% of the rows are used by neither - confirm this is intended.
    train_data = df[0:int(len(df) * 0.7)]
    test_data = df[int(len(df) * 0.75):]
    training_data = train_data['Close'].values
    test_data = test_data['Close'].values

    # Walk-forward validation: refit on everything seen so far, forecast
    # one step, then reveal the true value.
    history = list(training_data)
    model_predictions = []
    for true_test_value in test_data:
        # Named ``arima_model`` (not ``model``) so the ``model`` used with
        # ``X_scaler`` further down is no longer shadowed by this local.
        arima_model = ARIMA(history, order=(4, 1, 0))
        model_fit = arima_model.fit(disp=0)
        model_predictions.append(model_fit.forecast()[0])
        history.append(true_test_value)
    MSE_error = mean_squared_error(test_data, model_predictions)

    linear = LinearRegression().fit(Xtrain, ytrain)
    linearPrediction = linear.predict(Xfuture)
    linearResult = linear.score(Xtrain, ytrain)

    tree = DecisionTreeRegressor().fit(Xtrain, ytrain)
    treePrediction = tree.predict(Xfuture)
    treeResult = tree.score(Xtrain, ytrain)
    # The original also ran ``valid['Target'] = predictions`` before
    # ``predictions`` existed (NameError); only the tree-based frame is kept.
    valid = modelData[X.shape[0]:].copy()
    valid['Predict'] = treePrediction
    # NOTE(review): MSE_error, linear*/tree* results and ``valid`` are
    # computed but not part of the return value.

    # Quote snapshot. NOTE(review): not returned; a missing key in
    # ``currInfo`` raises KeyError here.
    currInfo = yf.Ticker(ticker).info
    infoDict = {
        'longName: ': currInfo['symbol'],
        'Current Ask/Bid: ':
        str(currInfo['ask']) + '/' + str(currInfo['bid']),
        'Open Price: ': str(round(currInfo['open'], 2)),
        'High/Low Price: ':
        str(currInfo['dayHigh']) + '/' + str(currInfo['dayLow']),
        'Avg Volume: ': str(currInfo['averageVolume']),
        'Volume: ': str(currInfo['volume']),
        '52w High: ': str(round(currInfo['fiftyTwoWeekHigh'], 2)),
        '52w Low: ': str(round(currInfo['fiftyTwoWeekLow'], 2)),
        'MorningStar Rating: ': str(currInfo['morningStarOverallRating']),
        'Short Ratio: ': str(currInfo['shortRatio'])
    }

    # ------------------------------------------------------------------
    # Archived posts mentioning the ticker in both title and selftext
    # ------------------------------------------------------------------
    try:
        mentions = posts[selected_cols].apply(
            lambda col: col.str.contains(ticker)).all(axis=1)
        data = posts[mentions].copy()
        data['Ticker'] = ticker
        group_df = data.groupby('created_utc') \
            .agg({'id': 'count', 'num_comments': 'mean', 'score': 'mean'}) \
            .rename(columns={'id': 'post_count',
                             'num_comments': 'avg_comments',
                             'score': 'avg_score'}) \
            .reset_index()
        group_df['Ticker'] = ticker
    except Exception:
        # Deliberate best-effort: fall back to a single all-zero row. The
        # original fallback passed scalars to DataFrame.from_dict (which
        # raises) and used the key 'ticker' although the merges below join
        # on 'Ticker'.
        data = pd.DataFrame(columns=['created_utc', 'Ticker', 'score'])
        group_df = pd.DataFrame([{
            'created_utc': 0,
            'post_count': 0,
            'avg_comments': 0,
            'avg_score': 0,
            'Ticker': ticker
        }])

    super_posts_df = data.loc[data['score'] > 300]
    if not super_posts_df.empty:
        super_posts_df = super_posts_df.groupby(
            ['created_utc', 'Ticker'])['score'].apply(
                lambda x: (x >= 300).sum()).reset_index(
                    name='Count_Score_300')

    # ------------------------------------------------------------------
    # Google Trends interest ("Noise")
    # ------------------------------------------------------------------
    try:
        pytrend = TrendReq(hl='en-US', tz=360)
        pytrend.build_payload(kw_list=[ticker],
                              timeframe=beginDate + ' ' + endDate,
                              geo='US')
        # NOTE(review): this rebinds ``df`` from the price history to the
        # trends frame; every later use of ``df`` sees the trends data.
        df = pytrend.interest_over_time()
        df['Noise'] = df[ticker]
        df[ticker] = ticker
        df.index.names = ['Date']
        df.columns = ['Ticker', 'isPartial', 'Noise']
        mergedNoise = df
    except Exception:
        # Best-effort fallback. A 'Date' column is included because the
        # merge below joins on it; NaT never matches a real date.
        mergedNoise = pd.DataFrame([{
            'Date': pd.NaT,
            'Ticker': ticker,
            'isPartial': 'No',
            'Noise': 0
        }])

    group_df['created_utc'] = pd.to_datetime(group_df['created_utc'])
    if not super_posts_df.empty:
        super_posts_df['created_utc'] = pd.to_datetime(
            super_posts_df['created_utc'])

    merged = group_df.merge(mergedNoise,
                            left_on=['created_utc', 'Ticker'],
                            right_on=['Date', 'Ticker'],
                            how='left')
    if not super_posts_df.empty:
        merged = merged.merge(super_posts_df,
                              left_on=['created_utc', 'Ticker'],
                              right_on=['created_utc', 'Ticker'],
                              how="left")
    else:
        merged['Count_Score_300'] = 0
    # drop() returns a new frame; the original discarded the result.
    merged = merged.drop(columns=['isPartial'])

    stockData = yf.download(ticker, start=beginDate, end=endDate)
    stockData['Ticker'] = ticker
    stockReport = pd.DataFrame(stockData,
                               columns=['Ticker', 'Adj Close', 'Volume'])
    merged = merged.merge(stockReport,
                          left_on=['created_utc', 'Ticker'],
                          right_on=['Date', 'Ticker'],
                          how='left')
    merged['post_count_change'] = merged['post_count'].pct_change()
    merged['avg_score_change'] = merged['avg_score'].pct_change()
    merged['Adj Close_change'] = merged['Adj Close'].pct_change()
    merged['Volume_change'] = merged['Volume'].pct_change()
    # pct_change yields +/-inf after a zero; treat those as missing, then
    # drop every row with any missing value.
    merged = merged.replace([np.inf, -np.inf], np.nan)
    merged = merged.dropna()

    # NOTE(review): these means are computed but not returned, and
    # ``expected_volume_change`` averages 'Volume', not 'Volume_change'.
    expected_posts = merged['post_count'].mean()
    expected_avg_comments = merged['avg_comments'].mean()
    expected_volume_change = merged['Volume'].mean()
    expected_300_count = merged['Count_Score_300'].mean()

    # ------------------------------------------------------------------
    # Live r/WallStreetBets search results
    # ------------------------------------------------------------------
    # Start from an empty frame that already has the columns used below,
    # so a failed request no longer leads to KeyError on data['date'].
    data = pd.DataFrame(columns=['date', 'id', 'num_comments', 'score'])
    try:
        params = {'limit': 100, 'q': ticker, 'restrict_sr': True}
        res = requests.get(
            "https://oauth.reddit.com/r/WallStreetBets/search",
            headers=headers,
            params=params)
        new_df = df_from_response(res)
    except Exception:
        # Best-effort: continue with the empty frame.
        new_df = data
    if not new_df.empty:
        data = new_df.sort_values(
            by=['date'], ascending=False).reset_index(drop=True)

    data['date'] = pd.to_datetime(data['date'])
    super_posts_live = data.groupby('date')['score'].apply(
        lambda x: (x >= 300).sum()).reset_index(name='Count_Score_300')
    if super_posts_live.empty:
        super_posts_live['Count_Score_300'] = 0

    try:
        data['Ticker'] = ticker
        live_group = data.groupby('date') \
            .agg({'id': 'count', 'num_comments': 'mean', 'score': 'mean'}) \
            .rename(columns={'id': 'post_count',
                             'num_comments': 'avg_comments',
                             'score': 'avg_score'}) \
            .reset_index()
        live_group['Ticker'] = ticker
    except Exception:
        # Fallback row keyed by 'date' / 'Ticker', the names the merges
        # below join on (the original used 'created_utc' / 'ticker' and
        # could not be constructed from scalars).
        live_group = pd.DataFrame([{
            'date': pd.NaT,
            'post_count': 0,
            'avg_comments': 0,
            'avg_score': 0,
            'Ticker': ticker
        }])

    live_group = live_group.merge(super_posts_live,
                                  left_on=['date'],
                                  right_on=['date'],
                                  how="left")
    live_group = live_group.merge(stockReport,
                                  left_on=['date', 'Ticker'],
                                  right_on=['Date', 'Ticker'],
                                  how='left')
    live_group['Adj Close_change'] = live_group['Adj Close'].pct_change()
    live_group['Volume_change'] = live_group['Volume'].pct_change()
    live_group['post_count_change'] = live_group['post_count'].pct_change()
    live_group['avg_score_change'] = live_group['avg_score'].pct_change()

    # Score only the last three days of live activity.
    xfits = live_group[live_group.date > _dt.datetime.now() -
                       pd.to_timedelta("3day")]
    xfittings = xfits[[
        'post_count_change', 'avg_score_change', 'Volume_change',
        'Count_Score_300'
    ]]
    # NOTE(review): ``X_scaler`` and ``model`` are expected to be fitted
    # module-level objects (neither is created in this function) - confirm.
    # In the original, ``model`` was the local ARIMA object at this point.
    X_fits_scaled = X_scaler.transform(xfittings)
    social_predictions = model.predict(X_fits_scaled)
    social_predictions = pd.DataFrame(social_predictions.reshape(-1, 1))
    # NOTE(review): ``social_predictions`` is not part of the returned
    # data; the original attached it to a frame that was then overwritten.

    # NOTE(review): the future predictions are made on synthetic
    # make_regression features, not on real data - confirm intent.
    Xnew, _ = make_regression(n_samples=10,
                              n_features=4,
                              noise=0.01,
                              random_state=1)
    ynew = model.predict(Xnew)

    # ------------------------------------------------------------------
    # Future-date frames
    # ------------------------------------------------------------------
    todaysDate = _dt.datetime.now().date()
    us_bd = CustomBusinessDay(calendar=USFederalHolidayCalendar())
    futureDates = pd.date_range(todaysDate, periods=futureDays, freq=us_bd)
    future_dates = pd.date_range(start=today,
                                 periods=10).strftime('%Y-%m-%d')
    combinedDFcol = ['Close', 'Predict', 'SM1', 'SM2', 'SM3', 'SM4']

    # The original assigned into ``futureDates['SMPredict']`` here; a
    # DatetimeIndex is immutable, so that statement always raised and has
    # been removed.
    SMPredict = pd.DataFrame(index=future_dates, columns=combinedDFcol)
    # rename() returns a new frame; the original discarded the result and
    # the merged column ended up named 0.
    future_predict_df = pd.DataFrame(
        ynew.reshape(-1, 1), future_dates).rename(columns={0: "SMPredict"})
    SMPredict = SMPredict.merge(future_predict_df,
                                left_index=True,
                                right_index=True,
                                how="left")

    # ``df`` is the trends frame when the trends request succeeded and the
    # price frame otherwise, hence errors='ignore' on the trends columns.
    xfits = pd.concat([df, SMPredict], sort=False)
    xfits = xfits.drop(columns=['Ticker', 'isPartial', 'Noise'],
                       errors='ignore')
    xfits.index.name = "Date"

    futureDF = pd.DataFrame(index=futureDates, columns=combinedDFcol)
    futureDF['Predict'] = model_fit.forecast(steps=futureDays)[0]
    # NOTE(review): as in the original, this uses whatever ``df`` is bound
    # to at this point (see above); an earlier, overwritten copy of this
    # computation used the price history - confirm which one is wanted.
    combinedDF = pd.concat([df, futureDF], sort=False)
    # Index.name must be hashable; the original assigned the list ['Date'].
    combinedDF.index.name = 'Date'

    today_dict = xfits.to_dict()
    futureSM_prediction = SMPredict.to_dict()
    carls_dict = combinedDF.to_dict()
    return today_dict, futureSM_prediction, carls_dict
def view_ticker():
    """Render a candlestick and volume chart with an ARIMA forecast line.

    Reads ``ticker``, ``start`` and ``end`` (both ``YYYY-MM-DD``) from
    ``request.form``, downloads the price history with ``DataReader``,
    fits ``sm.tsa.ARIMA`` with order (1, 0, 3) on the closing prices, adds
    one extra business day after ``end`` and plots candles, forecast and
    volume with Bokeh.

    Returns:
        The HTML produced by ``file_html`` for the two stacked figures.

    Raises:
        ValueError: from ``datetime.strptime`` if a submitted date is not
            in ``YYYY-MM-DD`` form.
    """
    stock = request.form['ticker']
    start = datetime.strptime(request.form['start'], '%Y-%m-%d').date()
    end = datetime.strptime(request.form['end'], '%Y-%m-%d').date()

    df = DataReader(stock, 'yahoo', start, end)
    df.reset_index(inplace=True, drop=False)

    # --- ARIMA fit on the closing prices --------------------------------
    df['Natural Log'] = np.log(df['Close'])
    # Series.as_matrix() was removed in pandas 1.0; .values is equivalent.
    price_matrix = df['Close'].values
    model = sm.tsa.ARIMA(price_matrix, order=(1, 0, 3))
    results = model.fit(disp=-1)  # disp < 0 means no convergence output

    # --- add one more (business) day so the forecast extends past `end` --
    df['Date'] = pd.to_datetime(df['Date'])
    end_new = end + pd.offsets.BDay(1)
    # DataFrame.append was removed in pandas 2.0.
    df = pd.concat([df, pd.DataFrame([{'Date': end_new}])],
                   ignore_index=True,
                   sort=False)

    # --- derived columns used by the glyphs and tooltips ----------------
    df['changepercent'] = df.Close.pct_change() * 100
    df["seq"] = pd.Series(np.arange(df.shape[0]))
    df['Date'] = df['Date'].dt.strftime('%Y/%m/%d')
    df['changepercent'] = df['changepercent'].apply(
        lambda x: str(round(x, 2)) + "%")
    df['mid'] = (df['Open'] + df['Close']) / 2
    # Give flat candles (Open == Close) a tiny height so they stay visible.
    df['height'] = (df['Close'] - df['Open']).abs().where(
        df['Close'] != df['Open'], 0.001)

    inc = df.Close > df.Open
    dec = df.Open > df.Close
    w = 0.5

    # Volume split by direction for the volume chart.
    df['volinc'] = df.Volume[inc]
    df['voldec'] = df.Volume[dec]

    # In-sample prediction over every row, including the added day.
    forecast_start = df.index[0]
    forecast_end = df.index[-1]
    df['Forcast_New'] = results.predict(forecast_start,
                                        forecast_end,
                                        dynamic=False)

    # ColumnDataSource objects supply the values shown in tooltips.
    sourceInc = ColumnDataSource(ColumnDataSource.from_df(df.loc[inc]))
    sourceDec = ColumnDataSource(ColumnDataSource.from_df(df.loc[dec]))
    sourceAll = ColumnDataSource(ColumnDataSource.from_df(df.loc[:]))

    hover = HoverTool(
        names=['source_Inc', 'source_Dec', 'volinc', 'voldec'],
        tooltips=[
            ("Date", "@Date"),
            ("Open", "@Open"),
            ("Close", "@Close"),
            ("High", "@High"),
            ("Low", "@Low"),
            ("Volume", "@Volume"),
            ("Percent", "@changepercent"),
        ])
    TOOLS = [CrosshairTool(), hover]

    # The x axis is the integer ``seq``; map each position to its date.
    p = figure(plot_width=900,
               plot_height=500,
               tools=TOOLS,
               title=stock + " Candlestick with Custom Date")
    # ``Date`` now holds 'YYYY/MM/DD' strings, so parse with that format
    # (the original passed '%Y-%m-%d', which does not match).
    p.xaxis.major_label_overrides = {
        i: date.strftime('%Y-%m-%d')
        for i, date in enumerate(
            pd.to_datetime(df["Date"], format='%Y/%m/%d'))
    }
    p.yaxis.axis_label = "Price"
    p.xaxis.axis_label = "Date"
    p.grid.grid_line_alpha = 0.5

    # High/low wicks: green for rising days, red for falling days.
    r1 = p.segment(df.seq[inc],
                   df.High[inc],
                   df.seq[inc],
                   df.Low[inc],
                   color="green",
                   name='seg_INC')
    r2 = p.segment(df.seq[dec],
                   df.High[dec],
                   df.seq[dec],
                   df.Low[dec],
                   color="red",
                   name='seg_DEC')

    # Candle bodies for rising (green) days.
    p.rect(x='seq',
           y='mid',
           width=w,
           height='height',
           fill_color="green",
           name='source_Inc',
           line_color="green",
           legend='Close High',
           source=sourceInc)
    # Candle bodies for falling (red) days.
    p.rect(x='seq',
           y='mid',
           width=w,
           height='height',
           fill_color="red",
           name='source_Dec',
           line_color="red",
           legend='Close Low',
           source=sourceDec)

    # ARIMA forecast line with its own hover tool.
    r3 = p.line(x='seq',
                y='Forcast_New',
                line_width=2,
                color='navy',
                legend='Forecast_line',
                source=sourceAll)
    p.add_tools(
        HoverTool(renderers=[r3],
                  tooltips=[('Date', '@Date'),
                            ('Forecast', '@Forcast_New')]))
    p.legend.location = "top_left"

    # Volume bars share the x range with the candlestick figure.
    p2 = figure(width=p.plot_width,
                x_range=p.x_range,
                tools=TOOLS,
                height=150,
                title='Volume')
    p2.vbar(x='seq',
            top='volinc',
            width=1,
            bottom=0,
            color="green",
            source=sourceInc,
            name='volinc')
    p2.vbar(x='seq',
            top='voldec',
            width=1,
            bottom=0,
            color="red",
            source=sourceDec,
            name='voldec')

    p_all = column(p, p2)
    html = file_html(p_all, CDN, "my plot")
    return html