plt.plot(modelData['Close'])
plt.plot(valid[['Close', 'Predict']])
plt.legend(['Original', 'Actual', 'Predicted'])
plt.savefig(f'./output/{ticker}-DecisionTree_{beginDate}_{endDate}')
plt.savefig('./output/DecisionTree')
plt.show()
#
# Create new DataFrame with future business days-closing price populated
todaysDate = datetime.datetime.now().date()
futureDays = 10
us_bd = CustomBusinessDay(calendar=USFederalHolidayCalendar())
futureDates = pd.date_range(todaysDate, periods=futureDays, freq=us_bd)
combinedDFcol = ['Close', 'Predict', 'SM1', 'SM2', 'SM3', 'SM4']
futureDF = pd.DataFrame(index=futureDates, columns=combinedDFcol)
futureDF['Predict'] = model_fit.forecast(steps=futureDays)[0]
combinedDF = pd.concat([df, futureDF])
combinedDF.index.names = ['Date']
currInfo = yf.Ticker(ticker).info
#X_new = np.array(futureDates).astype(float).reshape(-1, 1)
#finalPredict = linear.predict(X_new)
#print(finalPredict)
print(combinedDF)
# Write to CSV
combinedDF.to_csv(f'./output/{ticker}-CombinedDF_{beginDate}_{endDate}.csv')
combinedDF.to_csv('./output/combinedDF.csv')
# Output future days to text file
futureDF = futureDF.drop(columns=['Close', 'SM1', 'SM2', 'SM3', 'SM4'])
futureDF.index.names = ['Date']
with open(".\output\Prediced_Stock_Price.txt", "w") as f:
    print(futureDF, file=f)
f.close()
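
# Note: the DataFrame.append calls in these examples were rewritten with
# pd.concat, since append was deprecated in pandas 1.4 and removed in 2.0.
# A minimal, self-contained sketch of the replacement (the column names are
# only illustrative):
import pandas as pd

history = pd.DataFrame({'Close': [10.0, 10.5]},
                       index=pd.to_datetime(['2021-01-04', '2021-01-05']))
future = pd.DataFrame({'Predict': [10.7, 10.9]},
                      index=pd.to_datetime(['2021-01-06', '2021-01-07']))

# pd.concat keeps both indexes, matching append(..., ignore_index=False)
combined = pd.concat([history, future])
print(combined)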
Example no. 2
ticker = 'PENN'
data_source = 'yahoo'
penn = DataReader(ticker,data_source,start,end)
penn = penn.reset_index()
penn['Symbol'] = 'PENN'

ticker = 'MGM'
data_source = 'yahoo'
mgm = DataReader(ticker,data_source,start,end)
mgm = mgm.reset_index()
mgm['Symbol'] = 'MGM'

ticker = 'WYNN'
data_source = 'yahoo'
wynn = DataReader(ticker,data_source,start,end)
wynn = wynn.reset_index()
wynn['Symbol'] = 'WYNN'

combined = pd.concat([penn, mgm, wynn])


sns.lineplot(x="Date", y="Volume",data=combined,hue="Symbol"); plt.show()
#sns.barplot(x=stock_data.index, y="Volume",data=stock_data)
plt.show()

#narrow2 = df[['Date','Market Capitalization','Sector','Symbol']]
##n2 = narrow2[narrow2['Sector'] == 'Consumer Staples']
#n3 = n2[n2['Market Capitalization'] > 100000000000]
#sns.lineplot(x="Date", y="Market Capitalization", hue="Symbol",data=n3)

print("--- %s minutes ---" % round((time.time() - start_time)/60,3))
Example no. 3
def stock_info(ticker):

    today = datetime.today()

    script = "9ho5HG7o00PT-g"
    secret = "2CQTFbYyYp5aLEN7bHkKGO8X4E3YHQ"

    beginDate = '2020-12-01'
    endDate = datetime.today().strftime('%Y-%m-%d')

    def df_from_response(res):
        # Collect one row per post from the Reddit search response, then
        # build the DataFrame once (avoids appending inside the loop).
        rows = []
        for post in res.json()['data']['children']:
            rows.append({
                'subreddit': post['data']['subreddit'],
                'title': post['data']['title'],
                'selftext': post['data']['selftext'],
                'num_comments': post['data']['num_comments'],
                'upvote_ratio': post['data']['upvote_ratio'],
                'date': datetime.fromtimestamp(
                    post['data']['created_utc']).strftime('%Y-%m-%d'),
                'ups': post['data']['ups'],
                'downs': post['data']['downs'],
                'score': post['data']['score'],
                'kind': post['kind'],
                'id': post['data']['id'],
            })
        return pd.DataFrame(rows)

    auth = requests.auth.HTTPBasicAuth(script, secret)
    data = {
        'grant_type': 'password',
        'username': '******',
        'password': '******'
    }

    headers = {'User-Agent': 'Final_Project/0.0.1'}

    request = requests.post('https://www.reddit.com/api/v1/access_token',
                            auth=auth,
                            data=data,
                            headers=headers)
    token = f"bearer {request.json()['access_token']}"
    headers = {**headers, **{'Authorization': token}}

    posts = pd.read_csv("trimmed_posts.csv")
    selected_cols = ['title', 'selftext']

    df = DataReader(ticker, 'yahoo', beginDate, endDate)
    df['Close'] = df['Adj Close']
    df = df.drop(columns=['Adj Close', 'High', 'Low', 'Open'], axis=1)
    modelData = df['Close'].to_frame()
    five_rolling = modelData.rolling(window=5).mean()
    ten_rolling = modelData.rolling(window=10).mean()
    twenty_rolling = modelData.rolling(window=20).mean()
    fifty_rolling = modelData.rolling(window=50).mean()
    hundred_rolling = modelData.rolling(window=100).mean()

    futureDays = 10
    modelData['Target'] = modelData['Close'].shift(-futureDays)

    X = np.array(modelData.drop(columns=['Target']))[:-futureDays]
    y = np.array(modelData['Target'])[:-futureDays]

    Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.25)
    Xfuture = modelData.drop(columns=['Target'])[:-futureDays]
    Xfuture = Xfuture.tail(futureDays)
    Xfuture = np.array(Xfuture)

    train_data, test_data = df[0:int(len(df) * 0.7)], df[int(len(df) * 0.7):]
    training_data = train_data['Close'].values
    test_data = test_data['Close'].values
    history = [x for x in training_data]
    model_predictions = []
    N_test_observations = len(test_data)
    for time_point in range(N_test_observations):
        model = ARIMA(history, order=(4, 1, 0))
        model_fit = model.fit(disp=0)
        output = model_fit.forecast()
        yhat = output[0]
        model_predictions.append(yhat)
        true_test_value = test_data[time_point]
        history.append(true_test_value)
    MSE_error = mean_squared_error(test_data, model_predictions)

    linear = LinearRegression().fit(Xtrain, ytrain)
    linearPrediction = linear.predict(Xfuture)
    linearResult = linear.score(Xtrain, ytrain)

    tree = DecisionTreeRegressor().fit(Xtrain, ytrain)
    treePrediction = tree.predict(Xfuture)
    treeResult = tree.score(Xtrain, ytrain)
    predictions = treePrediction

    # Hold out the last futureDays rows and attach the tree predictions.
    valid = modelData[X.shape[0]:].copy()
    valid['Predict'] = predictions

    todaysDate = datetime.now().date()
    futureDays = 10
    us_bd = CustomBusinessDay(calendar=USFederalHolidayCalendar())
    futureDates = pd.date_range(todaysDate, periods=futureDays, freq=us_bd)
    combinedDFcol = ['Close', 'Predict', 'SM1', 'SM2', 'SM3', 'SM4']
    futureDF = pd.DataFrame(index=futureDates, columns=combinedDFcol)
    futureDF['Predict'] = model_fit.forecast(steps=futureDays)[0]
    combinedDF = pd.concat([df, futureDF])
    combinedDF.index.names = ['Date']
    currInfo = yf.Ticker(ticker).info

    infoDict = {
        'Symbol: ': currInfo['symbol'],
        'Current Ask/Bid: ': str(currInfo['ask']) + '/' + str(currInfo['bid']),
        'Open Price: ': str(round(currInfo['open'], 2)),
        'High/Low Price: ': str(currInfo['dayHigh']) + '/' + str(currInfo['dayLow']),
        'Avg Volume: ': str(currInfo['averageVolume']),
        'Volume: ': str(currInfo['volume']),
        '52w High: ': str(round(currInfo['fiftyTwoWeekHigh'], 2)),
        '52w Low: ': str(round(currInfo['fiftyTwoWeekLow'], 2)),
        'MorningStar Rating: ': str(currInfo['morningStarOverallRating']),
        'Short Ratio: ': str(currInfo['shortRatio'])
    }

    try:
        new_df = posts[posts[selected_cols].apply(
            lambda x: x.str.contains(ticker)).all(axis=1)]
        data = new_df.copy()
        data['Ticker'] = ticker
        group_df = new_df.groupby('created_utc') \
            .agg({'id': 'count', 'num_comments': 'mean', 'score': 'mean'}) \
            .rename(columns={'id': 'post_count', 'num_comments': 'avg_comments', 'score': 'avg_score'}) \
            .reset_index()
        group_df['Ticker'] = ticker
    except Exception:
        # Fallback row so the merges below still see the expected columns;
        # also give `data` an empty frame since it is used after this block.
        places = {
            'created_utc': 0,
            'post_count': 0,
            'avg_comments': 0,
            'avg_score': 0,
            'Ticker': ticker
        }
        group_df = pd.DataFrame([places])
        data = pd.DataFrame(columns=['created_utc', 'score', 'Ticker'])

    super_posts_df = data.loc[data['score'] > 300]
    if not super_posts_df.empty:
        super_posts_df = super_posts_df.groupby(
            ['created_utc',
             'Ticker'])['score'].apply(lambda x: (x >= 300).sum()).reset_index(
                 name='Count_Score_300')

    try:
        pytrend = TrendReq(hl='en-US', tz=360)
        pytrend.build_payload(kw_list=[ticker],
                              timeframe=beginDate + ' ' + endDate,
                              geo='US')
        df = pytrend.interest_over_time()
        df['Noise'] = df[ticker]
        df[ticker] = ticker
        df.index.names = ['Date']
        df.columns = ['Ticker', 'isPartial', 'Noise']
        mergedNoise = df
    except Exception:
        # One-row fallback (from_dict with all-scalar values would raise).
        noise = {'Ticker': ticker, 'isPartial': 'No', 'Noise': 0}
        mergedNoise = pd.DataFrame([noise])

    group_df['created_utc'] = pd.to_datetime(group_df['created_utc'])
    if not super_posts_df.empty:
        super_posts_df['created_utc'] = pd.to_datetime(
            super_posts_df['created_utc'])
    merged = group_df.merge(mergedNoise,
                            left_on=['created_utc', 'Ticker'],
                            right_on=['Date', 'Ticker'],
                            how='left')
    if not super_posts_df.empty:
        merged = merged.merge(super_posts_df,
                              left_on=['created_utc', 'Ticker'],
                              right_on=['created_utc', 'Ticker'],
                              how="left")
    else:
        merged['Count_Score_300'] = 0
    merged = merged.drop(columns=['isPartial'])

    stockData = yf.download(ticker, start=beginDate, end=endDate)
    stockData['Ticker'] = ticker
    stockReport = pd.DataFrame(stockData,
                               columns=['Ticker', 'Adj Close', 'Volume'])
    merged = merged.merge(stockReport,
                          left_on=['created_utc', 'Ticker'],
                          right_on=['Date', 'Ticker'],
                          how='left')
    merged['post_count_change'] = merged['post_count'].pct_change()
    merged['avg_score_change'] = merged['avg_score'].pct_change()
    merged['Adj Close_change'] = merged['Adj Close'].pct_change()
    merged['Volume_change'] = merged['Volume'].pct_change()
    merged.replace([np.inf, -np.inf], np.nan, inplace=True)
    merged = merged.dropna(
        subset=['post_count_change', 'avg_score_change', 'Volume_change'],
        how="all")
    merged = merged.dropna()

    expected_posts = merged['post_count'].mean()
    expected_avg_comments = merged['avg_comments'].mean()
    expected_volume_change = merged['Volume'].mean()
    expected_300_count = merged['Count_Score_300'].mean()

    data = pd.DataFrame()
    length = 0
    try:
        params = {'limit': 100, 'q': ticker, 'restrict_sr': True}
        res = requests.get("https://oauth.reddit.com/r/WallStreetBets/search",
                           headers=headers,
                           params=params)

        new_df = df_from_response(res)
        new_df.sort_values(by=['date'], inplace=True, ascending=False, axis=0)
        row = new_df.iloc[-1]
        fullname = row['kind'] + '_' + row['id']
        params['after'] = fullname
        data = pd.concat([data, new_df], ignore_index=True)
    except Exception:
        # Leave `data` as the empty DataFrame created above.
        pass

    data['date'] = pd.to_datetime(data['date'])

    super_posts_live = data.groupby('date')['score'].apply(
        lambda x: (x >= 300).sum()).reset_index(name='Count_Score_300')
    if super_posts_live.empty:
        super_posts_live['Count_Score_300'] = 0

    try:
        data['Ticker'] = ticker
        live_group = data.groupby('date') \
        .agg({'id':'count', 'num_comments':'mean', 'score':'mean'}) \
        .rename(columns={'id':'post_count','num_comments':'avg_comments', 'score':'avg_score'}) \
        .reset_index()
        live_group['Ticker'] = ticker
    except Exception:
        # Placeholder row keyed on 'date' to match the merges below.
        places = {
            'date': 0,
            'post_count': 0,
            'avg_comments': 0,
            'avg_score': 0,
            'Ticker': ticker
        }
        live_group = pd.DataFrame([places])

    live_group = live_group.merge(super_posts_live,
                                  left_on=['date'],
                                  right_on=['date'],
                                  how="left")
    live_group = live_group.merge(stockReport,
                                  left_on=['date', 'Ticker'],
                                  right_on=['Date', 'Ticker'],
                                  how='left')
    live_group['Adj Close_change'] = live_group['Adj Close'].pct_change()
    live_group['Volume_change'] = live_group['Volume'].pct_change()
    live_group['post_count_change'] = live_group['post_count'].pct_change()
    live_group['avg_score_change'] = live_group['avg_score'].pct_change()

    xfits = live_group[live_group.date > datetime.now() -
                       pd.to_timedelta("3day")]
    xfits_dates = xfits['date']
    xfittings = xfits[[
        'post_count_change', 'avg_score_change', 'Volume_change',
        'Count_Score_300'
    ]]

    # NOTE: X_scaler and the feature model used below are assumed to be
    # defined and fitted elsewhere; they do not appear in this snippet.
    X_fits_scaled = X_scaler.transform(xfittings)
    social_predictions = model.predict(X_fits_scaled)
    social_predictions = pd.DataFrame(social_predictions.reshape(-1, 1))
    Xnew, _ = make_regression(n_samples=10,
                              n_features=4,
                              noise=0.01,
                              random_state=1)
    ynew = model.predict(Xnew)
    future_predict_df = pd.DataFrame(ynew.reshape(-1, 1))
    live_group.sort_values(by=['date'], inplace=True, ascending=False)

    future_dates = pd.date_range(start=today, periods=10).strftime('%Y-%m-%d')
    futureDates = pd.date_range(todaysDate, periods=futureDays, freq=us_bd)

    SMPredict = pd.DataFrame(index=future_dates, columns=combinedDFcol)
    future_predict_df = pd.DataFrame(ynew.reshape(-1, 1), index=future_dates)
    future_predict_df = future_predict_df.rename(columns={0: "SMPredict"})
    SMPredict = SMPredict.merge(future_predict_df,
                                left_index=True,
                                right_index=True,
                                how="left")

    xfits = xfits.drop(columns=[
        'Ticker', 'Adj Close_change', 'Volume_change', 'post_count_change',
        'avg_score_change'
    ])
    # Assign the raw values so the predictions are not realigned by index.
    xfits['SMPredictions'] = social_predictions.to_numpy().ravel()
    xfits = xfits.set_index('date')
    xfits = pd.concat([df, SMPredict])
    xfits = xfits.drop(columns=['Ticker', 'isPartial', 'Noise'])
    xfits.index.name = "Date"

    combinedDFcol = ['Close', 'Predict', 'SM1', 'SM2', 'SM3', 'SM4']
    futureDF = pd.DataFrame(index=futureDates, columns=combinedDFcol)
    futureDF['Predict'] = model_fit.forecast(steps=futureDays)[0]
    combinedDF = pd.concat([df, futureDF])
    combinedDF.index.names = ['Date']

    today_dict = xfits.to_dict()
    futureSM_prediction = SMPredict.to_dict()
    carls_dict = combinedDF.to_dict()

    return today_dict, futureSM_prediction, carls_dict
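
# Standalone sketch of the walk-forward ARIMA loop used inside stock_info()
# above, run on synthetic data and using the current statsmodels API
# (statsmodels.tsa.arima.model.ARIMA rather than the legacy arima_model):
import numpy as np
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.arima.model import ARIMA

rng = np.random.default_rng(0)
prices = 100 + np.cumsum(rng.normal(0, 1, 200))  # synthetic random-walk closes

split = int(len(prices) * 0.7)
history = list(prices[:split])
test = prices[split:]

predictions = []
for actual in test:
    # Refit on everything seen so far and forecast one step ahead.
    fit = ARIMA(history, order=(4, 1, 0)).fit()
    predictions.append(fit.forecast()[0])
    history.append(actual)  # roll the window forward with the true value

print('walk-forward MSE:', mean_squared_error(test, predictions))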
Example no. 4
def view_ticker():
    stock = request.form['ticker']
    #print stock
    start = request.form['start']
    start = datetime.strptime(start, '%Y-%m-%d')
    start = start.date()
    #print request.form['start']
    end = request.form['end']
    end = datetime.strptime(end, '%Y-%m-%d')
    end = end.date()
    #print "end"
    value = '.4'
    status = 'Close'

    #    if request.form.get('box1'):
    #        value = '.4'
    #        status = 'Close'
    #    if request.form.get('box2'):
    #        value = '.11'
    #        status = 'Adj Close'
    #    if request.form.get('box3'):
    #        value = '.1'
    #        status = 'Open'

    df = DataReader(stock, 'yahoo', start, end)
    #mydata = qd.get("WIKI/" + stock + value, rows = 20, api_key='oSvidbxNa84mVv7Kzqh2')

    df.reset_index(inplace=True, drop=False)

    #This is where ARIMA starts
    df['Natural Log'] = df['Close'].apply(lambda x: np.log(x))
    price_matrix = df['Close'].to_numpy()
    model = sm.tsa.ARIMA(price_matrix, order=(1, 0, 3))
    results = model.fit(
        disp=-1)  # disp < 0 suppresses optimizer output; disp=1 would print it
    #df['Forecast'] = results.fittedvalues

    #Add one more day
    df['Date'] = pd.to_datetime(df['Date'])
    end_new = end + pd.offsets.BDay(1)
    df = pd.concat([df, pd.DataFrame([{'Date': end_new}])], ignore_index=True)

    df['changepercent'] = df.Close.pct_change() * 100
    seqs = np.arange(df.shape[0])
    df["seq"] = pd.Series(seqs)
    df["Date"] = pd.to_datetime(df["Date"])
    df['Date'] = df['Date'].apply(lambda x: x.strftime('%Y/%m/%d'))
    df['changepercent'] = df['changepercent'].apply(
        lambda x: str(round(x, 2)) + "%")
    df['mid'] = df.apply(lambda x: (x['Open'] + x['Close']) / 2, axis=1)
    df['height'] = df.apply(
        lambda x: abs(x['Close'] - x['Open']
                      if x['Close'] != x['Open'] else 0.001),
        axis=1)

    inc = df.Close > df.Open
    dec = df.Open > df.Close
    w = 0.5

    #This is for volume graph
    df['volinc'] = df.Volume[inc]
    df['voldec'] = df.Volume[dec]

    #Add additional Forecast Day
    forecast_start = df.index[0]
    forecast_end = df.index[-1]
    #print forecast_start
    #print forecast_end

    #forcast = results.predict(forecast_start, forecast_end, dynamic=False) #, dynamic= True means in-sample
    df['Forcast_New'] = results.predict(forecast_start,
                                        forecast_end,
                                        dynamic=False)
    #print df

    # print df.iloc[-3:]
    #forecast= results.predict(start, end, dynamic=True)
    # print forecast

    #use ColumnDataSource to pass in data for tooltips
    sourceInc = ColumnDataSource(df.loc[inc])
    sourceDec = ColumnDataSource(df.loc[dec])
    sourceAll = ColumnDataSource(df)
    #no separate source needed here: the forecast line gets its own hover tool below
    #sourceforecast = ColumnDataSource(df)

    #the values for the tooltip come from ColumnDataSource
    hover = HoverTool(
        names=['source_Inc', 'source_Dec', 'volinc', 'voldec'],
        tooltips=[
            ("Date", "@Date"),
            ("Open", "@Open"),
            ("Close", "@Close"),
            ("High", "@High"),
            ("Low", "@Low"),
            ("Volume", "@Volume"),
            ("Percent", "@changepercent"),
            # ("Forecast", "@Forecast"),
        ])

    TOOLS = [CrosshairTool(), hover]

    # map dataframe indices to date strings and use as label overrides
    p = figure(plot_width=900,
               plot_height=500,
               tools=TOOLS,
               title=stock + " Candlestick with Custom Date")
    p.xaxis.major_label_overrides = {
        i: date.strftime('%Y-%m-%d')
        for i, date in enumerate(pd.to_datetime(df["Date"], format='%Y-%m-%d'))
    }

    p.yaxis.axis_label = "Price"
    p.xaxis.axis_label = "Date"
    p.grid.grid_line_alpha = 0.5

    #high-low wick for days that closed up
    r1 = p.segment(df.seq[inc],
                   df.High[inc],
                   df.seq[inc],
                   df.Low[inc],
                   color="green",
                   name='seg_INC')
    #p.add_tools(HoverTool(renderers=[r1], tooltips=[('High', '@y0'), ("Low", "@y1"),]))

    #high-low wick for days that closed down
    r2 = p.segment(df.seq[dec],
                   df.High[dec],
                   df.seq[dec],
                   df.Low[dec],
                   color="red",
                   name='seg_DEC')
    #p.add_tools(HoverTool(renderers=[r2], tooltips=[('High', '@y0'), ("Low", "@y1"),]))

    #candle body for the up (green) days
    p.rect(x='seq',
           y='mid',
           width=w,
           height='height',
           fill_color="green",
           name='source_Inc',
           line_color="green",
           legend='Close High',
           source=sourceInc)
    #candle body for the down (red) days
    p.rect(x='seq',
           y='mid',
           width=w,
           height='height',
           fill_color="red",
           name='source_Dec',
           line_color="red",
           legend='Close Low',
           source=sourceDec)

    #this is where the ARIMA forecast line is drawn
    #p.circle(df.seq, df['Forecast'], color='darkgrey', alpha=0.2, legend='Forecast')
    r3 = p.line(x='seq',
                y='Forcast_New',
                line_width=2,
                color='navy',
                legend='Forecast_line',
                source=sourceAll)
    p.add_tools(
        HoverTool(renderers=[r3],
                  tooltips=[('Date', '@Date'), ('Forecast', '@Forcast_New')]))

    #r4 = p.line(df.seq, df['Forecast2'], line_width=2, color='yellow', legend='Future_Day1')
    #p.add_tools(HoverTool(renderers=[r4], tooltips=[('Forecast', '@y')]))

    p.legend.location = "top_left"

    #This is the histogram graph
    p2 = figure(width=p.plot_width,
                x_range=p.x_range,
                tools=TOOLS,
                height=150,
                title='Volume')
    p2.vbar(x='seq',
            top='volinc',
            width=1,
            bottom=0,
            color="green",
            source=sourceInc,
            name='volinc')
    p2.vbar(x='seq',
            top='voldec',
            width=1,
            bottom=0,
            color="red",
            source=sourceDec,
            name='voldec')

    p_all = column(p, p2)

    html = file_html(p_all, CDN, "my plot")

    return html
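
# Minimal sketch of the ColumnDataSource + HoverTool pattern used in
# view_ticker() above: a DataFrame can be handed to ColumnDataSource directly,
# and "@column" references in the tooltips pull values from that source.
# The data below is purely illustrative.
import pandas as pd
from bokeh.embed import file_html
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.plotting import figure
from bokeh.resources import CDN

df = pd.DataFrame({
    'seq': [0, 1, 2, 3],
    'Date': ['2021-01-04', '2021-01-05', '2021-01-06', '2021-01-07'],
    'Close': [10.0, 10.4, 10.1, 10.6],
})
source = ColumnDataSource(df)

p = figure(width=600, height=300, title="Close price")
line = p.line(x='seq', y='Close', source=source)
p.add_tools(HoverTool(renderers=[line],
                      tooltips=[("Date", "@Date"), ("Close", "@Close")]))

html = file_html(p, CDN, "close price sketch")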