def run():
    """Print one ``date , total`` line per day for the "£" postings of the journal.

    Reads the ledger journal at ``./secret/ledger.dat``, walks every posting
    returned by an empty query and sums the amounts whose commodity is "£",
    grouped by posting date.  A line is emitted each time the date changes and
    once more after the last posting, so the final day is not lost.

    The grouping relies on the query returning postings in date order.
    """
    journal = ledger.read_journal("./secret/ledger.dat")
    current_date = None
    amount = 0
    for post in journal.query(""):
        # Only "£" postings contribute; filter before any bookkeeping so a
        # foreign-currency posting can neither open nor close a day.
        if str(post.amount.commodity) != "£":
            continue
        if current_date is not None and post.date != current_date:
            # The date changed: flush the finished day under its own date.
            print("%s , %s" % (current_date, amount))
            amount = 0
        current_date = post.date
        amount = amount + post.amount
    if current_date is not None:
        # Flush the last day, which has no following posting to trigger it.
        print("%s , %s" % (current_date, amount))


# NOTE(review): the statements below are reconstructed as module-level
# notebook code (trailing semicolons / bare expressions); confirm they were
# not meant to live inside run().
df = pd.read_csv('./testing.csv')
# Scale the target column by 100 before fitting.
df['y'] = np.multiply(100, df['y'])
m = Prophet()
m.fit(df);
# NOTE(review): `future` is not defined in the visible code; presumably it
# comes from m.make_future_dataframe(...) in another cell -- confirm.
forecast = m.predict(future)
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail()
m.plot(forecast);
m.plot_components(forecast);
# Show the last ten forecast rows (notebook display expression).
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail(10)
# Last ten point forecasts and their lower / upper interval bounds.
y = np.array(forecast['yhat'])[-10:]
y_l = np.array(forecast['yhat_lower'])[-10:]
y_u = np.array(forecast['yhat_upper'])[-10:]
# Truncate every value to its integer part, element by element, in place.
for i in range(len(y)):
    y[i] = int (y[i])
    y_l[i] = int (y_l[i])
    y_u[i] = int (y_u[i])
print(y)
print(y_l)
print(y_u)
# NOTE(review): `ph` is not defined in the visible code; presumably the fitted
# Prophet model -- confirm.
future_plot = ph.plot(forecast, figsize=(14,8), xlabel='days', ylabel='cases')
trends_weekly_dayly_plot = ph.plot_components(forecast, figsize=(14,8))
# IPython shell magic: only valid inside a notebook cell, not in a .py module.
!pip install statsmodels
# Evaluate an ARIMA model using a walk-forward validation.
from pandas import read_csv
# NOTE(review): `pandas.datetime` is no longer exported by recent pandas
# releases -- confirm the pandas version this notebook targets.
from pandas import datetime
from matplotlib import pyplot
import statsmodels.api as sm
#import statsmodels.tsa.arima #.model import ARIMA
from sklearn.metrics import mean_squared_error
from math import sqrt
# NOTE(review): `Daily_World_cases` is defined outside the visible code.
data_a = Daily_World_cases
labels = np.array(data_a)
# Chronological split: the first 90% of the samples train, the rest test.
size = int(len(labels) * 0.9)
train, test = labels[0:size], labels[size:len(labels)]
yearly_seasonality=True, weekly_seasonality=True, daily_seasonality=False, seasonality_prior_scale=0.1, interval_width=0.95, holidays_prior_scale=10, changepoint_prior_scale=0.15) model.add_country_holidays(country_name='JP') model.fit(df_train) forecast = model.predict(df) forecast[['ds','yhat']].head() model.plot_components(forecast) fig, ax = plt.subplots(figsize=(15,5)) ax.plot(df_train['ds'], df_train['y'], c='grey', marker='o', ms=3, linestyle='-', label='Train') ax.plot(df_test['ds'], df_test['y'], c='red', marker='o',ms=3, linestyle='-', label='Test') ax.plot(forecast['ds'], forecast['yhat'], c='blue', marker='o', ms=3, linestyle='-', label='Forecast', alpha=0.5) ax.legend() ax.set_xlabel('Date') ax.set_ylabel('Sales'); forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail() metric_df = forecast.set_index('ds')[['yhat']].join(df.set_index('ds').y).reset_index() metric_df.tail(5) metric_df.dropna(inplace=True) metric_df.tail()
class ProphetProfit:
    """Forecast item prices with Prophet and rank items by forecast profit.

    The instance pulls price history through ``pd.read_sql``, fits one Prophet
    model per item, derives a trend indicator from the forecast trend and its
    7-row rolling mean, and collects per-item profit columns (forecast price
    minus buy price) into ``self.profit``.  The two bandit methods then read
    the pickled profit table and report how often each column was selected.
    """

    # Location of the pickled profit table shared by make_lists and the
    # bandit methods.
    _PROFIT_PICKLE = '../data/profit_df.pkl'

    def __init__(self, engine, query, item):
        """Store the data source and the item to model.

        :param engine: connection/engine handed to ``pd.read_sql``
        :param query: SQL text handed to ``pd.read_sql``
        :param item: value of the ``name_enus`` column to fit by default
        """
        self.engine = engine
        self.query = query
        self.item = item
        self.df = None
        self.data = None
        self.m = None
        self.lmbda = None
        self.forecast = None
        self.ma = None
        self.item_list = None
        self.positive_trend = []
        self.negative_trend = []
        self.profit = pd.DataFrame()
        # Totals accumulated by the most recent bandit run (None until then).
        self.total_reward = None
        self.total_profit = None

    def sql_call(self):
        """Load the query result into ``self.df`` sorted by the ``when`` column."""
        self.df = pd.read_sql(self.query, self.engine)
        self.df = self.df.sort_values(by='when')

    def prophet_fit(self, periods=31):
        """Fit Prophet on ``self.item`` and build the trend table ``self.ma``.

        :param periods: number of future periods passed to
            ``make_future_dataframe``
        """
        mask = self.df['name_enus'] == self.item
        self.data = self.df[mask][['when', 'priceavg']].rename(columns={
            'when': 'ds',
            'priceavg': 'y'
        })
        self.data['ds'] = pd.to_datetime(self.data['ds'])

        # Remove outliers: keep prices strictly within mean +/- 1.5 * std.
        std = self.data['y'].std() * 1.5
        mean = self.data['y'].mean()
        self.data = self.data[(self.data['y'] < mean + std)
                              & (self.data['y'] > mean - std)]

        # Fit with an additional ~monthly seasonality (the Box-Cox transform
        # that self.lmbda was reserved for is currently not applied).
        self.m = Prophet(n_changepoints=20)
        self.m.add_seasonality(period=30.4, fourier_order=5, name='monthly')
        self.m.fit(self.data)
        future = self.m.make_future_dataframe(periods)
        self.forecast = self.m.predict(future)

        # Observed prices next to the forecast, aligned by row position.
        self.ma = pd.concat([
            self.data['y'].reset_index(drop=True),
            self.forecast[['ds', 'yhat', 'trend']]
        ], axis=1)
        self.ma['7day'] = self.ma['trend'].rolling(7).mean()
        # +1 where the trend is above its rolling mean, -1 where below;
        # rows where neither holds stay NaN.
        self.ma.loc[(self.ma['trend'] > self.ma['7day']), 'trend_pos'] = 1
        self.ma.loc[(self.ma['trend'] < self.ma['7day']), 'trend_pos'] = -1

    def plot(self):
        """Reload the data, refit ``self.item`` and show forecast plots."""
        self.sql_call()
        self.prophet_fit()
        self.m.plot(self.forecast)
        self.m.plot_components(self.forecast)
        plt.show()

    def make_lists(self, buy_date):
        """Build the profit table for up to ten positively trending items.

        Items are visited by descending mean ``quantityavg``.  For each item
        the model is refitted; a trend score above 1 adds a profit column
        (forecast ``yhat`` after ``buy_date`` minus the price on ``buy_date``,
        or on the previous day when that date has no row), a score below -1
        records the item in ``self.negative_trend``.  The table is pickled
        once the loop ends, whether or not ten columns were reached.

        :param buy_date: purchase date as a ``'%Y-%m-%d'`` string
        :raises IndexError: when neither ``buy_date`` nor the previous day has
            a price row for a positively trending item
        """
        self.sql_call()
        # numeric_only keeps the aggregation working when the frame also
        # holds non-numeric columns; only 'quantityavg' is used afterwards.
        self.item_list = self.df.groupby('name_enus').mean(
            numeric_only=True).sort_values('quantityavg')[::-1]
        buy_date = np.datetime64(
            datetime.datetime.strptime(buy_date, '%Y-%m-%d').date())
        one_day = np.timedelta64(1, 'D')

        for item in self.item_list.index:
            self.item = item
            if self.profit.shape[1] >= 10:
                break
            self.prophet_fit()
            # Sum of the trend indicator over the eight rows ending at
            # position -31 (tied to the default 31-period forecast horizon).
            trend_score = self.ma['trend_pos'].iloc[-31:-39:-1].sum()
            if trend_score > 1:
                self.positive_trend.append(self.item)
                try:
                    buy = self.data[self.data['ds'] ==
                                    buy_date]['y'].values[0]
                except IndexError:
                    # No price on the buy date itself: fall back to the
                    # previous day.
                    buy = self.data[self.data['ds'] == (
                        buy_date - one_day)]['y'].values[0]
                profit_temp = self.forecast[
                    self.forecast['ds'] > buy_date][['ds', 'yhat'
                                                     ]].reset_index(drop=True)
                profit_temp['buy'] = buy
                profit_temp[item] = profit_temp['yhat'] - profit_temp['buy']
                self.profit = pd.concat([self.profit, profit_temp[item]],
                                        axis=1)
            elif trend_score < -1:
                self.negative_trend.append(self.item)

        # Always persist the table, and close the file deterministically.
        with open(self._PROFIT_PICKLE, 'wb') as handle:
            pickle.dump(self.profit, handle)

    def cross_val(self):
        """Cross-validate the fitted model and print the resulting metrics."""
        df_cv = cross_validation(self.m,
                                 initial='62 days',
                                 period='1 days',
                                 horizon='7 days')
        print(df_cv.sort_values('ds').tail())
        df_p = performance_metrics(df_cv)
        print(df_p)

    @staticmethod
    def _load_scaled_profit(path):
        """Return ``(rewards, profits)`` arrays from the pickled profit table.

        ``profits`` are the raw values with NaNs replaced by the column mean;
        ``rewards`` are the same values after ``RobustScaler``.
        """
        # NOTE: read_pickle executes arbitrary code from the file; only load
        # a table this class wrote itself.
        table = pd.read_pickle(path)
        table = table.fillna(table.mean())
        rewards = RobustScaler().fit(table).transform(table)
        # The unscaled values are exactly what inverse-transforming the
        # rewards would give back, so take them directly and only once.
        profits = table.values
        return rewards, profits

    def mabp_random(self):
        """Pick a random profit column per row and report selection shares.

        Accumulated totals are stored in ``self.total_reward`` and
        ``self.total_profit``.

        :return: normalised value counts of the selected column indexes
        """
        rewards, profits = self._load_scaled_profit(self._PROFIT_PICKLE)
        n_rounds, n_items = rewards.shape
        selected = []
        total_reward = 0
        total_profit = 0
        for n in range(n_rounds):
            item = random.randrange(n_items)
            selected.append(item)
            total_reward = total_reward + rewards[n, item]
            total_profit = total_profit + profits[n, item]
        self.total_reward = total_reward
        self.total_profit = total_profit
        return pd.Series(selected).value_counts(normalize=True)

    def mapb_ucb(self):
        """Select profit columns with an upper-confidence-bound rule.

        Each row is one round: every column not yet selected gets an infinite
        bound, otherwise the bound is its average scaled reward plus
        ``sqrt(2 * log(n + 1) / selections)``.  Accumulated totals are stored
        in ``self.total_reward`` and ``self.total_profit``.

        :return: normalised value counts of the selected column indexes
        """
        rewards, profits = self._load_scaled_profit(self._PROFIT_PICKLE)
        n_rounds, n_items = rewards.shape
        infinity = float('inf')
        selected = []
        numbers_of_selections = [0] * n_items
        sums_of_reward = [0] * n_items
        total_reward = 0
        total_profit = 0
        for n in range(n_rounds):
            item = 0
            # Scaled rewards can be negative, so the running maximum must
            # start below every possible bound rather than at 0.
            max_upper_bound = -infinity
            for i in range(n_items):
                if numbers_of_selections[i] > 0:
                    average_reward = (sums_of_reward[i] /
                                      numbers_of_selections[i])
                    delta_i = math.sqrt(2 * math.log(n + 1) /
                                        numbers_of_selections[i])
                    upper_bound = average_reward + delta_i
                else:
                    upper_bound = infinity
                if upper_bound > max_upper_bound:
                    max_upper_bound = upper_bound
                    item = i
            selected.append(item)
            numbers_of_selections[item] += 1
            reward = rewards[n, item]
            sums_of_reward[item] += reward
            total_reward += reward
            total_profit += profits[n, item]
        self.total_reward = total_reward
        self.total_profit = total_profit
        return pd.Series(selected).value_counts(normalize=True)
future = prophet.make_future_dataframe( periods=90) #, include_history=False df_cv = cross_validation(prophet, '90 days', initial='270 days', period='90 days') print(df_cv) plt.figure(1) plt.plot(df_cv['ds'], df_cv['y']) plt.plot(df_cv['ds'], df_cv['yhat']) plt.grid(True) plt.show() break forecast = prophet.predict(future) # print(forecast) prophet.plot_components(forecast) # plt.grid(True) # plt.figure(3) # plt.plot(forecast['ds'], forecast['yhat']) # plt.grid(True) # plt.show() remain = forecast.ix[89, ['yhat']] res[i] = remain df3 = pd.DataFrame.from_dict(res, orient='index') df3.columns = ['a'] p = df3.sort_values(by='a', axis=0, ascending=False).head(20) x = p.index # def save_result(re_na, QRTA, ISIR, VOLUME, top20): # with open(re_na+'.txt', 'w') as fr: # fr.writelines(['QRTA'+'\t'+str(QRTA)+'\n', 'ISIR'+'\t'+str(ISIR)+'\n', 'VOLUME'+'\t'+str(VOLUME)+'\n'])
def mod_prophet(train,
                test,
                dependent_var_col,
                outpath,
                name,
                changepoints=None,
                freq='D',
                n_changepoints=10,
                reg_cols=None,
                country_iso_code='ES',
                change_scale=0.05):
    """
    Train and test a Prophet model, save its report plots and return the main
    performance metrics.

    :param train: dataset with train data, indexed by date
    :param test: dataset with test data. The columns should be the same ones
        than in the train data
    :param dependent_var_col: name of the column with the objective variable
    :param outpath: path to save the files and plots in
    :param name: name of the series; used as the output sub-folder and in the
        plot file names
    :param changepoints: list of dates where a break in the series is added
        manually. Dates not before the last train date are ignored.
        Defaults to None.
    :param freq: frequency of the series ('D' for daily, 'W' for weekly, 'M'
        for monthly). Defaults to daily.
    :param n_changepoints: Number of changepoints to be used in the model when
        no manual changepoints remain. Defaults to 10.
    :param reg_cols: list of names of the columns in the dataframe to be added
        as regressors in the model. The list is not modified.
    :param country_iso_code: country code to use the holidays of each one.
    :param change_scale: changepoint prior scale of the prophet model.
        Defaults to 0.05
    :return: mae, rmse, mape, name, predictions, conf_intervals
    """
    # path definition: <outpath>/<name>/, created on first use
    report_dir = os.path.join(str(outpath), str(name))
    if not os.path.isdir(report_dir):
        os.makedirs(report_dir)
        print('creating output folder in: \n', report_dir)
    report_output_path = os.path.join(report_dir, '')

    # Work on shallow copies so the caller's frames keep their own index.
    train = train.copy(deep=False)
    test = test.copy(deep=False)
    train.index = pd.to_datetime(train.index)
    test.index = pd.to_datetime(test.index)
    # join both dataframes to plot when the model is done
    orig_df = pd.concat([train, test])

    if changepoints is not None:
        last_train_date = train.index.max()
        # Only changepoints inside the training window are usable; fall back
        # to automatic changepoints when none is left.
        changepoints = [
            date for date in pd.to_datetime(changepoints)
            if date < last_train_date
        ] or None

    prophet_kwargs = dict(yearly_seasonality=True,
                          changepoint_prior_scale=change_scale,
                          changepoint_range=0.95,
                          seasonality_mode='additive')
    if changepoints is None:
        mod = Prophet(n_changepoints=n_changepoints, **prophet_kwargs)
    else:
        mod = Prophet(changepoints=changepoints, **prophet_kwargs)

    mod.add_country_holidays(country_name=country_iso_code)

    regressors = list(reg_cols) if reg_cols is not None else []
    for regressor in regressors:
        mod.add_regressor(regressor,
                          standardize=False,
                          mode='multiplicative')
        print('adding regressor: ', regressor, '\n')

    # Prophet expects the date in 'ds' and the target in 'y'; name the index
    # 'ds' directly so this works whatever the index was called.
    cols = regressors + [dependent_var_col]
    tr_df = train[cols].rename_axis('ds').reset_index()
    tr_df = tr_df.rename(columns={dependent_var_col: 'y'})
    print(tr_df.head())

    # fit the data
    mod.fit(tr_df)

    # forecast
    n_test = test.shape[0]
    future = mod.make_future_dataframe(periods=n_test, freq=freq)
    for column in regressors:
        # NOTE(review): regressor values are zeroed for the whole prediction
        # frame (original marker: "change after testing") -- confirm intent.
        future[str(column)] = 0

    forecast = mod.predict(future)

    y_pred = forecast['yhat'].tail(n_test)
    y_true = test[dependent_var_col]
    mae = mean_absolute_error(y_pred=y_pred, y_true=y_true)
    rmse = np.sqrt(mean_squared_error(y_pred=y_pred, y_true=y_true))
    mape = mean_absolute_percentage_error(y_pred=y_pred, y_true=y_true)

    # forecast plot with changepoints
    plot = mod.plot(forecast, xlabel='Date', ylabel=dependent_var_col)
    add_changepoints_to_plot(plot.gca(), mod, forecast)
    plt.savefig(report_output_path + 'fc_plot_' + name + '.png')
    plt.close('all')

    # components plot
    mod.plot_components(forecast)
    plt.savefig(report_output_path + name + 'components_plot.png')
    plt.close('all')

    # Bar chart of the mean rate change at each potential changepoint.
    # NOTE(review): this figure is never saved before being closed at the end
    # of the function -- confirm whether it should be written to disk.
    deltas = mod.params['delta'].mean(0)
    fig = plt.figure(facecolor='w')
    ax = fig.add_subplot(111)
    ax.bar(range(len(deltas)), deltas)
    ax.grid(True, which='major', c='gray', ls='-', lw=1, alpha=0.2)
    ax.set_ylabel('Rate change')
    ax.set_xlabel('Potential changepoint')
    fig.tight_layout()

    # interactive plotly report
    fig = plot_plotly(mod, forecast)
    py.plot(fig,
            filename=report_output_path + name + 'fbprophet_plot.html',
            auto_open=False)

    aux_res_df = forecast.set_index('ds', drop=True)
    aux_res_df.index = pd.to_datetime(aux_res_df.index)
    aux_res_df = orig_df.merge(aux_res_df, left_index=True, right_index=True)
    predictions = aux_res_df.tail(n_test)
    conf_intervals = forecast[['yhat_lower', 'yhat_upper']]

    # Forecast versus real values over the last 180 rows.
    aux_res_df = aux_res_df.tail(180)
    _, ax = plt.subplots(figsize=(15, 10))
    plt.plot(aux_res_df["yhat"], color='darkred', label='Forecast')
    plt.plot(aux_res_df[dependent_var_col], color='darkblue', label='Real')
    plt.plot(aux_res_df['yhat_lower'],
             color='darkgreen',
             label='Lower',
             alpha=0.5)
    plt.plot(aux_res_df['yhat_upper'],
             color='darkgreen',
             label='Upper',
             alpha=0.5)
    ax.fill_between(test.index,
                    aux_res_df['yhat_lower'].tail(n_test),
                    aux_res_df['yhat_upper'].tail(n_test),
                    facecolor='darkgreen',
                    alpha=0.2,
                    interpolate=True)
    # Vertical marker where the test period starts.
    test_start = pd.to_datetime(test.index.min(), format='%Y-%m-%d')
    plt.axvline(x=test_start, color='grey', linestyle='--')
    ax.xaxis.set_major_locator(mticker.MultipleLocator(10000))
    plt.gcf().autofmt_xdate()

    # generate a text box and place it in upper left in axes coords
    props = dict(boxstyle='round', facecolor='white')
    textstr = 'MAE:' + str(round(mae, 0)) + '\n' + 'MAPE:' + str(
        round(mape, 2))
    ax.text(0.05,
            0.95,
            textstr,
            transform=ax.transAxes,
            fontsize=14,
            verticalalignment='top',
            bbox=props)
    plt.legend()
    plt.savefig(report_output_path + 'pfc_' + name + '_' +
                pd.to_datetime(test.index.min()).strftime('%Y-%m-%d') +
                '.png')
    plt.close('all')

    return mae, rmse, mape, name, predictions, conf_intervals
plt.ylabel('Number of Crimes') #Preparing the data for the prediction by using prophet data_prophet = data.resample('M').size().reset_index() data_prophet data_prophet.columns = ['Date', 'Crime Count'] data_prophet data_prophet_df = pd.DataFrame(data_prophet) data_prophet_df #renaming the columns in order to adapt it to the prediction data_prophet_df2 = data_prophet_df.rename(columns={'Date':'ds', 'Crime Count':'y'}) data_prophet_df2 m = Prophet() m.fit(data_prophet_df2) # Forcasting into the future future = m.make_future_dataframe(periods=365) forecast = m.predict(future) forecast #Visualize figure = m.plot(forecast, xlabel='Date', ylabel='Crime Rate') #Visualizing the trend for the future years figure3 = m.plot_components(forecast)
#prediction prediction=model.predict(future_dates) prediction.head() prediction[["ds","yhat","yhat_lower","yhat_upper"]].tail() prediction[["ds","yhat","yhat_lower","yhat_upper"]].tail() #predction projection model.plot(prediction) #visualise each componenets tends and weekly model.plot_components(prediction) from fbprophet.diagnostics import cross_validation df.shape df_cv=cross_validation(model,horizon='730 days',period='180 days',initial='1095 days') df_cv.head() from fbprophet.diagnostics import performance_metrics df_performance=performance_metrics(df_cv) df_performance.head() from fbprophet.plot import plot_cross_validation_metric fig=plot_cross_validation_metric(df_cv,metric='mse')
def main(): df = load_data() #page = st.sidebar.radio("Choose a page", ["Homepage", "SignUp"]) verified = "True" result = "F.A.S.T. WebApp - For Interview Demo" st.sidebar.title(result) st.sidebar.write( "Created By: Akash M Dubey [LinkedIn](https://www.linkedin.com/in/akashmdubey/)" ) st.sidebar.write( "Checkout more projects at [www.akashmdubey.com/projects](https://akashmdubey.com/)" ) page = st.sidebar.radio("Choose a Function", [ "About the Project", "Live News Sentiment", "Company Basic Details", "Company Advanced Details", "Stock Future Prediction", "Google Trends with Forecast", "Twitter Trends", "Meeting Summarization" ]) if page == "Google Trends with Forecast": st.sidebar.write(""" ## Choose a keyword and a prediction period """) keyword = st.sidebar.text_input("Keyword", "Company name") periods = st.sidebar.slider('Prediction time in days:', 7, 365, 90) # main section st.write(""" # Welcome to Trend Predictor App ### This app predicts the **Google Trend** you want! """) st.image( 'https://s3.eu-west-2.amazonaws.com/cdn.howtomakemoneyfromhomeuk.com/wp-content/uploads/2020/10/Google-Trends.jpg', width=350, use_column_width=200) st.write("Evolution of interest:", keyword) df = get_data(keyword) forecast, fig1, fig2 = make_pred(df, periods) st.pyplot(fig1) st.write("Trends Over the Years and Months") st.pyplot(fig2) elif page == "About the Project": st.title('Data Sources') st.write(""" ### Our F.A.S.T application have 3 data sources for two different use cases: #### 1. Web Scrapping to get Live News Data #### 2. Twitter API to get Real time Tweets #### 3. 
Google Trends API to get Real time Trends """) st.text('') link = '[Project Report](https://codelabs-preview.appspot.com/?file_id=1qxniFjwkDir6NT17KkvS1zDbmIgawcrEEwbbfCtAk8k#1)' st.markdown(link, unsafe_allow_html=True) st.title('AWS Data Architecture') st.image('./Images/Architecture Final AWS_FAST.jpg', width=900, use_column_width=1200) st.title('Dashboard') import streamlit.components.v1 as components components.iframe( "https://app.powerbi.com/view?r=eyJrIjoiZjMzMGUyZTEtM2RiMS00NzFlLWE3MWMtZDgzMjIxNTgxYmY3IiwidCI6ImE4ZWVjMjgxLWFhYTMtNGRhZS1hYzliLTlhMzk4YjkyMTVlNyIsImMiOjN9&pageName=ReportSection842eec15de524192b588", height=600, width=900) elif page == "Meeting Summarization": symbols = [ './Audio Files/Meeting 1.mp3', './Audio Files/Meeting 2.mp3', './Audio Files/Meeting 3.mp3', './Audio Files/Meeting 4.mp3' ] track = st.selectbox('Choose a the Meeting Audio', symbols) st.audio(track) data_dir = './inference-data/' ratiodata = st.text_input( "Please Enter a Ratio you want summary by: (TRY: 0.01)") if st.button("Generate a Summarized Version of the Meeting"): time.sleep(2.4) #st.success("This is the Summarized text of the Meeting Audio Files xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx xxxxxxgeeeeeeeeeeeeeee eeeeeeeeeeeeeehjjjjjjjjjjjjjjjsdbjhvsdk vjbsdkvjbsdvkb skbdv") if track == "./Audio Files/Meeting 2.mp3": user_input = "NKE" time.sleep(1.4) try: with open(data_dir + user_input) as f: st.success(summarize(f.read(), ratio=float(ratiodata))) #print() st.warning("Sentiment: Negative") except: st.text("Please Enter a valid Decimal value like 0.01") else: user_input = "AGEN" time.sleep(1.4) try: with open(data_dir + user_input) as f: st.success(summarize(f.read(), ratio=float(ratiodata))) #print() st.success("Sentiment: Positive") except: st.text("Please Enter a valid Decimal value like 0.01") elif page == "Twitter Trends": st.write(""" # Welcome to Twitter 
Sentiment App ### This app predicts the **Twitter Sentiments** you want! """) st.image( 'https://assets.teenvogue.com/photos/56b4f21327a088e24b967bb6/3:2/w_531,h_354,c_limit/twitter-gifs.gif', width=250, use_column_width=200) #st.subheader("Select a topic which you'd like to get the sentiment analysis on :") ################# Twitter API Connection ####################### consumer_key = "MaA51EmeZbgYazwFYOZxNRZR5" consumer_secret = "6ZUmFGFhiNzePsbkiPlKRBF7R9nq2dkDqfyfx7uU5eNgDhR8ci" access_token = "1359729189700722691-g8oMz8ONW6qtvibbQqqc6OAXJCIjeE" access_token_secret = "FzXBd0XRy2yZmtpH90GuyMaclmnyPGQEdxfCBO68BI0nb" # Use the above credentials to authenticate the API. auth = tweepy.OAuthHandler(consumer_key, consumer_secret) auth.set_access_token(access_token, access_token_secret) api = tweepy.API(auth) ################################################################ df = pd.DataFrame(columns=[ "Date", "User", "IsVerified", "Tweet", "Likes", "RT", 'User_location' ]) # Write a Function to extract tweets: def get_tweets(Topic, Count): i = 0 #my_bar = st.progress(100) # To track progress of Extracted tweets for tweet in tweepy.Cursor(api.search, q=Topic, count=100, lang="en", exclude='retweets').items(): #time.sleep(0.1) #my_bar.progress(i) df.loc[i, "Date"] = tweet.created_at df.loc[i, "User"] = tweet.user.name df.loc[i, "IsVerified"] = tweet.user.verified df.loc[i, "Tweet"] = tweet.text df.loc[i, "Likes"] = tweet.favorite_count df.loc[i, "RT"] = tweet.retweet_count df.loc[i, "User_location"] = tweet.user.location #df.to_csv("TweetDataset.csv",index=False) #df.to_excel('{}.xlsx'.format("TweetDataset"),index=False) ## Save as Excel i = i + 1 if i > Count: break else: pass # Function to Clean the Tweet. 
def clean_tweet(tweet): return ' '.join( re.sub( '(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)|([RT])', ' ', tweet.lower()).split()) # Funciton to analyze Sentiment def analyze_sentiment(tweet): analysis = TextBlob(tweet) if analysis.sentiment.polarity > 0: return 'Positive' elif analysis.sentiment.polarity == 0: return 'Neutral' else: return 'Negative' #Function to Pre-process data for Worlcloud def prepCloud(Topic_text, Topic): Topic = str(Topic).lower() Topic = ' '.join(re.sub('([^0-9A-Za-z \t])', ' ', Topic).split()) Topic = re.split("\s+", str(Topic)) stopwords = set(STOPWORDS) stopwords.update( Topic ) ### Add our topic in Stopwords, so it doesnt appear in wordClous ### text_new = " ".join( [txt for txt in Topic_text.split() if txt not in stopwords]) return text_new # Collect Input from user : Topic = str() Topic = str( st.sidebar.text_input( "Enter the topic you are interested in (Press Enter once done)", "enter company name")) if len(Topic) > 0: # Call the function to extract the data. pass the topic and filename you want the data to be stored in. 
with st.spinner("Please wait, Tweets are being extracted"): get_tweets(Topic, Count=200) st.success('Tweets have been Extracted !!!!') # Call function to get Clean tweets df['clean_tweet'] = df['Tweet'].apply(lambda x: clean_tweet(x)) # Call function to get the Sentiments df["Sentiment"] = df["Tweet"].apply(lambda x: analyze_sentiment(x)) # Write Summary of the Tweets st.write("Total Tweets Extracted for Topic '{}' are : {}".format( Topic, len(df.Tweet))) st.write("Total Positive Tweets are : {}".format( len(df[df["Sentiment"] == "Positive"]))) st.write("Total Negative Tweets are : {}".format( len(df[df["Sentiment"] == "Negative"]))) st.write("Total Neutral Tweets are : {}".format( len(df[df["Sentiment"] == "Neutral"]))) # See the Extracted Data : if st.button("See the Extracted Data"): #st.markdown(html_temp, unsafe_allow_html=True) st.success("Below is the Extracted Data :") st.write(df.head(50)) # get the countPlot if st.button("Get Count Plot for Different Sentiments"): st.success("Generating A Count Plot") st.subheader(" Count Plot for Different Sentiments") st.write(sns.countplot(df["Sentiment"], palette="Blues")) st.pyplot() # Piechart if st.button("Get Pie Chart for Different Sentiments"): st.success("Generating A Pie Chart") a = len(df[df["Sentiment"] == "Positive"]) b = len(df[df["Sentiment"] == "Negative"]) c = len(df[df["Sentiment"] == "Neutral"]) d = np.array([a, b, c]) explode = (0.1, 0.0, 0.1) st.write( plt.pie(d, shadow=True, explode=explode, labels=["Positive", "Negative", "Neutral"], autopct='%1.2f%%')) st.pyplot() # get the countPlot Based on Verified and unverified Users if st.button( "Get Count Plot Based on Verified and unverified Users"): st.success( "Generating A Count Plot (Verified and unverified Users)") st.subheader( " Count Plot for Different Sentiments for Verified and unverified Users" ) st.write(sns.countplot(df["Sentiment"], hue=df.IsVerified)) st.pyplot() ## Points to add 1. Make Backgroud Clear for Wordcloud 2. 
Remove keywords from Wordcloud # Create a Worlcloud if st.button("Get WordCloud for all things said about {}".format( Topic)): st.success( "Generating A WordCloud for all things said about {}". format(Topic)) text = " ".join(review for review in df.clean_tweet) stopwords = set(STOPWORDS) text_newALL = prepCloud(text, Topic) wordcloud = WordCloud( stopwords=stopwords, max_words=800, max_font_size=75, colormap="Blues", background_color="black").generate(text_newALL) st.write(plt.imshow(wordcloud, interpolation='bilinear')) st.pyplot() #Wordcloud for Positive tweets only if st.button( "Get WordCloud for all Positive Tweets about {}".format( Topic)): st.success( "Generating A WordCloud for all Positive Tweets about {}". format(Topic)) text_positive = " ".join(review for review in df[ df["Sentiment"] == "Positive"].clean_tweet) stopwords = set(STOPWORDS) text_new_positive = prepCloud(text_positive, Topic) #text_positive=" ".join([word for word in text_positive.split() if word not in stopwords]) wordcloud = WordCloud( stopwords=stopwords, max_words=800, max_font_size=75, colormap="Greens", background_color="black").generate(text_new_positive) st.write(plt.imshow(wordcloud, interpolation='bilinear')) st.pyplot() #Wordcloud for Negative tweets only if st.button( "Get WordCloud for all Negative Tweets about {}".format( Topic)): st.success( "Generating A WordCloud for all Positive Tweets about {}". 
format(Topic)) text_negative = " ".join(review for review in df[ df["Sentiment"] == "Negative"].clean_tweet) stopwords = set(STOPWORDS) text_new_negative = prepCloud(text_negative, Topic) #text_negative=" ".join([word for word in text_negative.split() if word not in stopwords]) wordcloud = WordCloud( stopwords=stopwords, max_words=800, max_font_size=75, colormap="Reds", background_color="black").generate(text_new_negative) st.write(plt.imshow(wordcloud, interpolation='bilinear')) st.pyplot() #st.sidebar.subheader("Scatter-plot setup") #box1 = st.sidebar.selectbox(label= "X axis", options = numeric_columns) #box2 = st.sidebar.selectbox(label="Y axis", options=numeric_columns) #sns.jointplot(x=box1, y= box2, data=df, kind = "reg", color= "red") #st.pyplot() elif page == "Stock Future Prediction": snp500 = pd.read_csv("./Datasets/SP500.csv") symbols = snp500['Symbol'].sort_values().tolist() ticker = st.sidebar.selectbox('Choose a S&P 500 Stock', symbols) START = "2015-01-01" TODAY = date.today().strftime("%Y-%m-%d") st.title('Stock Forecast App') st.image( 'https://media2.giphy.com/media/JtBZm3Getg3dqxK0zP/giphy-downsized-large.gif', width=250, use_column_width=200) # stocks = ('GOOG', 'AAPL', 'MSFT', 'GME', 'W', 'TSLA') # selected_stock = st.selectbox('Select dataset for prediction', stocks) n_years = st.slider('Years of prediction:', 1, 4) period = n_years * 365 st.title('Stock Forecast App To Do part in stockapp.py') data_load_state = st.text('Loading data...') data = yf.download(ticker, START, TODAY) data.reset_index(inplace=True) data_load_state.text('Loading data... 
done!') st.subheader('Raw data') st.write(data.tail()) # Plot raw data def plot_raw_data(): fig = go.Figure() fig.add_trace( go.Scatter(x=data['Date'], y=data['Open'], name="stock_open")) fig.add_trace( go.Scatter(x=data['Date'], y=data['Close'], name="stock_close")) fig.layout.update(title_text='Time Series data with Rangeslider', xaxis_rangeslider_visible=True) st.plotly_chart(fig) plot_raw_data() # Predict forecast with Prophet. df_train = data[['Date', 'Close']] df_train = df_train.rename(columns={"Date": "ds", "Close": "y"}) m = Prophet() m.fit(df_train) future = m.make_future_dataframe(periods=period) forecast = m.predict(future) # Show and plot forecast st.subheader('Forecast data') st.write(forecast.tail()) st.write(f'Forecast plot for {n_years} years') fig1 = plot_plotly(m, forecast) st.plotly_chart(fig1) st.write("Forecast components") fig2 = m.plot_components(forecast) st.write(fig2) elif page == "Company Advanced Details": snp500 = pd.read_csv("./Datasets/SP500.csv") symbols = snp500['Symbol'].sort_values().tolist() ticker = st.sidebar.selectbox('Choose a S&P 500 Stock', symbols) stock = yf.Ticker(ticker) def calcMovingAverage(data, size): df = data.copy() df['sma'] = df['Adj Close'].rolling(size).mean() df['ema'] = df['Adj Close'].ewm(span=size, min_periods=size).mean() df.dropna(inplace=True) return df def calc_macd(data): df = data.copy() df['ema12'] = df['Adj Close'].ewm(span=12, min_periods=12).mean() df['ema26'] = df['Adj Close'].ewm(span=26, min_periods=26).mean() df['macd'] = df['ema12'] - df['ema26'] df['signal'] = df['macd'].ewm(span=9, min_periods=9).mean() df.dropna(inplace=True) return df def calcBollinger(data, size): df = data.copy() df["sma"] = df['Adj Close'].rolling(size).mean() df["bolu"] = df["sma"] + 2 * df['Adj Close'].rolling(size).std( ddof=0) df["bold"] = df["sma"] - 2 * df['Adj Close'].rolling(size).std( ddof=0) df["width"] = df["bolu"] - df["bold"] df.dropna(inplace=True) return df st.title('Company Stocks Advanced Details') 
st.subheader('Moving Average') coMA1, coMA2 = st.beta_columns(2) with coMA1: numYearMA = st.number_input('Insert period (Year): ', min_value=1, max_value=10, value=2, key=0) with coMA2: windowSizeMA = st.number_input('Window Size (Day): ', min_value=5, max_value=500, value=20, key=1) start = dt.datetime.today() - dt.timedelta(numYearMA * 365) end = dt.datetime.today() dataMA = yf.download(ticker, start, end) df_ma = calcMovingAverage(dataMA, windowSizeMA) df_ma = df_ma.reset_index() figMA = go.Figure() figMA.add_trace( go.Scatter(x=df_ma['Date'], y=df_ma['Adj Close'], name="Prices Over Last " + str(numYearMA) + " Year(s)")) figMA.add_trace( go.Scatter(x=df_ma['Date'], y=df_ma['sma'], name="SMA" + str(windowSizeMA) + " Over Last " + str(numYearMA) + " Year(s)")) figMA.add_trace( go.Scatter(x=df_ma['Date'], y=df_ma['ema'], name="EMA" + str(windowSizeMA) + " Over Last " + str(numYearMA) + " Year(s)")) figMA.update_layout( legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01)) figMA.update_layout(legend_title_text='Trend') figMA.update_yaxes(tickprefix="$") st.plotly_chart(figMA, use_container_width=True) st.subheader('Moving Average Convergence Divergence (MACD)') numYearMACD = st.number_input('Insert period (Year): ', min_value=1, max_value=10, value=2, key=2) startMACD = dt.datetime.today() - dt.timedelta(numYearMACD * 365) endMACD = dt.datetime.today() dataMACD = yf.download(ticker, startMACD, endMACD) df_macd = calc_macd(dataMACD) df_macd = df_macd.reset_index() figMACD = make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.01) figMACD.add_trace(go.Scatter(x=df_macd['Date'], y=df_macd['Adj Close'], name="Prices Over Last " + str(numYearMACD) + " Year(s)"), row=1, col=1) figMACD.add_trace(go.Scatter(x=df_macd['Date'], y=df_macd['ema12'], name="EMA 12 Over Last " + str(numYearMACD) + " Year(s)"), row=1, col=1) figMACD.add_trace(go.Scatter(x=df_macd['Date'], y=df_macd['ema26'], name="EMA 26 Over Last " + str(numYearMACD) + " Year(s)"), row=1, 
col=1) figMACD.add_trace(go.Scatter(x=df_macd['Date'], y=df_macd['macd'], name="MACD Line"), row=2, col=1) figMACD.add_trace(go.Scatter(x=df_macd['Date'], y=df_macd['signal'], name="Signal Line"), row=2, col=1) figMACD.update_layout(legend=dict( orientation="h", yanchor="bottom", y=1, xanchor="left", x=0)) figMACD.update_yaxes(tickprefix="$") st.plotly_chart(figMACD, use_container_width=True) st.subheader('Bollinger Band') coBoll1, coBoll2 = st.beta_columns(2) with coBoll1: numYearBoll = st.number_input('Insert period (Year): ', min_value=1, max_value=10, value=2, key=6) with coBoll2: windowSizeBoll = st.number_input('Window Size (Day): ', min_value=5, max_value=500, value=20, key=7) startBoll = dt.datetime.today() - dt.timedelta(numYearBoll * 365) endBoll = dt.datetime.today() dataBoll = yf.download(ticker, startBoll, endBoll) df_boll = calcBollinger(dataBoll, windowSizeBoll) df_boll = df_boll.reset_index() figBoll = go.Figure() figBoll.add_trace( go.Scatter(x=df_boll['Date'], y=df_boll['bolu'], name="Upper Band")) figBoll.add_trace( go.Scatter(x=df_boll['Date'], y=df_boll['sma'], name="SMA" + str(windowSizeBoll) + " Over Last " + str(numYearBoll) + " Year(s)")) figBoll.add_trace( go.Scatter(x=df_boll['Date'], y=df_boll['bold'], name="Lower Band")) figBoll.update_layout(legend=dict( orientation="h", yanchor="bottom", y=1, xanchor="left", x=0)) figBoll.update_yaxes(tickprefix="$") st.plotly_chart(figBoll, use_container_width=True) elif page == "Live News Sentiment": st.image('https://www.visitashland.com/files/latestnews.jpg', width=250, use_column_width=200) snp500 = pd.read_csv("./Datasets/SP500.csv") symbols = snp500['Symbol'].sort_values().tolist() ticker = st.sidebar.selectbox('Choose a S&P 500 Stock', symbols) if st.button("Click here to See Latest News about " + ticker + ""): st.header('Latest News') def newsfromfizviz(temp): # time.sleep(5) finwiz_url = 'https://finviz.com/quote.ashx?t=' news_tables = {} tickers = [temp] for ticker in tickers: url = 
finwiz_url + ticker req = Request(url=url, headers={'user-agent': 'my-app/0.0.1'}) response = urlopen(req) # Read the contents of the file into 'html' html = BeautifulSoup(response) # Find 'news-table' in the Soup and load it into 'news_table' news_table = html.find(id='news-table') # Add the table to our dictionary news_tables[ticker] = news_table parsed_news = [] # Iterate through the news for file_name, news_table in news_tables.items(): # Iterate through all tr tags in 'news_table' for x in news_table.findAll('tr'): # read the text from each tr tag into text # get text from a only text = x.a.get_text() # splite text in the td tag into a list date_scrape = x.td.text.split() # if the length of 'date_scrape' is 1, load 'time' as the only element if len(date_scrape) == 1: time = date_scrape[0] # else load 'date' as the 1st element and 'time' as the second else: date = date_scrape[0] time = date_scrape[1] # Extract the ticker from the file name, get the string up to the 1st '_' ticker = file_name.split('_')[0] # Append ticker, date, time and headline as a list to the 'parsed_news' list parsed_news.append([ticker, date, time, text]) # Instantiate the sentiment intensity analyzer vader = SentimentIntensityAnalyzer() # Set column names columns = ['ticker', 'date', 'time', 'headline'] # Convert the parsed_news list into a DataFrame called 'parsed_and_scored_news' parsed_and_scored_news = pd.DataFrame(parsed_news, columns=columns) # Iterate through the headlines and get the polarity scores using vader scores = parsed_and_scored_news['headline'].apply( vader.polarity_scores).tolist() # Convert the 'scores' list of dicts into a DataFrame scores_df = pd.DataFrame(scores) # Join the DataFrames of the news and the list of dicts parsed_and_scored_news = parsed_and_scored_news.join( scores_df, rsuffix='_right') # Convert the date column from string to datetime parsed_and_scored_news['date'] = pd.to_datetime( parsed_and_scored_news.date).dt.date 
parsed_and_scored_news['Sentiment'] = np.where( parsed_and_scored_news['compound'] > 0, 'Positive', (np.where(parsed_and_scored_news['compound'] == 0, 'Neutral', 'Negative'))) return parsed_and_scored_news df = newsfromfizviz(ticker) df_pie = df[['Sentiment', 'headline']].groupby('Sentiment').count() fig = px.pie(df_pie, values=df_pie['headline'], names=df_pie.index, color=df_pie.index, color_discrete_map={ 'Positive': 'green', 'Neutral': 'darkblue', 'Negative': 'red' }) st.subheader('Dataframe with Latest News') st.dataframe(df) st.subheader('Latest News Sentiment Distribution using Pie Chart') st.plotly_chart(fig) plt.rcParams['figure.figsize'] = [11, 5] # Group by date and ticker columns from scored_news and calculate the mean mean_scores = df.groupby(['ticker', 'date']).mean() # Unstack the column ticker mean_scores = mean_scores.unstack() # Get the cross-section of compound in the 'columns' axis mean_scores = mean_scores.xs('compound', axis="columns").transpose() # Plot a bar chart with pandas mean_scores.plot(kind='bar') plt.grid() st.set_option('deprecation.showPyplotGlobalUse', False) st.subheader('Sentiments over Time') st.pyplot() elif page == "Company Basic Details": snp500 = pd.read_csv("./Datasets/SP500.csv") symbols = snp500['Symbol'].sort_values().tolist() ticker = st.sidebar.selectbox('Choose a S&P 500 Stock', symbols) stock = yf.Ticker(ticker) stock = yf.Ticker(ticker) info = stock.info st.title('Company Basic Details') st.subheader(info['longName']) st.markdown('** Sector **: ' + info['sector']) st.markdown('** Industry **: ' + info['industry']) st.markdown('** Phone **: ' + info['phone']) st.markdown('** Address **: ' + info['address1'] + ', ' + info['city'] + ', ' + info['zip'] + ', ' + info['country']) st.markdown('** Website **: ' + info['website']) st.markdown('** Business Summary **') st.info(info['longBusinessSummary']) fundInfo = { 'Enterprise Value (USD)': info['enterpriseValue'], 'Enterprise To Revenue Ratio': 
info['enterpriseToRevenue'], 'Enterprise To Ebitda Ratio': info['enterpriseToEbitda'], 'Net Income (USD)': info['netIncomeToCommon'], 'Profit Margin Ratio': info['profitMargins'], 'Forward PE Ratio': info['forwardPE'], 'PEG Ratio': info['pegRatio'], 'Price to Book Ratio': info['priceToBook'], 'Forward EPS (USD)': info['forwardEps'], 'Beta ': info['beta'], 'Book Value (USD)': info['bookValue'], 'Dividend Rate (%)': info['dividendRate'], 'Dividend Yield (%)': info['dividendYield'], 'Five year Avg Dividend Yield (%)': info['fiveYearAvgDividendYield'], 'Payout Ratio': info['payoutRatio'] } fundDF = pd.DataFrame.from_dict(fundInfo, orient='index') fundDF = fundDF.rename(columns={0: 'Value'}) st.subheader('Fundamental Info') st.table(fundDF) st.subheader('General Stock Info') st.markdown('** Market **: ' + info['market']) st.markdown('** Exchange **: ' + info['exchange']) st.markdown('** Quote Type **: ' + info['quoteType']) start = dt.datetime.today() - dt.timedelta(2 * 365) end = dt.datetime.today() df = yf.download(ticker, start, end) df = df.reset_index() fig = go.Figure(data=go.Scatter(x=df['Date'], y=df['Adj Close'])) fig.update_layout( title={ 'text': "Stock Prices Over Past Two Years", 'y': 0.9, 'x': 0.5, 'xanchor': 'center', 'yanchor': 'top' }) st.plotly_chart(fig, use_container_width=True) marketInfo = { "Volume": info['volume'], "Average Volume": info['averageVolume'], "Market Cap": info["marketCap"], "Float Shares": info['floatShares'], "Regular Market Price (USD)": info['regularMarketPrice'], 'Bid Size': info['bidSize'], 'Ask Size': info['askSize'], "Share Short": info['sharesShort'], 'Short Ratio': info['shortRatio'], 'Share Outstanding': info['sharesOutstanding'] } marketDF = pd.DataFrame(data=marketInfo, index=[0]) st.table(marketDF) else: verified = "False" result = "Please enter valid Username, Password and Acess Token!!" st.title(result)
y=bikerides['rain'], name='Rain', )) if temp_con == True: fig.add_trace( go.Scatter( x=bikerides['ds'], y=bikerides['temp'], name='Temp', )) st.write(fig) st.write(m.plot(forecast)) st.write(m.plot_components(forecast)) st.write(getPerformanceMetrics(m).mean()) st.header('Forecasting Rides') period = st.slider( 'Forecast Periods in days', 1, 365, ) st.write("Forecast Periods ", period, " days") # We must create a data frame holding dates for our forecast. The periods # parameter counts days as long as the frequency is 'D' for the day. Let's # do a 180 day forecast, approximately half a year. future = m.make_future_dataframe(periods=period, freq='D') future = future.merge(bikerides, on='ds', how='left')
def _clip_negative_forecast(forecast):
    """Clamp ``yhat`` and both interval bounds of *forecast* at zero, in place."""
    for column in ("yhat", "yhat_lower", "yhat_upper"):
        forecast[column] = np.where(forecast[column] < 0, 0, forecast[column])


def _rmse(actual, predicted):
    """Return the root mean squared error of *predicted* against *actual*."""
    return np.sqrt(mean_squared_error(actual, predicted))


def compare_models(data, variable, test_size):
    """Fit two baselines and three Prophet models, then print their test RMSE.

    The last ``test_size`` rows of ``data`` are held out as the test set.
    Compared models:

    * the overall training mean,
    * the training mean grouped by (month, weekday, hour) of the index,
    * a default Prophet model,
    * Prophet with the built-in ``"FRA"`` country holidays,
    * Prophet with temperature, squared temperature and their one-step lags
      as extra regressors.

    Args:
        data: DataFrame whose index exposes ``month``/``weekday``/``hour`` and
            which contains the ``variable`` and ``"temperature"`` columns.
        variable: Name of the column to forecast.
        test_size: Number of trailing rows held out for evaluation; the
            Prophet futures are extended by this many hourly periods.

    Side effects:
        Stores the forecast and component figures of each Prophet model in the
        module globals ``forecast_plot_*`` / ``component_plot_*`` and prints
        one RMSE line per model.
    """
    global forecast_plot_simple
    global component_plot_simple
    global forecast_plot_holiday
    global component_plot_holiday
    global forecast_plot_temp
    global component_plot_temp

    test_split = len(data) - test_size

    # Simple model using the mean by month, weekday and hour of the training rows.
    train_values = data[:test_split][[variable]]
    grouped_means = train_values.groupby([
        train_values.index.month,
        train_values.index.weekday,
        train_values.index.hour,
    ]).mean()
    grouped_means.index.names = ["month", "weekday", "hour"]
    test_index = data[test_split:].index
    test_keys = pd.DataFrame(
        data={
            "month": test_index.month,
            "weekday": test_index.weekday,
            "hour": test_index.hour,
        },
        index=test_index,
    )
    mean_grouped_predictions = test_keys.join(
        grouped_means, how="left", on=["month", "weekday", "hour"])[variable]

    # Preparing data for Prophet, which expects the columns 'ds' and 'y'.
    df = data[variable].reset_index(level=0)
    df.columns = ["ds", "y"]
    # Copy: regressor columns are added to the training frame further down,
    # and writing into a slice of ``df`` is a chained-assignment hazard.
    df_train = df[:test_split].copy()
    df_test = df[test_split:]

    m_simple = Prophet()
    m_simple.fit(df_train)
    future_simple = m_simple.make_future_dataframe(periods=test_size, freq="H")
    forecast_simple = m_simple.predict(future_simple)
    _clip_negative_forecast(forecast_simple)  # limiting low predictions to zero
    forecast_plot_simple = m_simple.plot(forecast_simple)
    component_plot_simple = m_simple.plot_components(forecast_simple)

    # Using inbuilt holidays because they automatically apply to predictions too.
    m_holiday = Prophet()
    m_holiday.add_country_holidays(country_name="FRA")
    m_holiday.fit(df_train)
    future_holiday = m_holiday.make_future_dataframe(periods=test_size, freq="H")
    forecast_holiday = m_holiday.predict(future_holiday)
    _clip_negative_forecast(forecast_holiday)
    forecast_plot_holiday = m_holiday.plot(forecast_holiday)
    component_plot_holiday = m_holiday.plot_components(forecast_holiday)

    m_temp = Prophet()
    for regressor in ("temperature", "temperature2", "temperature_lag",
                      "temperature2_lag"):
        m_temp.add_regressor(regressor)
    df_train["temperature"] = data["temperature"][:test_split].to_numpy()
    df_train["temperature2"] = df_train["temperature"]**2
    # The first lagged value has no predecessor; it is filled with the mean.
    df_train["temperature_lag"] = df_train["temperature"].shift(
        1, fill_value=df_train["temperature"].mean())
    df_train["temperature2_lag"] = df_train["temperature2"].shift(
        1, fill_value=df_train["temperature"].mean()**2)
    m_temp.fit(df_train)
    future_temp = m_temp.make_future_dataframe(periods=test_size, freq="H")
    future_temp["temperature"] = data["temperature"][-len(future_temp):].to_numpy()
    future_temp["temperature2"] = future_temp["temperature"]**2
    future_temp["temperature_lag"] = future_temp["temperature"].shift(
        1, fill_value=future_temp["temperature"].mean())
    future_temp["temperature2_lag"] = future_temp["temperature2"].shift(
        1, fill_value=future_temp["temperature"].mean()**2)
    forecast_temp = m_temp.predict(future_temp)
    _clip_negative_forecast(forecast_temp)
    forecast_plot_temp = m_temp.plot(forecast_temp)
    component_plot_temp = m_temp.plot_components(forecast_temp)

    # Report RMSE. The square root is taken explicitly because
    # mean_squared_error alone returns the MSE, not the RMSE the labels promise.
    print(
        "Mean RMSE: ",
        _rmse(df_test.y, np.repeat(df_train.y.mean(), len(df_test))),
    )
    print("Mean grouped RMSE: ", _rmse(df_test.y, mean_grouped_predictions))
    # The forecasts include the history, so the test rows start at test_split.
    print(
        "Simple Prophet: ",
        _rmse(df_test.y, forecast_simple.yhat[test_split:]),
    )
    print(
        "Holiday Prophet: ",
        _rmse(df_test.y, forecast_holiday.yhat[test_split:]),
    )
    print(
        "Temperature Prophet: ",
        _rmse(df_test.y, forecast_temp.yhat[test_split:]),
    )
# Notebook-style script: forecast the average price for the organic subset,
# then repeat the preparation for the conventional subset.

# Bare expression: displays the frame in a notebook, no effect in a script.
organic_df
plt.figure(figsize=(20,5))
plt.plot(organic_df['Date'], organic_df['AveragePrice'])
plt.xticks([])  # pass an empty tick list so no x ticks are drawn
# Rename to the 'ds' / 'y' column names that the Prophet calls below operate on.
organic_df = organic_df.rename(columns={'Date':'ds', 'AveragePrice':'y'})
m = Prophet()
m.fit(organic_df)
# Extend the fitted history by 365 periods and predict over the whole range.
future = m.make_future_dataframe(periods=365)
forecast = m.predict(future)
figure = m.plot(forecast, xlabel='Date', ylabel='Price')
decomposed_figure = m.plot_components(forecast)
"""##Price Prediction with Conventional Type"""
# Same steps for the rows whose 'type' is 'conventional'.
conventional_df = df[df['type']=='conventional']
conventional_df
plt.figure(figsize=(20,5))
plt.plot(conventional_df['Date'], conventional_df['AveragePrice'])
plt.xticks([])
conventional_df = conventional_df.rename(columns={'Date':'ds', 'AveragePrice':'y'})
# NOTE: 'm' and 'future' are rebound here, replacing the organic model above.
m = Prophet()
m.fit(conventional_df)
future = m.make_future_dataframe(periods=365)
def main():
    """Render the Streamlit stock dashboard for a fixed ticker symbol.

    Sections, in order: price history table, closing-price and volume charts,
    dividend tables and bar chart, closing prices of a list of favourite
    tickers, the latest StockTwits messages for the symbol, and a 365-period
    Prophet forecast of the closing price with its component plot.

    Returns:
        None. Everything is written to the Streamlit page; today's date is
        also printed to stdout.
    """
    st.header("Online Stock Price Ticker")
    # The symbol is fixed for now; a st.text_input prompt could replace it.
    symbol = 'AMZN'
    data = yf.Ticker(symbol)
    today = datetime.now().date().isoformat()  # ISO date string
    print(today)

    df = data.history(start='2010-06-01', end=today)
    st.dataframe(df)

    # Closing-price chart.
    st.subheader('종가')
    st.line_chart(df['Close'])

    # Volume chart.
    st.subheader('거래량')
    st.line_chart(df['Volume'])

    # Other information only the yfinance Ticker object offers:
    # data.info, data.calendar, data.major_holders,
    # data.institutional_holders, data.recommendations
    div_df = data.dividends  # dividend history
    st.dataframe(div_df.resample('Y').sum())
    new_df = div_df.reset_index()
    new_df['Year'] = new_df['Date'].dt.year
    st.dataframe(new_df)
    fig = plt.figure()
    plt.bar(new_df['Year'], new_df['Dividends'])
    st.pyplot(fig)

    # Show several stocks at once.
    favorites = ['msft', 'aapl', 'amzn', 'tsla', 'nvda']
    f_df = pd.DataFrame()
    for stock in favorites:
        f_df[stock] = yf.Ticker(stock).history(start='2010-01-01',
                                               end=today)['Close']
    st.dataframe(f_df)
    st.line_chart(f_df)

    # ---- StockTwits -------------------------------------------------------
    # st.beta_columns was renamed to st.columns; support both so the page
    # works on old and current Streamlit releases.
    make_columns = getattr(st, 'columns', None) or st.beta_columns
    try:
        # A timeout keeps a stalled API call from hanging the whole page.
        res = requests.get(
            'https://api.stocktwits.com/api/2/streams/symbol/{}.json'.format(
                symbol),
            timeout=10)
        res.raise_for_status()
        res_data = res.json()  # parsed into Python dicts and lists
    except (requests.RequestException, ValueError) as err:
        # Best effort: the forecast below does not depend on the messages.
        st.warning('Could not load StockTwits messages: {}'.format(err))
        res_data = {}
    for message in res_data.get('messages', []):
        col1, col2 = make_columns([1, 4])  # layout areas with a 1:4 ratio
        with col1:
            st.image(message['user']['avatar_url'])  # avatar picture
        with col2:
            st.write('유저이름 : ' + message['user']['username'])
            st.write('트윗 내용 : ' + message['body'])
            st.write('올린 시간 : ' + message['created_at'])

    # ---- Prophet forecast -------------------------------------------------
    p_df = df.reset_index()
    p_df.rename(columns={'Date': 'ds', 'Close': 'y'}, inplace=True)
    # Prophet refuses a timezone-aware 'ds' column, and yfinance may return a
    # tz-aware index; dropping the timezone is a no-op on naive timestamps.
    p_df['ds'] = p_df['ds'].dt.tz_localize(None)
    st.dataframe(p_df)

    m = Prophet()
    m.fit(p_df)
    future = m.make_future_dataframe(periods=365)
    forecast = m.predict(future)
    st.dataframe(forecast)
    fig1 = m.plot(forecast)
    st.pyplot(fig1)
    fig2 = m.plot_components(forecast)
    st.pyplot(fig2)
# In[77]:
# Compare the fitted trend of both categories on one set of axes.
plt.figure(figsize=(10, 7))
# Explicit labels: current matplotlib does not derive a legend entry from the
# Series name, so plt.legend() would otherwise be empty and emit a warning.
plt.plot(forecast['Date'], forecast['furniture_trend'], 'b-',
         label='furniture_trend')
plt.plot(forecast['Date'], forecast['office_trend'], 'r-',
         label='office_trend')
plt.legend()
plt.xlabel('Date')
plt.ylabel('Sales')
plt.title('Furniture vs. Office Supplies Sales Trend')

# In[78]:
# Same comparison for the point forecasts (yhat).
plt.figure(figsize=(10, 7))
plt.plot(forecast['Date'], forecast['furniture_yhat'], 'b-',
         label='furniture_yhat')
plt.plot(forecast['Date'], forecast['office_yhat'], 'r-',
         label='office_yhat')
plt.legend()
plt.xlabel('Date')
plt.ylabel('Sales')
plt.title('Furniture vs. Office Supplies Estimate')

# # Trends and Patterns
# Now, we can use the Prophet Models to inspect different trends of these two
# categories in the data.

# In[79]:
furniture_model.plot_components(furniture_forecast)

# In[80]:
office_model.plot_components(office_forecast)
import pandas as pd
import numpy as np
from fbprophet import Prophet

# Prep the dataset: keep the date and ride count of route '3'.
data = pd.read_csv("/home/dusty/Econ8310/DataSets/chicagoBusRiders.csv")
# .copy() so the column assignments below modify an independent frame rather
# than a filtered view of `data`.
route3 = data[data.route=='3'][['date','rides']].copy()
route3.date = pd.to_datetime(route3.date, infer_datetime_format=True)
# A flat list of names: the previous nested list ([['ds', 'y']]) created
# MultiIndex columns, so Prophet could not find its 'ds' and 'y' columns.
route3.columns = ['ds', 'y']

# Initialize Prophet instance and fit to data
m = Prophet()
m.fit(route3)

# Create timeline for 1 year in future, then generate predictions based on
# that timeline
future = m.make_future_dataframe(periods=365)
forecast = m.predict(future)

# Create plots of forecast and truth, as well as component breakdowns of the
# trends.
# NOTE: `plt` here is the Figure returned by m.plot, not matplotlib.pyplot;
# the name is kept so any code that follows keeps working.
plt = m.plot(forecast)
plt.show()
comp = m.plot_components(forecast)
comp.show()
prophet_pred = prophet_pred.reset_index('ds') # In[79]: #plot prophet yhat, yhat_lower, and yhat_upper pro.plot(prophet_pred) plt.savefig(r'/home/songy4/Documents/prophet_log_range.png') # In[80]: pro.plot_components(prophet_pred) plt.savefig(r'/home/songy4/Documents/prophet_components.png') # In[81]: #calculate rmse, mse errors and mean value prophet_rmse_error = rmse(test_US['Total_case'], test_US['Prophet_yhat']) prophet_mse_error = prophet_rmse_error **2 mean_value = df_US['Total_case'].mean() print(f'RMSE Error: {prophet_rmse_error} \n MSE Error: {prophet_mse_error} \n Mean: {mean_value}')
pjme_test_fcst.tail() # In[15]: # Plot the forecast f, ax = plt.subplots(1) f.set_figheight(5) f.set_figwidth(15) fig = model.plot(pjme_test_fcst, ax=ax) plt.show() # In[16]: # Plot the components of the model fig = model.plot_components(pjme_test_fcst) # In[17]: # Plot the forecast with the actuals f, ax = plt.subplots(1) f.set_figheight(5) f.set_figwidth(15) ax.scatter(pjme_test.index, pjme_test['PJME_MW'], color='r') fig = model.plot(pjme_test_fcst, ax=ax) # In[40]: # Plot the forecast with the actuals f, ax = plt.subplots(1) f.set_figheight(5)
# モデル作成 m = Prophet(growth="linear", yearly_seasonality=10, weekly_seasonality=False, daily_seasonality=False, seasonality_mode="multiplicative").fit(train) # 予測期間 future = m.make_future_dataframe(periods=24, freq="MS") forecast = m.predict(df=future) fig = m.plot(forecast) fig.savefig("figure/02_prophet_forecast.png") fig.clf() # トレンドと年周期 fig = m.plot_components(forecast) fig.savefig("figure/03_prophet_components.png") fig.clf() # 実測値と予測値、信頼区間 plt.scatter(df.index, df["#Passengers"], color="black", s=10, label="#Passengers") plt.plot(forecast["ds"], forecast["yhat"], label="yhat") plt.fill_between(df.index, forecast["yhat_upper"], forecast["yhat_lower"], color="blue", alpha=.1,
# Fit a Prophet model with custom and Chilean country holidays, forecast
# 7 periods ahead, plot the result and cross-validate it.
m = Prophet(holidays=holidays, weekly_seasonality=True, daily_seasonality=False, yearly_seasonality=False, n_changepoints=20)
m.add_country_holidays(country_name='Chile')
m.fit(df)

# Define the future frame to predict on.
future = m.make_future_dataframe(periods=7)
future.tail()

# Forecast
forecast = m.predict(future)
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail(14)

# Plot the forecast components.
# NOTE: yearly and daily seasonality are switched off in the constructor above.
fig2 = m.plot_components(forecast)
plt.title('Componentes del forecast sin tuning')
plt.show()

# Plot where the largest changes in the trend occur.
fig3 = m.plot(forecast)
a = add_changepoints_to_plot(fig3.gca(), m, forecast)
plt.title('Pronóstico con changepoints modelo sin tunear')
plt.show()

# Cross-validation.
df_cv = cross_validation(m, initial='30 days', horizon='7 days', parallel='processes', period='1 days')
df_p = performance_metrics(df_cv, rolling_window=1)
forecastHorizon = 24
forecastModel = Prophet()
forecastModel.fit(train_X)
# The point forecast and its interval bounds are available afterwards as
# forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].

# Eliminate weekend days because the stock market is closed.
future = forecastModel.make_future_dataframe(periods=40)
future['day'] = future['ds'].dt.weekday
future = future[future['day']<=4]  # weekday 0-4 is Monday-Friday
forecast = forecastModel.predict(future)

# Plot the stock trend forecast, its components and the detected changepoints.
forecastPlot = forecastModel.plot(forecast)
forecastComponentPlot = forecastModel.plot_components(forecast)
plotWithChangepoints = add_changepoints_to_plot(forecastPlot.gca(), forecastModel, forecast)

# In the Prophet constructor, changepoint_prior_scale controls how much
# flexibility the trend is allowed at changepoints: larger values make the
# trend more flexible, smaller values less flexible. It therefore affects
# over- and underfitting and should be chosen carefully. Prophet defaults it
# to 0.05.
# The grid starts at 0.001 rather than 0: the value is the scale of a prior
# and must be strictly positive, so 0 is not a usable setting.
change_point_scale = [0.001,0.25,0.50,0.75,1.00,1.25,1.50]
performances = []
for changePoint in change_point_scale:
    forecastModelCP = Prophet(changepoint_prior_scale=changePoint)
    forecastModelCP.fit(train_X)
    forecastCV = cross_validation(forecastModelCP, horizon='40 days')
    forecastPM = performance_metrics(forecastCV)
    # One (metrics, cross-validation frame) pair per candidate scale,
    # in the same order as change_point_scale.
    performances.append((forecastPM,forecastCV))
# Finally, we create the matplotlib figure.
figure = model.plot(forecast, xlabel='Date', ylabel='Points')

# Get the D3 HTML graph.
# with open("test.html", "w") as file:
#     fig = pd.Series(figure).to_json()
#     html = mpld3.fig_to_html(fig)
#     file.write(html)

# And then save it to an image file. Saving through the figure object itself
# does not depend on which figure pyplot currently considers active.
figure.savefig("out.png")
pp.pprint(figure)

# Components figure.
figure_components = model.plot_components(forecast)

# And then save it to an image file.
figure_components.savefig("out-components.png")
# Print the components figure (previously the first figure was printed twice).
pp.pprint(figure_components)

with open("predictions.json", "w") as file:
    # NOTE(review): this is an alias, not a copy -- the assignments below also
    # modify `forecast`. Only the interval bounds are exponentiated; the same
    # transform for 'yhat' is commented out. Confirm that is intended.
    forecast_data_orig = forecast
    #forecast_data_orig['yhat'] = np.exp(forecast_data_orig['yhat'])
    forecast_data_orig['yhat_lower'] = np.exp(forecast_data_orig['yhat_lower'])
    forecast_data_orig['yhat_upper'] = np.exp(forecast_data_orig['yhat_upper'])
    # print(forecast_data_orig)
    d = forecast_data_orig['yhat'].to_dict()
    predictions = []
# Notebook-style script: forecast the average price for the sample frame,
# then prepare two region-specific frames.

# Bare expressions such as the next line only display a value in a notebook.
df_sample
# Rename to the 'ds' / 'y' column names used by the Prophet calls below.
df_sample = df_sample.rename(columns={'Date': 'ds', 'AveragePrice': 'y'})
df_sample

# # TASK 5: DEVELOP MODEL AND MAKE PREDICTIONS - PART A
m = Prophet()
m.fit(df_sample)

# Forecasting into the future: extend the history by 365 periods.
future = m.make_future_dataframe(periods=365)
forecast = m.predict(future)
forecast
figure = m.plot(forecast, xlabel='Date', ylabel='Price')
figure2 = m.plot_components(forecast)

# # TASK 6: DEVELOP MODEL AND MAKE PREDICTIONS (REGION SPECIFIC) - PART B
# Select specific region
df_r1 = df[df['region'] == 'West']
df_r2 = df[df['region'] == 'Chicago']
# Sort chronologically before plotting so the lines run left to right.
df_r1 = df_r1.sort_values('Date')
df_r2 = df_r2.sort_values('Date')
plt.plot(df_r1['Date'], df_r1['AveragePrice'])
plt.plot(df_r2['Date'], df_r2['AveragePrice'])
df_r1 = df_r1.rename(columns={'Date': 'ds', 'AveragePrice': 'y'})
df_r2 = df_r2.rename(columns={'Date': 'ds', 'AveragePrice': 'y'})
#!/usr/bin/env python # -*- coding: utf-8 -*- """ @author = 'wyx' @time = 2017/6/22 10:04 @annotation = '' """ import numpy as np import pandas as pd from fbprophet import Prophet from matplotlib import pyplot as plot """ Prophet allows you to make forecasts using a logistic growth trend model, with a specified carrying capacity. """ df = pd.read_csv('example_wp_R.csv') df['y'] = np.log(df['y']) df['cap'] = 8.5 m = Prophet(growth='logistic') m.fit(df) future = m.make_future_dataframe(periods=1826) future['cap'] = 8.5 fcst = m.predict(future) m.plot_components(fcst) plot.show()
""" import pandas as pd from fbprophet import Prophet # instantiate the model and set parameters model = Prophet(changepoint_prior_scale=0.01, interval_width=0.95, growth='linear', daily_seasonality=True, weekly_seasonality=False, yearly_seasonality=True, seasonality_mode='additive') history_pd = pd.read_csv("load.csv") # fit the model to historical data model.fit(history_pd) # projects over 8760 hours - 1 year future_pd = model.make_future_dataframe(periods=8760, freq='H', include_history=True) # predict over the dataset forecast_pd = model.predict(future_pd) fig1 = model.plot(forecast_pd, xlabel='date', ylabel='load') fig2 = model.plot_components(forecast_pd)
# Instantiate Prophet, fit it and forecast 10 daily periods ahead.
model = Prophet()
model.fit(df)
future_data = model.make_future_dataframe(periods=10, freq='D')  # dropdown
future_data.tail()
forecast_data = model.predict(future_data)
forecast_data[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail(5)

# Make sure we save the original forecast data: work on a copy, so that
# `forecast_data` keeps the model-scale values while `forecast_data_orig`
# receives the exponentiated ones. (A plain assignment only aliased the frame
# and the back-transform overwrote the original.)
forecast_data_orig = forecast_data.copy()
forecast_data_orig['yhat'] = np.exp(forecast_data_orig['yhat'])
forecast_data_orig['yhat_lower'] = np.exp(forecast_data_orig['yhat_lower'])
forecast_data_orig['yhat_upper'] = np.exp(forecast_data_orig['yhat_upper'])
fig = model.plot(forecast_data_orig)
fig2 = model.plot_components(forecast_data_orig)

# Keep the modelled values under 'y_log' and restore 'y' from 'y_orig'.
df['y_log'] = df['y']
df['y'] = df['y_orig']

# Python
from fbprophet.plot import plot_plotly
import plotly.offline as py
py.init_notebook_mode()
fig = plot_plotly(model, forecast_data_orig)  # This returns a plotly Figure
py.iplot(fig)

# Traces comparing the actual series with the back-transformed prediction.
final_df = pd.DataFrame(forecast_data_orig)
actual_chart = go.Scatter(y=df["y_orig"], name='Actual')
predict_chart = go.Scatter(y=final_df["yhat"], name='Predicted')
# Load one tower's series from SQLite for a date range, hold out the tail,
# fit Prophet on the rest and plot the forecast.
finish_date = datetime.strptime("2011-12-31 23:59", "%Y-%m-%d %H:%M")
number_tower = "Data_1st"  # name of the value column selected from table Data
# The column name and both date bounds are interpolated into the SQL text.
query = '''SELECT Date, %s FROM Data WHERE Date >= \'%s\' AND Date <= \'%s\';''' % ( str(number_tower), start_date, finish_date)
conn = sqlite3.connect("mydatabase.db")
cursor = conn.cursor()
dataset = pd.read_sql_query(query, conn)
dataset.Date = dataset["Date"].apply(pd.to_datetime)
print(dataset)
predictions = 180  # number of trailing points held out and then forecast

# Bring the dataframe into the required format.
# NOTE: `df` is the same object as `dataset`, so the rename applies to both.
df = dataset
print(df.head())
df.columns = ['ds', 'y']

# Cut the last `predictions` points off the training set so that quality can
# be measured on them.
train_df = df[:-predictions]
m = Prophet()
m.fit(train_df)
future = m.make_future_dataframe(periods=predictions)
forecast = m.predict(future)
m.plot(forecast)
m.plot_components(forecast)
plt.show()
# State/territory column names of the confirmed-cases table. The deceased and
# recovered tables use the same names in upper case.
_SOL_STATE_NAMES = [
    'Maharashtra', 'Delhi', 'Gujrat', 'Madhya Pradesh', 'Tamilnadu',
    'Rajasthan', 'Uttar Pradesh', 'Telengana', 'Andhra Pradesh', 'Kerala',
    'Karnataka', 'Jammu And Kashmir', 'West Bengal', 'Harayana', 'Punjab',
    'Bihar', 'Orissa', 'Uttarakhand', 'Himachal Pradesh', 'Chattisgarh',
    'Assam', 'Jharkhand', 'Chandigarh', 'Ladakh', 'Andaman And Nicobar',
    'Meghalaya', 'Goa', 'Puducherry', 'Manipur', 'Tripura',
    'Arunachal Pradesh', 'Mizoram', 'Nagaland', 'Dadra And Nagar Haveli',
    'Daman And Deu', 'Lakshadweep',
]


def _prophet_frame(table, column):
    """Build a Prophet input frame (``ds``, ``y``) from one column of *table*."""
    from datetime import datetime

    rows = [
        # '20' is appended to the index label before parsing it with a
        # four-digit year, and thousands separators are stripped from values.
        [datetime.strptime(label + '20', '%d-%b-%Y'),
         float(str(value).replace(',', ''))]
        for label, value in zip(table.index, table[column])
    ]
    return pd.DataFrame(data=rows, columns=['ds', 'y'])


def _save_trend_plot(frame, title, path, xlabel=None, ylabel=None):
    """Plot ``y`` against ``ds``, save the figure to *path* and close it."""
    pd.plotting.register_matplotlib_converters()
    axes = frame.plot('ds', 'y')
    axes.set_title(title)
    if xlabel is not None:
        axes.set_xlabel(xlabel)
    if ylabel is not None:
        axes.set_ylabel(ylabel)
    axes.figure.savefig(path)
    plt.close(axes.figure)  # this function creates many figures; free each one


def _forecast_and_save(frame, ylabel, forecast_path, components_path,
                       **prophet_kwargs):
    """Fit Prophet on *frame*, forecast 30 periods and save both figures."""
    from fbprophet import Prophet

    model = Prophet(**prophet_kwargs)
    model.fit(frame)
    forecast = model.predict(model.make_future_dataframe(periods=30))
    forecast_figure = model.plot(forecast)
    forecast_figure.axes[0].set_ylabel(ylabel)
    forecast_figure.savefig(forecast_path)
    components_figure = model.plot_components(forecast)
    components_figure.savefig(components_path)
    plt.close(forecast_figure)
    plt.close(components_figure)


def sol():
    """Generate every trend and forecast image for the India COVID pages.

    Reads ``Confirmed.csv``, ``Deceased.csv`` and ``Recovered.csv`` from
    ``./Datasets`` and, for the national series and each state, writes a trend
    plot plus a Prophet forecast and component plot below
    ``./static/content``. Output file names are unchanged from the previous
    implementation.

    Returns:
        None.
    """
    confirmed = pd.read_csv('./Datasets/Confirmed.csv', header=0, index_col=0)
    deceased = pd.read_csv('./Datasets/Deceased.csv', header=0, index_col=0)
    recovered = pd.read_csv('./Datasets/Recovered.csv', header=0, index_col=0)

    india_dir = './static/content/Images_India/'

    # NOTE(review): these three images have never contained data -- the
    # mean/variance/std were computed but not plotted, so only a labelled,
    # empty set of axes is saved. The files are still written in case the
    # site references them; confirm what they are meant to show.
    plt.figure()
    for ylabel, file_name in (
            ("Mean of Daily cases in India",
             'India_mean_daily_Confirm_cases.png'),
            ("Variance of Daily cases",
             'India_variance_daily_Confirm_cases.png'),
            ("Standard Deviation Of daily cases",
             'India_standard_daily_Confirm_cases.png')):
        plt.ylabel(ylabel)
        plt.savefig(india_dir + file_name)
    plt.close()

    # After transposing, rows are dates and columns are regions.
    confirmed = confirmed.T
    deceased = deceased.T
    recovered = recovered.T
    upper_states = [name.upper() for name in _SOL_STATE_NAMES]

    # ---- Confirmed cases --------------------------------------------------
    india = _prophet_frame(confirmed, 'India')
    # Dates are on the x axis (the two axis labels used to be swapped).
    _save_trend_plot(india, "Trend in Increase of Confirmed Cases for India",
                     india_dir + 'India_confirmed_trend.png',
                     xlabel='Date', ylabel='Number of Confirmed cases')
    _forecast_and_save(india, "No. of Cases for India",
                       india_dir + 'india_confirm.png',
                       india_dir + 'India_confirm_components.png')
    out_dir = './static/content/Images_Confirmed/'
    for state in _SOL_STATE_NAMES:
        _forecast_and_save(_prophet_frame(confirmed, state),
                           "No. of Confirmed Cases " + state,
                           out_dir + state + '_seasonality.png',
                           out_dir + state + '_components.png')
    for state in _SOL_STATE_NAMES:
        _save_trend_plot(
            _prophet_frame(confirmed, state), "Cases for " + state,
            './static/content/Images_statewise_confirmed_plot/' + state
            + '_mean.png')

    # ---- Deceased ---------------------------------------------------------
    india = _prophet_frame(deceased, 'INDIA')
    _save_trend_plot(india, "Trend in Increase of Deceased Cases for India",
                     india_dir + 'India_deaths_trend.png',
                     xlabel='Date', ylabel='Number of Deceased cases')
    _forecast_and_save(india, "No. of Cases for India",
                       india_dir + 'india_deceased_seasonalities.png',
                       india_dir + 'India_deceased_components.png')
    for state in upper_states:
        _save_trend_plot(
            _prophet_frame(deceased, state), "Deaths for " + state,
            './static/content/Images_statewise_deaths_plot/' + state
            + '_deathtrend.png')
    out_dir = './static/content/Images_Death/'
    for state in upper_states:
        _forecast_and_save(_prophet_frame(deceased, state),
                           "No. of deaths for " + state,
                           out_dir + state + '_seasonalities.png',
                           out_dir + state + '_components.png',
                           interval_width=0.95)

    # ---- Recovered --------------------------------------------------------
    india = _prophet_frame(recovered, 'INDIA')
    _save_trend_plot(india, "Trend in Increase of Recovered Cases for India",
                     india_dir + 'India_recovered_trend.png',
                     xlabel='Date', ylabel='Number of Recovered cases')
    _forecast_and_save(india, "No. of Cases for India",
                       india_dir + 'india_recovered_seasonalities.png',
                       india_dir + 'India_recovered_components.png')
    for state in upper_states:
        _save_trend_plot(
            _prophet_frame(recovered, state), "Recovery Cases for " + state,
            './static/content/Images_statewise_recovered_plot/' + state
            + '_normal.png')
    out_dir = './static/content/Images_Recovered/'
    for state in upper_states:
        _forecast_and_save(_prophet_frame(recovered, state),
                           "No. of Recovered cases for " + state,
                           out_dir + state + '.png',
                           out_dir + state + '_components.png',
                           interval_width=0.95)
import plotly.io as pio
from fbprophet import Prophet
from fbprophet.plot import plot_plotly
pio.renderers.default = "png"
df = pd.read_csv("example_wp_log_peyton_manning.csv")
m = Prophet()
m.fit(df)
future = m.make_future_dataframe(periods=365)
forecast = m.predict(future)
fig1 = m.plot(forecast)
fig2 = m.plot_components(forecast)
#py.init_notebook_mode()
fig = plot_plotly(m, forecast) # This returns a plotly Figure
py.iplot(fig)
fig.show()
"""
# NOTE(review): the triple quote above has no visible partner in this chunk.
# Presumably the Prophet/plotly example before it is a string-quoted (disabled)
# block whose opening quote lies earlier in the file -- confirm against the full
# file. If it is live code instead, `py` and `pd` are not bound by any statement
# visible here.

# Plot four values against their position, label the y axis and write the
# figure to "scatch.png".
import matplotlib.pyplot as plt
x = 2  # not read by any statement visible in this chunk
plt.plot([4, 7, 9, 15])
plt.ylabel('some numbers')
plt.savefig("scatch.png")
# Local import so that pd.concat below is available regardless of how the
# surrounding file aliases pandas.
import pandas as pd

# Daily deaths in the two-column (ds, y) layout that Prophet expects.
df_deaths = df_country.loc[:, ['ds', 'death_day']]
df_deaths.rename(columns={'death_day': 'y'}, inplace=True)

# fitting the models and making predictions
m_cases = Prophet(yearly_seasonality=False, daily_seasonality=False,
                  interval_width=0.95, growth='linear')
m_cases.fit(df_cases)
m_deaths = Prophet(yearly_seasonality=False, daily_seasonality=False,
                   interval_width=0.95, growth='linear')
m_deaths.fit(df_deaths)

# include_history=False: the future frames hold only the daysToPredict new
# dates, so the forecasts can be appended to the observed data below.
future_cases = m_cases.make_future_dataframe(
    periods=daysToPredict, freq='D', include_history=False)
future_deaths = m_deaths.make_future_dataframe(
    periods=daysToPredict, freq='D', include_history=False)
forecast_cases = m_cases.predict(future_cases)
forecast_deaths = m_deaths.predict(future_deaths)

# Component plots are only written to disk for Brazil.
if country == 'Brazil':
    fig = m_cases.plot_components(forecast_cases)
    fig.savefig('../predictions/brazil_prophet_cases.png')
    fig = m_deaths.plot_components(forecast_deaths)
    fig.savefig('../predictions/brazil_prophet_deaths.png')

# Observed values followed by the point forecasts (yhat renamed to y).
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0;
# with ignore_index=True the result is the same.
p = forecast_cases.loc[:, ['ds', 'yhat']]
p.rename(columns={'yhat': 'y'}, inplace=True)
t = pd.concat([df_cases, p[['ds', 'y']]], ignore_index=True)

p = forecast_deaths.loc[:, ['ds', 'yhat']]
p.rename(columns={'yhat': 'y'}, inplace=True)
s = pd.concat([df_deaths, p[['ds', 'y']]], ignore_index=True)

t['ds'] = t['ds'].astype('datetime64[ns]')
t.rename(columns={'y': 'case_day'}, inplace=True)
# Forecasts are fractional; the cast truncates them to whole cases.
t['case_day'] = t['case_day'].astype('int32')
class ProphetForecaster(UVariateTimeSeriesClass):
    """Univariate time series child class using Prophet for forecasting,
    ref. to https://facebook.github.io/prophet

    Attributes
    ----------
    _prophet_interval_width: float
        The width of the uncertainty intervals (this class defaults to 0.95),
        also ref. to https://facebook.github.io/prophet/docs/uncertainty_intervals.html
    _yearly_seasonality: bool
        Consider yearly seasonality yes/no
    _monthly_seasonality: bool
        Consider monthly seasonality yes/no
    _quarterly_seasonality: bool
        Consider quarterly seasonality yes/no
    _weekly_seasonality: bool
        Consider weekly seasonality yes/no
    _daily_seasonality: bool
        Consider daily seasonality yes/no
    _weekend_seasonality: bool
        Consider week-end seasonality yes/no. When set, the built-in weekly
        seasonality is switched off and two conditional seasonalities
        (week-end / not week-end) are added instead.
        ref. to https://facebook.github.io/prophet/docs/seasonality,_holiday_effects,_and_regressors.html#modeling-holidays-and-special-events
    _changepoint_prior_scale: float
        If the trend changes are being overfit (too much flexibility) or
        underfit (not enough flexibility), you can adjust the strength of the
        sparse prior using this argument. This class defaults to 0.001.
        Increasing it will make the trend more flexible. Decreasing it will
        make the trend less flexible.
        ref. to https://facebook.github.io/prophet/docs/trend_changepoints.html#automatic-changepoint-detection-in-prophet
    _changepoint_range: float
        Share of the time series (counted from its start) in which potential
        changepoints are placed. This class defaults to 0.9, i.e. the first
        90% of the time series.
        ref. to https://facebook.github.io/prophet/docs/trend_changepoints.html#automatic-changepoint-detection-in-prophet
    _add_change_points: bool
        Whether to add change points to the plots
        ref. to https://facebook.github.io/prophet/docs/trend_changepoints.html#automatic-changepoint-detection-in-prophet
    _diagnose: bool
        Whether to run cross validation yes/no
    _history: str
        Amount of historic data in days for cross validation,
        Corresponds to initial in https://facebook.github.io/prophet/docs/diagnostics.html
    _step: str
        Corresponds to period in the link above. Defines step in days to shift the historic data
    _horizon: str
        Forecasting horizon in days for each cross validation run
    _consider_holidays: bool
        Whether to consider holidays yes/no
        ref. to https://facebook.github.io/prophet/docs/seasonality,_holiday_effects,_and_regressors.html#modeling-holidays-and-special-events
    _country: str
        The country for which holidays are to be considered
        (one of 'AT', 'DE', 'US')
    _prophet_logger: Logger
        The logger for logging

    Methods
    ----------
    assertions()
        Assertion tests, must be overridden
    set_params()
        Sets new parameter values
    get_params_dict()
        Gets parameter values as a dictionary
    ts_fit()
        Fits the Prophet model to time series
    ts_diagnose()
        Diagnoses the fitted model. Cross validation is started
    plot_residuals()
        Generates residual plots
    ts_test()
        Evaluates fitted model on the test data, if this one has been generated
    ts_forecast()
        Forecasts time series and plots the results
    plot_forecast()
        Plots forecasted time-series
    """

    # Keys accepted by set_params() that are stored on attributes defined
    # outside this class (key -> attribute name).
    _BASE_PARAM_ATTRS = {
        'ts_df': 'ts_df',
        'freq': 'freq',
        'n_test': 'n_test',
        'n_val': 'n_val',
        'timeformat': 'time_format',
    }

    # Prophet-specific parameters; each one is stored as '_<name>'.
    # The order is the order of the dictionary returned by get_params_dict().
    _PROPHET_PARAMS = (
        'prophet_interval_width',
        'yearly_seasonality',
        'monthly_seasonality',
        'quarterly_seasonality',
        'weekly_seasonality',
        'daily_seasonality',
        'weekend_seasonality',
        'changepoint_prior_scale',
        'changepoint_range',
        'add_change_points',
        'diagnose',
        'history',
        'step',
        'horizon',
        'consider_holidays',
        'country',
    )

    # Countries for which a holiday calendar can be built.
    _SUPPORTED_COUNTRIES = ('AT', 'DE', 'US')

    # Helper columns added for the week-end seasonality.
    _WEEKEND_COLS = ['on_weekend', 'off_weekend']

    def __init__(self,
                 prophet_interval_width=0.95,
                 yearly_seasonality=False,
                 monthly_seasonality=False,
                 quarterly_seasonality=False,
                 weekly_seasonality=False,
                 daily_seasonality=False,
                 weekend_seasonality=False,
                 changepoint_prior_scale=0.001,
                 changepoint_range=0.9,
                 add_change_points=True,
                 diagnose=False,
                 history=None,
                 step=None,
                 horizon=None,
                 consider_holidays=True,
                 country='DE',
                 **kwds):
        """Initializes the object ProphetForecaster"""
        self._prophet_logger = Logger('prophet')

        try:
            super(ProphetForecaster, self).__init__(**kwds)
        except TypeError:
            self._prophet_logger.exception(
                "TypeError occurred, Arguments missing")

        self._model = None

        self._prophet_interval_width = prophet_interval_width
        self._yearly_seasonality = yearly_seasonality
        self._monthly_seasonality = monthly_seasonality
        self._quarterly_seasonality = quarterly_seasonality
        self._weekly_seasonality = weekly_seasonality
        self._daily_seasonality = daily_seasonality
        self._weekend_seasonality = weekend_seasonality

        self._changepoint_prior_scale = changepoint_prior_scale
        self._changepoint_range = changepoint_range
        self._add_change_points = add_change_points

        self._diagnose = diagnose
        self._history = history
        self._step = step
        self._horizon = horizon
        # cross validation results / performance metrics, see ts_diagnose()
        self._prophet_cv = None
        self._prophet_p = None

        self._consider_holidays = consider_holidays
        self._country = country

        self._id = 'Prophet'

    def __copy__(self):
        """Copies the object"""
        result = super(ProphetForecaster, self).__copy__()
        #
        result._model = self._model
        for name in self._PROPHET_PARAMS:
            setattr(result, '_' + name, getattr(self, '_' + name))
        result._prophet_cv = self._prophet_cv
        result._prophet_p = self._prophet_p
        result._prophet_logger = self._prophet_logger
        return result

    def set_params(self, p_dict=None, **kwargs):
        """Sets new parameters

        Parameters are taken from p_dict if it is given, otherwise from the
        keyword arguments. Unknown keys are ignored. Returns self.
        """
        params_dict = kwargs if p_dict is None else p_dict
        #
        for k, v in params_dict.items():
            if k in self._BASE_PARAM_ATTRS:
                setattr(self, self._BASE_PARAM_ATTRS[k], v)
            elif k in self._PROPHET_PARAMS:
                setattr(self, '_' + k, v)

        return self

    def get_params_dict(self):
        """Gets parameters as a dictionary"""
        return {name: getattr(self, '_' + name)
                for name in self._PROPHET_PARAMS}

    @staticmethod
    def we_season(ds):
        """Tells whether the time stamp ds falls on a week-end (Saturday or
        Sunday). Used to prepare weekend_seasonality for Prophet."""
        date = pd.to_datetime(ds)
        return date.weekday() >= 5

    def _holidays_frame(self):
        """Builds the holidays data frame passed to Prophet for self._country,
        covering all years present in the index of self.ts_df."""
        years = list(np.unique(np.asarray(self.ts_df.index.year)))
        holi = getattr(holidays, self._country)(state=None, years=years)
        #
        df_holi = pd.DataFrame.from_dict(data=dict(sorted(holi.items())),
                                         orient='index').reset_index()
        df_holi.columns = ['ds', 'holiday']
        df_holi['lower_window'] = 0
        df_holi['upper_window'] = 0
        return df_holi

    def ts_fit(self, suppress=False):
        """Fit Prophet to the time series data.

        Parameters:
        ----------
        suppress: bool
            Suppress or not some of the output messages

        Returns:
        ----------
        self, or -1 if the model could not be set up or fitted
        """
        if self.hyper_params is not None:
            self._gs.set_forecaster(self)
            self._gs.set_hyper_params(self.hyper_params)
            # a very important command here to avoid endless loop
            self.hyper_params = None
            self._prophet_logger.info("***** Starting grid search *****")
            self._gs = self._gs.grid_search(suppress=suppress,
                                            show_plot=False)
            #
            self.best_model = self._gs.best_model
            self.__dict__.update(self.best_model['forecaster'].__dict__)
            self._prophet_logger.info("***** Finished grid search *****")
            return self

        self._prepare_fit()
        self._model = None
        self.ts_split()

        ts_df = self._train_dt.copy()
        ts_test_df = self._test_dt
        # sanity check: helper columns left over from a previous fit would
        # break the column renaming below, so they are removed from both frames
        if 'on_weekend' in ts_df.columns:
            ts_df.drop(self._WEEKEND_COLS, inplace=True, axis=1)
            if ts_test_df is not None:
                ts_test_df.drop(self._WEEKEND_COLS, inplace=True, axis=1,
                                errors='ignore')

        # Fit
        self._prophet_logger.info("Trying to fit the Prophet model....")
        try:
            if not suppress:
                self._prophet_logger.info("...via using parameters\n")
                print_attributes(self)
            # diagnose on? (explicit check instead of assert, which would be
            # stripped when running with -O)
            if self._diagnose and (self._step is None
                                   or self._horizon is None):
                self._prophet_logger.warning(
                    "You want to diagnose the Prophet model. Please provide parameters "
                    "'step' and 'horizon' within object initialization!")
                sys.exit("STOP")

            ts_df = ts_df.reset_index()
            ts_df.columns = self._ts_df_cols
            has_test = ts_test_df is not None and not ts_test_df.empty
            if has_test:
                ts_test_df = ts_test_df.reset_index()
                ts_test_df.columns = self._ts_df_cols
            #
            # the week-end seasonality replaces the weekly one
            weekly_s = (False if self._weekend_seasonality
                        else self._weekly_seasonality)
            #
            model_kwargs = dict(
                interval_width=self._prophet_interval_width,
                yearly_seasonality=self._yearly_seasonality,
                weekly_seasonality=weekly_s,
                daily_seasonality=self._daily_seasonality,
                changepoint_range=self._changepoint_range,
                changepoint_prior_scale=self._changepoint_prior_scale)
            if self._consider_holidays:
                if self._country not in self._SUPPORTED_COUNTRIES:
                    self._prophet_logger.warning(
                        "Unsupported country. Right now, Austria (AT), "
                        "Germany(DE) and USA (US) supported.")
                    sys.exit("STOP")
                model_kwargs['holidays'] = self._holidays_frame()
            self._model = Prophet(**model_kwargs)

            if self._monthly_seasonality:
                self._model.add_seasonality(name='monthly',
                                            period=30.5,
                                            fourier_order=20)
                if not suppress:
                    self._prophet_logger.info("Added monthly seasonality.")

            if self._quarterly_seasonality:
                self._model.add_seasonality(name='quarterly',
                                            period=91.5,
                                            fourier_order=20)
                if not suppress:
                    self._prophet_logger.info(
                        "Added quarterly seasonality.")

            if self._weekend_seasonality:
                on_weekend = ts_df['ds'].apply(self.we_season)
                ts_df['on_weekend'] = on_weekend
                ts_df['off_weekend'] = ~on_weekend
                self._train_dt = ts_df.copy()
                self._train_dt.set_index('ds', inplace=True)
                #
                if has_test:
                    on_weekend = ts_test_df['ds'].apply(self.we_season)
                    ts_test_df['on_weekend'] = on_weekend
                    ts_test_df['off_weekend'] = ~on_weekend
                    self._test_dt = ts_test_df.copy()
                    self._test_dt.set_index('ds', inplace=True)
                # and add
                self._model.add_seasonality(name='weekend_on_season',
                                            period=7,
                                            fourier_order=5,
                                            condition_name='on_weekend')
                self._model.add_seasonality(name='weekend_off_season',
                                            period=7,
                                            fourier_order=5,
                                            condition_name='off_weekend')
                if not suppress:
                    self._prophet_logger.info(
                        "Added week-end seasonality.")

            # tic
            start = time()
            self.model_fit = self._model.fit(ts_df)
            # toc
            if not suppress:
                self._prophet_logger.info(
                    "Time elapsed: {} sec.".format(time() - start))
        except Exception:
            # sys.exit() above raises SystemExit, which is not caught here
            self._prophet_logger.exception("Prophet error...")
            return -1

        self._prophet_logger.info("Model successfully fitted to the data!")

        # Fitted values
        self._prophet_logger.info("Computing fitted values and residuals...")
        # in-sample predict
        try:
            self.fittedvalues = self._model.predict(ts_df.drop('y', axis=1))
        except Exception:
            self._prophet_logger.exception("Prophet predict error...")

        # Residuals
        try:
            # use fittedvalues to fill in the model dictionary
            self.residuals = pd.Series(
                np.asarray(ts_df.y) - np.asarray(self.fittedvalues['yhat']),
                index=self._train_dt.index)
        except (KeyError, AttributeError):
            self._prophet_logger.exception(
                "Model was not fitted or ts has other structure...")
        #
        self.lower_conf_int = pd.Series(
            np.asarray(self.fittedvalues['yhat_lower']),
            index=self._train_dt.index)
        self.upper_conf_int = pd.Series(
            np.asarray(self.fittedvalues['yhat_upper']),
            index=self._train_dt.index)

        self._prophet_logger.info("Done.")
        return self

    def ts_diagnose(self):
        """Diagnoses the fitted model

        Plots the residuals and, if diagnose is switched on and the user
        confirms, runs Prophet cross validation and performance metrics.
        """
        if getattr(self, 'model_fit', None) is None:
            self._prophet_logger.warning(
                "Model has to be fitted first! Please call ts_fit(...)")
            sys.exit("STOP")

        self.plot_residuals()

        if not self._diagnose:
            return

        answer = input(
            "Run cross validation y/n? Note, depending on parameters provided "
            "this can take some time...")
        if answer.strip().lower() != 'y':
            self._prophet_logger.info("OK")
            return

        start = time()
        self._prophet_logger.info(
            "Running cross validation using parameters provided....")
        cv_kwargs = dict(period=self._step, horizon=self._horizon)
        if self._history is not None:
            cv_kwargs['initial'] = self._history
            error_msg = ("Prophet cross validation error: check your "
                         "parameters 'history', 'horizon', 'step'!")
        else:
            error_msg = ("Prophet cross validation error: "
                         "check your parameters 'horizon', 'step'!")
        try:
            self._prophet_cv = cross_validation(self.model_fit, **cv_kwargs)
        except Exception:
            self._prophet_logger.exception(error_msg)
            # without cross validation results there is nothing to summarize
            return

        self._prophet_logger.info("Time elapsed: {}".format(time() - start))
        simu_intervals = self._prophet_cv.groupby('cutoff')['ds'].agg([
            ('forecast_start', 'min'), ('forecast_till', 'max')
        ])
        self._prophet_logger.info(
            "Following time windows and cutoffs have been set-up:\n")
        print(simu_intervals)
        #
        plot_cross_validation_metric(self._prophet_cv, metric='mape')
        #
        self._prophet_logger.info("Running performance metrics...")
        self._prophet_p = performance_metrics(self._prophet_cv)

    def plot_residuals(self):
        """Plot the residuals"""
        super(ProphetForecaster, self)._plot_residuals(
            y=np.asarray(self._train_dt['y']),
            yhat=np.asarray(self.fittedvalues['yhat']),
            _id="Prophet")
        plt.gcf().autofmt_xdate()
        plt.grid(True)
        plt.show()

    def ts_test(self, show_plot=True):
        """Test the fitted model if test data available"""
        if super(ProphetForecaster, self)._check_ts_test() < 0:
            return

        self._prophet_logger.info(
            "Evaluating the fitted Prophet model on the test data...")
        self.forecast = self._model.predict(
            self._test_dt.copy().reset_index().drop('y', axis=1))

        # confidence intervals: append the test period to the in-sample ones
        self.lower_conf_int = pd.concat([
            self.lower_conf_int,
            pd.Series(np.asarray(self.forecast['yhat_lower']),
                      index=self._test_dt.index)
        ], axis=0)
        self.upper_conf_int = pd.concat([
            self.upper_conf_int,
            pd.Series(np.asarray(self.forecast['yhat_upper']),
                      index=self._test_dt.index)
        ], axis=0)

        self.residuals_forecast = pd.Series(
            np.asarray(self._test_dt['y']) - np.asarray(self.forecast['yhat']),
            index=self._test_dt.index)
        self.measure_rmse()
        self._prophet_logger.info("RMSE on test data: {}".format(self.rmse))
        # plot
        if show_plot:
            self.plot_forecast()

    def ts_forecast(self, n_forecast, suppress):
        """Forecast time series over time frame in the future specified via n_forecast"""
        #
        n_forecast = super(ProphetForecaster,
                           self)._check_ts_forecast(n_forecast)
        #
        self._prophet_logger.info("Fitting using all data....")
        self._mode = 'forecast'
        self.ts_fit(suppress=suppress)

        self._prophet_logger.info("Forecasting next " + str(n_forecast) +
                                  str(self.ts_df.index.freq))
        #
        future = self._model.make_future_dataframe(periods=n_forecast,
                                                   freq=self.freq)
        if self._weekend_seasonality:
            # the conditional seasonalities need their condition columns
            on_weekend = future['ds'].apply(self.we_season)
            future['on_weekend'] = on_weekend
            future['off_weekend'] = ~on_weekend

        self.forecast = self._model.predict(future)

        # confidence intervals
        self.lower_conf_int = pd.concat([
            self.lower_conf_int,
            pd.Series(np.asarray(self.forecast['yhat_lower']),
                      index=future.ds)
        ], axis=0)
        self.upper_conf_int = pd.concat([
            self.upper_conf_int,
            pd.Series(np.asarray(self.forecast['yhat_upper']),
                      index=future.ds)
        ], axis=0)

        # no actual values for the future, hence no residuals
        self.residuals_forecast = None
        self.plot_forecast()

    def plot_forecast(self):
        """Plot forecasted values"""
        if self.residuals_forecast is not None:
            # test mode: residuals on the test data are available
            super(ProphetForecaster, self)._plot_forecast(
                y=np.asarray(self._train_dt['y']),
                yhat=np.asarray(self.fittedvalues['yhat']),
                forecast=pd.Series(np.asarray(self.forecast['yhat']),
                                   index=self.forecast['ds']),
                _id='Prophet')
        else:
            # forecast mode: use Prophet's own plots
            fig_forecast = self._model.plot(self.forecast)
            self._model.plot_components(self.forecast)
            if self._add_change_points:
                add_changepoints_to_plot(fig_forecast.gca(), self._model,
                                         self.forecast)

        plt.gcf().autofmt_xdate()
        plt.grid(True)
        plt.show()
# In[11]:

# Fit with yearly_seasonality=13 and plot the yearly component.
k = Prophet(yearly_seasonality=13).fit(a)
j = plot_yearly(k)

# In[16]:

# Default model: fit, predict on `future` and plot the components.
m2 = Prophet()
forecast4 = m2.fit(a).predict(future)
fig = m2.plot_components(forecast4)

# In[33]:

# Treat observations above 280 as outliers. Only `y` is blanked: the rows keep
# their `ds`, which is how the Prophet documentation removes outliers. Blanking
# the whole row (as before) also wiped the dates.
a.loc[a['y'] > 280, 'y'] = None

# In[35]:

# Refit on the cleaned data and plot the forecast.
g = Prophet()
model = g.fit(a)
fig = model.plot(model.predict(future))
## Bring the ds column back into forecast2.
## No "on" key is given, so the join is made on the index of both frames.
forecast2 = forecast2.join(forecast1['ds'], how='inner')

#%%
## Prophet's own plot: forecast, confidence intervals and actual values.
m.plot(forecast1)

#%%
## Components are shown for forecast1 only; reviewing the exponentiated
## components is considered unnecessary and would require joining forecast2
## with forecast1.
m.plot_components(forecast1)

#%%
## fill_between needs a datetime index for its first argument, so the ds
## column of forecast2 becomes the index before plotting.
forecast2.set_index('ds', inplace=True)
pplt.subplots(figsize=(30, 10))

## With the view_hour data the forecasts MUST be the exponentiated ones
## (i.e., forecast2).
pplt.plot(view_hour['distinct_freq_sum'], label='Original', color='black')
pplt.plot(forecast2['yhat'], color='red', label='Forecast')
pplt.fill_between(forecast2.index,
                  forecast2['yhat_upper'],
                  forecast2['yhat_lower'],
                  color='gray',
                  alpha=0.25)
pplt.ylabel('Distinct Freq Sums')
pplt.xlabel('Hours')
def outliers_to_na(ts, devs):
    """Blank out the outliers of ts['y'] in place.

    A value is an outlier when it lies more than `devs` standard deviations
    away from the median of the column. Median and standard deviation are
    computed once, before anything is blanked. Missing values are left alone.

    Parameters
    ----------
    ts: pandas.DataFrame with a numeric column 'y'
    devs: number of standard deviations tolerated around the median

    Returns
    -------
    The same data frame `ts` (modified in place).
    """
    median = ts['y'].median()
    std = np.std(ts['y'])
    is_outlier = (ts['y'] < median - devs * std) | (ts['y'] >
                                                    median + devs * std)
    # one .loc assignment instead of element-wise chained assignment
    # (ts['y'][x] = None), which is not guaranteed to write to ts
    ts.loc[is_outlier, 'y'] = np.nan
    return ts


def main():
    """Customer Lifetime Value & Sales Revenue Forecasting"""
    st.title("Customer Lifetime Value & Sales Revenue Forecasting")
    st.subheader(
        "Built with Streamlit,Lifetimes, fbProphet and Plotly library")

    # Menu
    menu = [
        'Exploratory Data Analysis', 'Customer Lifetime Value',
        'Sales Revenue Forecasting', 'About'
    ]
    choices = st.sidebar.selectbox('Select Menu', menu)

    # Options shared by the select boxes below
    months = [
        "January", "February", "March", "April", "May", "June", "July",
        "August", "September", "October", "November", "December"
    ]
    quantities = list(range(1, 11))

    if choices == 'Exploratory Data Analysis':
        st.subheader('Exploratory Data Analysis')
        clean = pd.read_csv('data/clean_df.csv')
        clean = clean.drop('Unnamed: 0', axis=1)
        clean = clean.rename(columns={"Price": "Revenue"})
        clean["Date"] = pd.to_datetime(clean["Date"])
        clean["Month"] = clean["Date"].dt.strftime("%B")

        def totals_by_item(column):
            # total of `column` per item, largest first
            return clean.groupby("Description")[column].sum().reset_index(
            ).sort_values(by=column, ascending=False)

        def totals_by_month_and_item(column):
            # total of `column` per (month, item)
            return clean.groupby(["Month",
                                  "Description"])[column].sum().reset_index()

        def show_bar(frame, column):
            # horizontal bar chart; rows are sorted ascending so that the
            # largest bar ends up on top
            st.plotly_chart(
                px.bar(frame.sort_values(by=column),
                       x=column,
                       y="Description",
                       orientation="h"))

        if st.checkbox('View Data'):
            st.dataframe(clean)

        st.subheader("Annual Aggregation")
        if st.checkbox('View Top 10 Items By Revenue'):
            show_bar(totals_by_item("Revenue").head(10), "Revenue")

        if st.checkbox('View Bottom 10 Items By Revenue'):
            show_bar(totals_by_item("Revenue").tail(10), "Revenue")

        if st.checkbox('View Top 10 Popular Items'):
            show_bar(totals_by_item("Quantity").head(10), "Quantity")

        if st.checkbox('View Least Popular Items'):
            qty1 = st.selectbox("Select Total Quantity Sold",
                                quantities,
                                key="qty1")
            quantity = totals_by_item("Quantity")
            quantity_tail = quantity[quantity["Quantity"] ==
                                     qty1].reset_index(drop=True)
            st.dataframe(quantity_tail[["Description"]])

        st.subheader("Monthly Aggregation")
        if st.checkbox('View Monthly Top 10 Items By Revenue'):
            mth1 = st.selectbox("Select Month", months, key="mth1")
            monthrevenue = totals_by_month_and_item("Revenue")
            month_revenue = monthrevenue[monthrevenue["Month"] ==
                                         mth1].sort_values(by="Revenue",
                                                           ascending=False)
            show_bar(month_revenue.head(10), "Revenue")

        if st.checkbox('View Monthly Bottom 10 Items by Revenue'):
            mth2 = st.selectbox("Select Month", months, key="mth2")
            monthrevenue = totals_by_month_and_item("Revenue")
            month_revenue = monthrevenue[monthrevenue["Month"] ==
                                         mth2].sort_values(by="Revenue",
                                                           ascending=False)
            show_bar(month_revenue.tail(10), "Revenue")

        if st.checkbox('View Monthly Top 10 Popular Items'):
            mth3 = st.selectbox("Select Month", months, key="mth3")
            monthquantity = totals_by_month_and_item("Quantity")
            month_quantity = monthquantity[monthquantity["Month"] ==
                                           mth3].sort_values(by="Quantity",
                                                             ascending=False)
            show_bar(month_quantity.head(10), "Quantity")

        if st.checkbox('View Monthly Least Popular Items'):
            mth4 = st.selectbox("Select Month", months, key="mth4")
            qty2 = st.selectbox("Select Total Quantity Sold",
                                quantities,
                                key="qty2")
            monthquantity = totals_by_month_and_item("Quantity")
            month_quantity_tail = monthquantity[
                (monthquantity["Month"] == mth4)
                & (monthquantity["Quantity"] == qty2)].reset_index(drop=True)
            st.dataframe(month_quantity_tail[["Description"]])

    if choices == 'Customer Lifetime Value':
        st.subheader('Customer Lifetime Value')
        st.subheader("Model Based On 30 Days")
        output = pd.read_csv('data/output_df.csv')
        output["predicted_purchases"] = output["predicted_purchases"].round()
        output["expected_total_monetary_value"] = output[
            "predicted_purchases"] * output["expected_monetary_value"]

        if st.checkbox('View Predictions'):
            st.dataframe(output[[
                "CustomerID", "predicted_purchases",
                "expected_monetary_value", "expected_total_monetary_value"
            ]])

            def get_table_download_link(df):
                # the CSV is embedded in the link itself as a base64 data URI
                csv = df.to_csv(index=False)
                b64 = base64.b64encode(csv.encode()).decode()
                return f'<a href="data:file/csv;base64,{b64}" download="data/output_df.csv">Download</a>'

            st.markdown(get_table_download_link(output),
                        unsafe_allow_html=True)

        if st.checkbox('View More On Expected Total Monetary Value'):
            exp_tot = output["expected_total_monetary_value"].describe(
            ).to_frame()
            st.dataframe(exp_tot)
            st.subheader("Boxplot")
            fig7 = px.box(output, y="expected_total_monetary_value")
            st.plotly_chart(fig7)
            st.subheader("Histogram")
            fig8 = px.histogram(output, x="expected_total_monetary_value")
            st.plotly_chart(fig8)

    if choices == 'Sales Revenue Forecasting':
        st.subheader('Sales Revenue Forecasting')
        df_load_state = st.text('Loading data...')
        df = load_data('data/data.csv')
        df_load_state.text('Loading data... done!')

        chart = df.groupby(['InvoiceDate'])[['Revenue']].sum()

        def plot_fig():
            fig = go.Figure()
            fig.add_trace(
                go.Scatter(x=chart.index, y=chart['Revenue'], name="Revenue"))
            fig.layout.update(title_text='UK Revenue for year 2011 ',
                              xaxis_rangeslider_visible=True)
            st.plotly_chart(fig)
            return fig

        # plotting the figure of Actual Data
        plot_fig()

        if st.checkbox('Show raw data'):
            st.subheader('Raw data')
            st.write(chart)

        # shape the df w.r.t requirement by fbProphet
        df_prophet = df.groupby(['InvoiceDate'],
                                as_index=False)[['Revenue']].sum()
        # remove negative value (fbprophet works with 'None')
        # NOTE(review): the row position 21 is hard-coded -- presumably the
        # only negative total in data/data.csv; confirm when the data changes.
        df_prophet.iloc[21, 1] = None
        df_prophet.columns = ['ds', 'y']

        # remove outliers based on 2 std dev
        outliers_to_na(df_prophet, 2)

        m = Prophet(seasonality_mode='additive', changepoint_prior_scale=0.11)
        m.fit(df_prophet)
        future = m.make_future_dataframe(periods=3, freq='M')
        # from here on `future` holds the forecast
        future = m.predict(future)

        # plot forecast
        fig1 = plot_plotly(m, future)
        if st.checkbox('Show forecast data'):
            st.subheader('forecast data')
            # NOTE(review): 305 is hard-coded -- presumably the number of
            # historic rows, so that only the forecast rows are shown; confirm.
            st.write(future.loc[305:, ['ds', 'yhat']])
        st.write(
            'Quarterly Sales Revenue for Dec 2011, Jan 2012 , Feb 2012')
        st.plotly_chart(fig1)

        # plot component wise forecast
        st.write("Component wise forecast")
        fig2 = m.plot_components(future)
        st.write(fig2)

    if choices == 'About':
        st.subheader('About')