def forecasting_autoarima(y_train, y_test, s):
    """Fit an AutoARIMA forecaster and render its forecast in Streamlit.

    Parameters
    ----------
    y_train
        Series the forecaster is fitted on.
    y_test
        Hold-out series. Its length sets the forecasting horizon and it is
        drawn on the plot next to the prediction.
    s
        Value forwarded to ``AutoARIMA`` as its ``sp`` argument.

    Returns
    -------
    None
        The forecast is only plotted, not returned.
    """
    # One forecasting step per hold-out observation: 1, 2, ..., len(y_test).
    horizon = np.arange(1, len(y_test) + 1)

    model = AutoARIMA(sp=s)
    model.fit(y_train)
    forecast = model.predict(horizon)

    plot_ys(y_train, y_test, forecast,
            labels=["y_train", "y_test", "y_pred"])
    st.pyplot()
def _component_regressor(component_idx):
    """Return ``(regressor, param_grid)`` for one decomposed component.

    The first component (index 0) gets a feature-selection + SVR pipeline
    with a grid over ``C``, ``gamma`` and the selection percentile; every
    other component gets a ``LassoLarsCV`` with a grid over ``normalize``.
    """
    if component_idx == 0:
        param_grid = {
            "regressor__clf__C": [1, 5, 10, 25, 50, 100, 150],
            "regressor__clf__gamma": ['scale', 0.001, 0.01, 0.1, 1.0],
            'regressor__fs__percentile': range(10, 100, 10),
        }
        regressor = Pipeline([
            ("fs", SelectPercentile(percentile=50, score_func=f_regression)),
            ("clf", SVR(C=5, gamma="scale")),
        ])
    else:
        param_grid = {"regressor__normalize": [True, False]}
        regressor = LassoLarsCV()
    return regressor, param_grid


def main(opt, verbose=0):
    """Decompose a wind-speed series, forecast each component and sum them.

    The series loaded for ``opt.station`` is decomposed with ``EMD()``; each
    resulting component is forecast separately through a grid-searched
    ``ReducedRegressionForecaster``; the per-component forecasts are summed
    per step, written to an Excel file under ``output/`` and scored against
    the hold-out part of the original series.

    Parameters
    ----------
    opt
        Options object. The attributes read here are ``station``,
        ``test_size``, ``steps``, ``window_length``, ``strategy`` and
        ``n_jobs``.
    verbose
        Forwarded to ``ParallelForecastingGridSearchCV``.

    Returns
    -------
    None
        Results are printed and written to disk.
    """
    wind_speed = load_data(station=opt.station)
    y_train = wind_speed.iloc[:-opt.test_size]
    y_test = wind_speed.iloc[-opt.test_size:]
    plot_ys(y_train, y_test, labels=("y_train", "y_test"))

    # ================================== Model ==================================
    # NOTE(review): the decomposition is computed on the full series, i.e.
    # including the hold-out part - confirm this is intended.
    emd = EMD()
    imfs = emd(wind_speed.values).T
    num_imfs = imfs.shape[1]
    # The last column is labelled "residue", all others "imf<k>".
    imfs = pd.DataFrame(
        imfs,
        index=pd.RangeIndex(start=0, stop=len(imfs), step=1),
        columns=["imf%d" % i for i in range(num_imfs - 1)] + ["residue"])
    y_trains_ = imfs.iloc[:-opt.test_size]
    y_tests_ = imfs.iloc[-opt.test_size:]

    # Forecast table: one (component, step) column per combination,
    # pre-filled with NaN and indexed like the hold-out rows.
    index = imfs.index[-opt.test_size:]
    columns = pd.MultiIndex.from_product(
        [["imf%d" % i for i in range(num_imfs)],
         ["step%d" % i for i in opt.steps]])
    y_preds = pd.DataFrame(np.full((len(index), len(columns)), np.nan),
                           index=index,
                           columns=columns)

    for i in range(num_imfs):
        print("imf%d:" % i if i != num_imfs - 1 else "residue:")
        y_train_, y_test_ = y_trains_.iloc[:, i], y_tests_.iloc[:, i]

        regressor, param_grid = _component_regressor(i)
        forecaster = ReducedRegressionForecaster(
            regressor=regressor,
            window_length=opt.window_length,
            strategy=opt.strategy)
        grid_search = ParallelForecastingGridSearchCV(
            forecaster,
            cv=SlidingWindowSplitter(
                initial_window=int(len(y_train_) * 0.7)),
            param_grid=param_grid,
            scoring=make_forecasting_scorer(root_mean_squared_error,
                                            name="rmse"),
            n_jobs=opt.n_jobs,
            verbose=verbose)

        y_preds_ = multistep_forecasting(grid_search,
                                         y_train_,
                                         y_test_,
                                         steps=opt.steps)
        print([
            root_mean_squared_error(y_test_, y_preds_["step%d" % step])
            for step in opt.steps
        ])
        y_preds["imf%d" % i] = y_preds_

    # Put the step level first so each step's component columns can be
    # selected and summed together; skipna=False keeps NaN wherever any
    # component has no forecast.
    y_preds = y_preds.swaplevel(1, 0, axis=1)
    y_preds = pd.concat([
        y_preds["step%d" % step].sum(axis=1, skipna=False)
        for step in opt.steps
    ], axis=1)
    y_preds.columns = ["step%d" % i for i in opt.steps]

    script_tag = os.path.split(__file__)[-1].rsplit(".")[0].upper()
    out_path = "output/%s_%s.xls" % (opt.station, script_tag)
    # Create the target directory first so a missing "output/" folder does
    # not discard the results of the whole grid search at the last step.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    y_preds.to_excel(out_path)

    print([
        root_mean_squared_error(y_test, y_preds["step%d" % step])
        for step in opt.steps
    ])
def main():
    """Compare several forecasters on the airline dataset.

    Loads the series via ``datasets.load_airline()``, holds out the last 36
    observations, fits a seasonal-naive, AutoARIMA, exponential-smoothing,
    Theta and ensemble forecaster (all with ``sp=12``), plots every forecast
    against the train/test split and prints each model's sMAPE as a
    percentage.
    """
    # Univariate, monthly records from 1949 to 60 (144 records)
    df = datasets.load_airline()
    # 36 months for testing
    y_train, y_test = temporal_train_test_split(df, test_size=36)
    # Forecast horizon: one step per test observation.
    fh = np.arange(1, len(y_test) + 1)

    def exp_smoothing():
        # ``damped`` is given a real boolean: the former string 'True' only
        # worked because any non-empty string (even 'False') is truthy.
        return ExponentialSmoothing(trend='add',
                                    damped=True,
                                    seasonal='multiplicative',
                                    sp=12)

    # (name used in the printed report, plot label, forecaster)
    # Naive strategy options: last, mean, seasonal_last.
    models = [
        ('NaiveForecaster', 'Naive Forecaster',
         NaiveForecaster(strategy='seasonal_last', sp=12)),
        ('AutoARIMA', 'AutoARIMA',
         AutoARIMA(sp=12, suppress_warnings=True, trace=1)),
        ('Exp Smoothing', 'Exp Smoothing', exp_smoothing()),
        ('Theta', 'Theta', ThetaForecaster(sp=12)),
        ('Ensemble', 'Ensemble',
         EnsembleForecaster([
             ('NaiveForecaster',
              NaiveForecaster(strategy='seasonal_last', sp=12)),
             ('AutoARIMA', AutoARIMA(sp=12, suppress_warnings=True)),
             ('Exp Smoothing', exp_smoothing()),
             ('Theta', ThetaForecaster(sp=12)),
         ])),
    ]

    # Fit and predict in the listed order.
    predictions = []
    for _, _, forecaster in models:
        forecaster.fit(y_train)
        predictions.append(forecaster.predict(fh))

    plot_ys(y_train, y_test, *predictions,
            labels=['Train', 'Test'] + [label for _, label, _ in models])
    plt.xlabel('Months')
    plt.ylabel('Number of flights')
    plt.title(
        'Time series of the number of international flights in function of time'
    )
    plt.show()

    for (name, _, _), y_pred in zip(models, predictions):
        # Scale first, then round: ``100 * round(x, 3)`` can print float
        # noise such as 14.499999999999998.
        print('SMAPE Error for %s is:' % name,
              round(100 * smape_loss(y_test, y_pred), 1), '%')
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import ElasticNet
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor

# In[18]:

from sktime.datasets import load_airline

airlines = load_airline()

# In[17]:

# Hold out the last TEST_SIZE observations and forecast exactly that many
# steps ahead (steps 1 .. FH).
TEST_SIZE = 36
FH = TEST_SIZE
fh = np.arange(FH) + 1
train, test = temporal_train_test_split(airlines, test_size=TEST_SIZE)
plot_ys(train, test, labels=["train", "test"])

# ## Naive Forecaster

# In[19]:

# Fit one NaiveForecaster per strategy; each gets its own plot, titled with
# the strategy name and its sMAPE on the hold-out set.
strategies = ["last", "mean", "drift"]
for strategy in strategies:
    forecaster = NaiveForecaster(strategy=strategy)
    forecaster.fit(train)
    y_pred = forecaster.predict(fh)
    plot_ys(train, test, y_pred, labels=["train", "test", "preds"])
    plt.title(
        f'strategy : {strategy} - smape_loss : {round(smape_loss(test,y_pred),4)}'
    )
tsplot(series, lags=x) st.pyplot() if st.checkbox("Select the columns to use for training"): columns = df.columns.tolist() selected_column = st.multiselect("Select Columns", columns) new_df = df[selected_column] st.write(new_df) if st.checkbox("Train/Test Split"): try: y_train, y_test = temporal_train_test_split( new_df.T.iloc[0]) st.text("Train Shape") st.write(y_train.shape) st.text("Test Shape") st.write(y_test.shape) plot_ys(y_train, y_test, labels=["y_train", "y_test"]) st.pyplot() except IndexError: st.write( "First select timeseries column to train, for further operation" ) if st.checkbox( "select the checkbox for tarining model on AutoArima"): y_train, y_test = temporal_train_test_split(new_df.T.iloc[0]) forecasting_autoarima(y_train, y_test, s) if regressor_choice == 'FBProphet': df = dataframe.copy() columns = df.columns.tolist() select_columns_to_plot = st.multiselect("Select columns to plot",
def _render_exploratory_analysis(data, lags):
    """Render the "Exploratory Data Analysis" widgets for ``data``.

    ``lags`` is the sidebar value forwarded to ``tsplot``.
    """
    # Show dataset
    if st.checkbox("Show Dataset"):
        total_rows = len(data)
        # Clamp the lower bound so data with fewer than 5 rows does not give
        # the widget a minimum above its maximum.
        rows = st.number_input("Number of rows", min(5, total_rows),
                               total_rows)
        st.dataframe(data.head(rows))

    # Show columns
    if st.checkbox("Columns"):
        st.write(data.columns)

    # Data types
    if st.checkbox("Column types"):
        st.write(types(data))

    # Show Shape
    if st.checkbox("Shape of Dataset"):
        data_dim = st.radio("Show by", ("Rows", "Columns", "Shape"))
        if data_dim == "Columns":
            st.text("Number of Columns: ")
            st.write(data.shape[1])
        elif data_dim == "Rows":
            st.text("Number of Rows: ")
            st.write(data.shape[0])
        else:
            st.write(data.shape)

    # Check null values in dataset
    if st.checkbox("Check null values"):
        nvalues = null_values(data)
        st.write(nvalues)

    # Show Data summary
    if st.checkbox("Show Data Summary"):
        st.text("Datatypes Summary")
        st.write(data.describe())

    # Plot time series, ACF and PACF
    if st.checkbox("Select column as time series"):
        columns = data.columns.tolist()
        selected = st.multiselect("Choose", columns)
        series = data[selected]
        if st.button('Plot Time Series, ACF and PACF'):
            tsplot(series, lags=lags)
            st.pyplot()


def _render_plots(df):
    """Render the "Plotting and Visualization" widgets for ``df``."""
    all_columns = df.columns.tolist()
    type_of_plot = st.selectbox("Select Type of Plot", [
        "area", "line", "scatter", "pie", "bar", "correlation", "distribution"
    ])

    # These three plot types differ only in the Streamlit chart they call.
    native_charts = {
        "line": st.line_chart,
        "area": st.area_chart,
        "bar": st.bar_chart,
    }

    if type_of_plot in native_charts:
        select_columns_to_plot = st.multiselect("Select columns to plot",
                                                all_columns)
        cust_data = df[select_columns_to_plot]
        native_charts[type_of_plot](cust_data)
    elif type_of_plot == "pie":
        select_columns_to_plot = st.selectbox("Select a column", all_columns)
        st.write(df[select_columns_to_plot].value_counts().plot.pie())
        st.pyplot()
    elif type_of_plot == "correlation":
        st.write(
            sns.heatmap(df.corr(),
                        annot=True,
                        linewidths=.5,
                        annot_kws={"size": 7}))
        st.pyplot()
    elif type_of_plot == "scatter":
        st.write("Scatter Plot")
        scatter_x = st.selectbox("Select a column for X Axis", all_columns)
        scatter_y = st.selectbox("Select a column for Y Axis", all_columns)
        st.write(sns.scatterplot(x=scatter_x, y=scatter_y, data=df))
        st.pyplot()
    elif type_of_plot == "distribution":
        select_columns_to_plot = st.multiselect("Select columns to plot",
                                                all_columns)
        st.write(sns.distplot(df[select_columns_to_plot]))
        st.pyplot()


def _render_model_builder(df, seasonal_period):
    """Render the "Building Model" widgets for ``df``.

    ``seasonal_period`` is the sidebar value forwarded to
    ``forecasting_autoarima``.
    """
    no_column_message = (
        "First select timeseries column to train, for further operation")

    st.write("Select the columns to use for training")
    columns = df.columns.tolist()
    selected_column = st.multiselect("Select Columns", columns)
    new_df = df[selected_column]
    st.write(new_df)

    if st.checkbox("Train/Test Split"):
        if not selected_column:
            # With no column selected ``new_df.T.iloc[0]`` raises IndexError;
            # tell the user what to do instead of crashing.
            st.write(no_column_message)
        else:
            # The first selected column is used as the series.
            y_train, y_test = temporal_train_test_split(new_df.T.iloc[0])
            st.text("Train Shape")
            st.write(y_train.shape)
            st.text("Test Shape")
            st.write(y_test.shape)
            plot_ys(y_train, y_test, labels=["y_train", "y_test"])
            st.pyplot()

    if st.button("Training a Model"):
        model_selection = st.selectbox("Model to train",
                                       ["AutoArima", "LSTM", "MLP", "RNN"])
        # Only "AutoArima" has an implementation here; the other options
        # currently do nothing.
        if model_selection == "AutoArima":
            if not selected_column:
                st.write(no_column_message)
            else:
                y_train, y_test = temporal_train_test_split(
                    new_df.T.iloc[0])
                forecasting_autoarima(y_train, y_test, seasonal_period)


def main():
    """Run the Streamlit app: sidebar, CSV upload and the selected activity.

    The sidebar chooses one of four activities. The three data activities
    only render once a CSV file has been uploaded; "About" renders
    regardless of the upload.
    """
    st.sidebar.title("What to do")
    activities = [
        "Exploratory Data Analysis", "Plotting and Visualization",
        "Building Model", "About"
    ]
    choice = st.sidebar.selectbox("Select Activity", activities)

    # Add a slider to the sidebar:
    st.sidebar.markdown("# Lang")
    x = st.sidebar.slider('Select a lang for ACF and PACF analysis', 50, 60)

    # Add a slider to the sidebar:
    st.sidebar.markdown("# Seasonal")
    s = st.sidebar.slider(
        'Select a seasonal parameter from previous ACF and PACF analysis',
        24, 48)

    # cloud logo
    st.sidebar.title("Built on:")
    st.sidebar.image("src/ibmcloud_logo.png", width=200)

    # Upload file
    uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
    has_file = uploaded_file is not None

    if has_file and choice == "Exploratory Data Analysis":
        data = pd.read_csv(uploaded_file)
        st.subheader(choice)
        _render_exploratory_analysis(data, x)
    elif has_file and choice == "Plotting and Visualization":
        st.subheader(choice)
        data = pd.read_csv(uploaded_file)
        _render_plots(data.copy())
    elif has_file and choice == "Building Model":
        st.subheader(choice)
        data = pd.read_csv(uploaded_file)
        _render_model_builder(data.copy(), s)
    elif choice == "About":
        st.title("About")
        st.write("The app developed by Alexander Robles.")
        st.write("Stack: Python, Streamlit, Docker, Kubernetes")
import numpy as np
import pandas as pd
from sktime.datasets import load_airline
from sktime.forecasting.model_selection import temporal_train_test_split
from sktime.performance_metrics.forecasting import smape_loss
from sktime.utils.plotting.forecasting import plot_ys
from sktime.forecasting.naive import NaiveForecaster

# Section 1: introduce the dataset (the displayed Markdown text is Chinese).
st.write('''
### 1 数据简介
> 使用著名的Box-Jenkins航空公司数据集,该数据集显示1949-1960年期间每月国际航班的乘客人数。除了使用原始时间序列(这是乘法时间序列的经典示例)之外,我们还将通过对原始数据执行对数转换来创建加法时间序列,因此我们可以将预测器与两种类型的模型进行比较。
''')

y = load_airline()
st.dataframe(y.head())

fig, ax = plot_ys(y)
ax.set(xlabel="Time", ylabel="Number of airline passengers")
# Hand the figure to Streamlit explicitly instead of relying on the global
# pyplot state, which Streamlit discourages.
st.pyplot(fig)

# Section 2: define the forecasting task - hold out the last 36 points as
# test data and evaluate with sMAPE (the displayed Markdown text is Chinese).
st.write('''
### 2 定义预测任务
* 接下来,我们将定义一个预测任务。我们将尝试使用前几年的训练数据来预测最近3年的数据。 该系列中的每个点代表一个月,因此我们应保留最后36个点作为测试数据,并使用36步超前的预测范围来评估预测效果。
* 我们将使用sMAPE(对称平均绝对百分比误差)来量化我们预测的准确性。 较低的sMAPE意味着较高的精度。 我们可以按以下方式拆分数据:
''')

y_train, y_test = temporal_train_test_split(y, test_size=36)
# ``plot_ys`` returns ``(figure, axes)``, as the unpacking above shows; keep
# the figure so it can be rendered explicitly.
split_fig, _split_ax = plot_ys(y_train, y_test, labels=["y_train", "y_test"])
st.pyplot(split_fig)
st.write("y_train.shape[0],y_test.shape[0]:", y_train.shape[0],
         y_test.shape[0])
# Import libraries
import pandas as pd
import numpy as np
import plotly.offline as py
import io
import matplotlib.pyplot as plt

plt.style.use("fivethirtyeight")  # for pretty graphs

from sktime.datasets import load_airline
from sktime.forecasting.model_selection import temporal_train_test_split
from sktime.utils.plotting.forecasting import plot_ys

# Load the airline series, split it with the default test size and plot
# both parts.
y = load_airline()
y_train, y_test = temporal_train_test_split(y)
plot_ys(y_train, y_test, labels=["y_train", "y_test"])

# create naive baseline
import numpy as np
from sktime.datasets import load_airline
from sktime.forecasting.naive import NaiveForecaster
from sktime.forecasting.model_selection import temporal_train_test_split
from sktime.performance_metrics.forecasting import smape_loss

# The data is reloaded and split again here, exactly as above.
y = load_airline()
y_train, y_test = temporal_train_test_split(y)

# forecasting horizon: steps 1 .. len(y_test)
fh = np.arange(len(y_test)) + 1

naive_forecaster_last = NaiveForecaster(strategy="last")
naive_forecaster_last.fit(y_train)