Example #1
0
def forecasting_autoarima(y_train, y_test, s):
    """Fit AutoARIMA on ``y_train`` and plot its forecast over the test span.

    Parameters
    ----------
    y_train
        Series the model is fitted on.
    y_test
        Held-out series. Its length defines the forecasting horizon and it is
        drawn next to the forecast for visual comparison.
    s
        Seasonal periodicity, forwarded to ``AutoARIMA`` as ``sp``.

    Returns
    -------
    None
        The resulting figure is rendered into the Streamlit page.
    """
    # Relative horizon 1..len(y_test): one step per held-out observation.
    fh = np.arange(1, len(y_test) + 1)
    forecaster = AutoARIMA(sp=s)
    forecaster.fit(y_train)
    y_pred = forecaster.predict(fh)
    # Hand the figure to Streamlit explicitly instead of calling
    # ``st.pyplot()`` bare, which falls back to pyplot's global current
    # figure (deprecated by Streamlit).
    fig, _ = plot_ys(y_train,
                     y_test,
                     y_pred,
                     labels=["y_train", "y_test", "y_pred"])
    st.pyplot(fig)
def main(opt, verbose=0):
    """Forecast wind speed via EMD decomposition and per-component models.

    The series returned by ``load_data`` is decomposed with ``EMD``.  Every
    component (the IMFs plus the last column, labelled "residue") gets its
    own grid-searched ``ReducedRegressionForecaster``.  The component
    forecasts are summed per step, written to
    ``output/<station>_<SCRIPT NAME>.xls`` and scored with RMSE against the
    held-out part of the original series.

    Parameters
    ----------
    opt
        Options object.  The attributes read here are ``station``,
        ``test_size``, ``steps``, ``window_length``, ``strategy`` and
        ``n_jobs``.
    verbose : int, default 0
        Verbosity forwarded to ``ParallelForecastingGridSearchCV``.
    """
    wind_speed = load_data(station=opt.station)

    y_train = wind_speed.iloc[:-opt.test_size]
    y_test = wind_speed.iloc[-opt.test_size:]
    plot_ys(y_train, y_test, labels=("y_train", "y_test"))

    # ================================== Model ==================================

    emd = EMD()
    # Transposed so that each column holds one component.
    imfs = emd(wind_speed.values).T

    num_imfs = imfs.shape[1]
    imfs = pd.DataFrame(imfs,
                        index=pd.RangeIndex(start=0, stop=len(imfs), step=1),
                        columns=["imf%d" % i
                                 for i in range(num_imfs - 1)] + ["residue"])

    y_trains_ = imfs.iloc[:-opt.test_size]
    y_tests_ = imfs.iloc[-opt.test_size:]

    # Column labels shared by every per-step table built below.
    step_names = ["step%d" % step for step in opt.steps]

    # One (component, step) column per forecast, pre-filled with NaN so that
    # a component that was never written shows up in the final sum.
    index = imfs.index[-opt.test_size:]
    columns = pd.MultiIndex.from_product(
        [["imf%d" % i for i in range(num_imfs)], step_names])
    y_preds = pd.DataFrame(np.full((len(index), len(columns)), np.nan),
                           index=index,
                           columns=columns)

    for i in range(num_imfs):
        print("imf%d:" % i if i != num_imfs - 1 else "residue:")
        y_train_, y_test_ = y_trains_.iloc[:, i], y_tests_.iloc[:, i]

        if i == 0:
            # The first component is modelled with feature selection + SVR.
            param_grid = {
                "regressor__clf__C": [1, 5, 10, 25, 50, 100, 150],
                "regressor__clf__gamma": ['scale', 0.001, 0.01, 0.1, 1.0],
                'regressor__fs__percentile': range(10, 100, 10),
            }
            regressor = Pipeline([("fs",
                                   SelectPercentile(percentile=50,
                                                    score_func=f_regression)),
                                  ("clf", SVR(C=5, gamma="scale"))])
        else:
            # Every other component uses a cross-validated Lasso-LARS model.
            param_grid = {"regressor__normalize": [True, False]}
            regressor = LassoLarsCV()
        forecaster = ReducedRegressionForecaster(
            regressor=regressor,
            window_length=opt.window_length,
            strategy=opt.strategy)
        grid_search = ParallelForecastingGridSearchCV(
            forecaster,
            cv=SlidingWindowSplitter(initial_window=int(len(y_train_) * 0.7)),
            param_grid=param_grid,
            scoring=make_forecasting_scorer(root_mean_squared_error,
                                            name="rmse"),
            n_jobs=opt.n_jobs,
            verbose=verbose)
        y_preds_ = multistep_forecasting(grid_search,
                                         y_train_,
                                         y_test_,
                                         steps=opt.steps)
        # Per-step RMSE of this component alone.
        print([
            root_mean_squared_error(y_test_, y_preds_[name])
            for name in step_names
        ])
        y_preds["imf%d" % i] = y_preds_

    # Regroup the columns by step, then add the components up to rebuild the
    # forecast of the original series.  skipna=False keeps NaN visible
    # instead of silently treating a missing component as zero.
    y_preds = y_preds.swaplevel(1, 0, axis=1)
    y_preds = pd.concat(
        [y_preds[name].sum(axis=1, skipna=False) for name in step_names],
        axis=1)
    y_preds.columns = step_names

    # Make sure the target folder exists; otherwise a missing "output/"
    # directory would discard the whole grid search at the very last step.
    os.makedirs("output", exist_ok=True)
    # NOTE(review): writing the legacy .xls format depends on the Excel
    # writer available in the installed pandas - confirm for the deployed
    # version.
    y_preds.to_excel(
        "output/%s_%s.xls" %
        (opt.station, os.path.split(__file__)[-1].rsplit(".")[0].upper()))

    # Per-step RMSE of the recombined forecast against the real test data.
    print([
        root_mean_squared_error(y_test, y_preds[name])
        for name in step_names
    ])
Example #3
0
def main():
    """Compare five forecasters on the airline dataset.

    Fits a seasonal-naive model, AutoARIMA, exponential smoothing, a Theta
    forecaster and an ensemble of those four on everything except the last
    36 months, plots every forecast against the hold-out and prints each
    model's sMAPE as a percentage.
    """

    def make_exp_smoothing():
        # ``damped`` is an on/off switch: pass a real boolean.  The string
        # 'True' only worked because any non-empty string is truthy.
        return ExponentialSmoothing(trend='add',
                                    damped=True,
                                    seasonal='multiplicative',
                                    sp=12)

    # Univariate, monthly records from 1949 to 1960 (144 records).
    df = datasets.load_airline()
    # Keep the last 36 months for testing.
    y_train, y_test = temporal_train_test_split(df, test_size=36)
    # Forecast horizon: one step for every observation in y_test.
    fh = np.arange(1, len(y_test) + 1)

    # (name used in the printed report, plot label, forecaster).
    # sp=12: monthly data with a yearly season.
    candidates = [
        ('NaiveForecaster', 'Naive Forecaster',
         NaiveForecaster(strategy='seasonal_last', sp=12)),
        ('AutoARIMA', 'AutoARIMA',
         AutoARIMA(sp=12, suppress_warnings=True, trace=1)),
        ('Exp Smoothing', 'Exp Smoothing', make_exp_smoothing()),
        ('Theta', 'Theta', ThetaForecaster(sp=12)),
        ('Ensemble', 'Ensemble',
         EnsembleForecaster([
             ('NaiveForecaster',
              NaiveForecaster(strategy='seasonal_last', sp=12)),
             ('AutoARIMA', AutoARIMA(sp=12, suppress_warnings=True)),
             ('Exp Smoothing', make_exp_smoothing()),
             ('Theta', ThetaForecaster(sp=12)),
         ])),
    ]

    predictions = []
    for _, _, forecaster in candidates:
        forecaster.fit(y_train)
        predictions.append(forecaster.predict(fh))

    plot_ys(y_train,
            y_test,
            *predictions,
            labels=['Train', 'Test'] + [label for _, label, _ in candidates])
    plt.xlabel('Months')
    plt.ylabel('Number of flights')
    plt.title(
        'Time series of the number of international flights in function of time'
    )
    plt.show()

    for (name, _, _), y_pred in zip(candidates, predictions):
        # Scale first, round second: ``100 * round(x, 3)`` can print float
        # artefacts such as 14.499999999999998.
        print('SMAPE Error for %s is:' % name,
              round(100 * smape_loss(y_test, y_pred), 1), '%')
Example #4
0
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import ElasticNet
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor

# In[18]:

from sktime.datasets import load_airline

# Load the airline dataset that ships with sktime.
airlines = load_airline()

# In[17]:

# The hold-out size and the number of forecast steps are the same value.
TEST_SIZE = 36
FH = TEST_SIZE
# Relative forecasting horizon: 1, 2, ..., FH.
fh = np.arange(FH) + 1
train, test = temporal_train_test_split(airlines, test_size=TEST_SIZE)
plot_ys(train, test, labels=['train', 'test'])

# ## Naive Forecaster

# In[19]:

# Fit one naive forecaster per strategy and draw each forecast with its
# sMAPE in the plot title.
strategies = ['last', 'mean', 'drift']
for strategy in strategies:
    forecaster = NaiveForecaster(strategy=strategy)
    forecaster.fit(train)
    y_pred = forecaster.predict(fh)
    plot_ys(train, test, y_pred, labels=['train', 'test', 'preds'])
    plt.title(
        f'strategy : {strategy} - smape_loss : {round(smape_loss(test,y_pred),4)}'
    )
Example #5
0
                tsplot(series, lags=x)
                st.pyplot()
            if st.checkbox("Select the columns to use for training"):
                columns = df.columns.tolist()
                selected_column = st.multiselect("Select Columns", columns)
                new_df = df[selected_column]
                st.write(new_df)
            if st.checkbox("Train/Test Split"):
                try:
                    y_train, y_test = temporal_train_test_split(
                        new_df.T.iloc[0])
                    st.text("Train Shape")
                    st.write(y_train.shape)
                    st.text("Test Shape")
                    st.write(y_test.shape)
                    plot_ys(y_train, y_test, labels=["y_train", "y_test"])
                    st.pyplot()
                except IndexError:
                    st.write(
                        "First select timeseries column to train, for further operation"
                    )
            if st.checkbox(
                    "select the checkbox for tarining model on AutoArima"):
                y_train, y_test = temporal_train_test_split(new_df.T.iloc[0])
                forecasting_autoarima(y_train, y_test, s)

        if regressor_choice == 'FBProphet':

            df = dataframe.copy()
            columns = df.columns.tolist()
            select_columns_to_plot = st.multiselect("Select columns to plot",
Example #6
0
def main():
    """Render the Streamlit app: sidebar controls plus one page per activity.

    The sidebar offers the activity selector and two sliders: ``x`` is passed
    to ``tsplot`` as the number of lags, ``s`` is passed to
    ``forecasting_autoarima`` as the seasonal parameter.  The three
    data-driven activities are only rendered once a CSV file has been
    uploaded; the "About" page is always available.
    """
    st.sidebar.title("What to do")
    activities = [
        "Exploratory Data Analysis", "Plotting and Visualization",
        "Building Model", "About"
    ]
    choice = st.sidebar.selectbox("Select Activity", activities)
    # Slider for the number of lags used by the ACF/PACF plots.
    st.sidebar.markdown("# Lang")
    x = st.sidebar.slider('Select a lang for ACF and PACF analysis', 50, 60)
    # Slider for the seasonal parameter handed to the AutoARIMA model.
    st.sidebar.markdown("# Seasonal")
    s = st.sidebar.slider(
        'Select a seasonal parameter from previous ACF and PACF analysis', 24,
        48)
    # cloud logo
    st.sidebar.title("Built on:")
    st.sidebar.image("src/ibmcloud_logo.png", width=200)
    # Upload file
    uploaded_file = st.file_uploader("Choose a CSV file", type="csv")

    if uploaded_file is not None and choice == "Exploratory Data Analysis":
        data = pd.read_csv(uploaded_file)
        st.subheader(choice)
        # Show dataset
        if st.checkbox("Show Dataset"):
            # Clamp the lower bound so files with fewer than 5 rows do not
            # produce a minimum that exceeds the maximum.
            rows = st.number_input("Number of rows", min(5, len(data)),
                                   len(data))
            st.dataframe(data.head(rows))
        # Show columns
        if st.checkbox("Columns"):
            st.write(data.columns)
        # Data types
        if st.checkbox("Column types"):
            st.write(types(data))
        # Show Shape
        if st.checkbox("Shape of Dataset"):
            data_dim = st.radio("Show by", ("Rows", "Columns", "Shape"))
            if data_dim == "Columns":
                st.text("Number of Columns: ")
                st.write(data.shape[1])
            elif data_dim == "Rows":
                st.text("Number of Rows: ")
                st.write(data.shape[0])
            else:
                st.write(data.shape)
        # Check null values in dataset
        if st.checkbox("Check null values"):
            nvalues = null_values(data)
            st.write(nvalues)
        # Show Data summary
        if st.checkbox("Show Data Summary"):
            st.text("Datatypes Summary")
            st.write(data.describe())
        # Plot time series, ACF and PACF
        if st.checkbox("Select column as time series"):
            columns = data.columns.tolist()
            selected = st.multiselect("Choose", columns)
            series = data[selected]
            if st.button('Plot Time Series, ACF and PACF'):
                tsplot(series, lags=x)
                st.pyplot()

    elif uploaded_file is not None and choice == "Plotting and Visualization":
        st.subheader(choice)
        data = pd.read_csv(uploaded_file)
        df = data.copy()
        all_columns = df.columns.tolist()
        type_of_plot = st.selectbox("Select Type of Plot", [
            "area", "line", "scatter", "pie", "bar", "correlation",
            "distribution"
        ])

        # The three native Streamlit charts share the same column picker.
        native_charts = {
            "line": st.line_chart,
            "area": st.area_chart,
            "bar": st.bar_chart,
        }

        if type_of_plot in native_charts:
            select_columns_to_plot = st.multiselect("Select columns to plot",
                                                    all_columns)
            cust_data = df[select_columns_to_plot]
            native_charts[type_of_plot](cust_data)

        elif type_of_plot == "pie":
            select_columns_to_plot = st.selectbox("Select a column",
                                                  all_columns)
            st.write(df[select_columns_to_plot].value_counts().plot.pie())
            st.pyplot()

        elif type_of_plot == "correlation":
            st.write(
                sns.heatmap(df.corr(),
                            annot=True,
                            linewidths=.5,
                            annot_kws={"size": 7}))
            st.pyplot()

        elif type_of_plot == "scatter":
            st.write("Scatter Plot")
            scatter_x = st.selectbox("Select a column for X Axis", all_columns)
            scatter_y = st.selectbox("Select a column for Y Axis", all_columns)
            st.write(sns.scatterplot(x=scatter_x, y=scatter_y, data=df))
            st.pyplot()

        elif type_of_plot == "distribution":
            select_columns_to_plot = st.multiselect("Select columns to plot",
                                                    all_columns)
            st.write(sns.distplot(df[select_columns_to_plot]))
            st.pyplot()

    elif uploaded_file is not None and choice == "Building Model":
        st.subheader(choice)
        data = pd.read_csv(uploaded_file)
        df = data.copy()
        st.write("Select the columns to use for training")
        columns = df.columns.tolist()
        selected_column = st.multiselect("Select Columns", columns)
        new_df = df[selected_column]
        st.write(new_df)

        # With no column selected ``new_df.T.iloc[0]`` raises IndexError, so
        # both actions below ask for a selection instead of crashing.
        missing_column_msg = (
            "First select timeseries column to train, for further operation")

        if st.checkbox("Train/Test Split"):
            if not selected_column:
                st.write(missing_column_msg)
            else:
                # The first selected column is used as the series.
                y_train, y_test = temporal_train_test_split(new_df.T.iloc[0])
                st.text("Train Shape")
                st.write(y_train.shape)
                st.text("Test Shape")
                st.write(y_test.shape)
                # Pass the figure explicitly rather than relying on pyplot's
                # global current figure.
                fig, _ = plot_ys(y_train,
                                 y_test,
                                 labels=["y_train", "y_test"])
                st.pyplot(fig)

        # The selectbox must live outside the button: a button is only True
        # for the single rerun right after the click, so a widget nested
        # under it disappears as soon as the user changes its value.
        model_selection = st.selectbox("Model to train",
                                       ["AutoArima", "LSTM", "MLP", "RNN"])
        if st.button("Training a Model"):
            if not selected_column:
                st.write(missing_column_msg)
            elif model_selection == "AutoArima":
                y_train, y_test = temporal_train_test_split(new_df.T.iloc[0])
                forecasting_autoarima(y_train, y_test, s)

    elif choice == "About":
        st.title("About")
        st.write("The app developed by Alexander Robles.")
        st.write("Stack: Python, Streamlit, Docker, Kubernetes")
Example #7
0
import numpy as np
import pandas as pd
from sktime.datasets import load_airline
from sktime.forecasting.model_selection import temporal_train_test_split
from sktime.performance_metrics.forecasting import smape_loss
from sktime.utils.plotting.forecasting import plot_ys

from sktime.forecasting.naive import NaiveForecaster

# Section 1 of the page ("data introduction"): show the head of the airline
# series and plot the full series.
st.write('''
    ### 1 数据简介
    > 使用著名的Box-Jenkins航空公司数据集,该数据集显示1949-1960年期间每月国际航班的乘客人数。除了使用原始时间序列(这是乘法时间序列的经典示例)之外,我们还将通过对原始数据执行对数转换来创建加法时间序列,因此我们可以将预测器与两种类型的模型进行比较。
''')
y = load_airline()
st.dataframe(y.head())
fig, ax = plot_ys(y)
ax.set(xlabel="Time", ylabel="Number of airline passengers")
# Pass the figure explicitly: a bare ``st.pyplot()`` falls back to pyplot's
# global current figure, which Streamlit deprecates.
st.pyplot(fig)

# Section 2 of the page ("define the forecasting task"): hold out the last
# 36 points as test data and show the split.
st.write('''
    ### 2 定义预测任务
    * 接下来,我们将定义一个预测任务。我们将尝试使用前几年的训练数据来预测最近3年的数据。 该系列中的每个点代表一个月,因此我们应保留最后36个点作为测试数据,并使用36步超前的预测范围来评估预测效果。
    * 我们将使用sMAPE(对称平均绝对百分比误差)来量化我们预测的准确性。 较低的sMAPE意味着较高的精度。
    我们可以按以下方式拆分数据:
''')
y_train, y_test = temporal_train_test_split(y, test_size=36)
# Separate name so ``fig``/``ax`` keep pointing at the first plot.
split_fig = plot_ys(y_train, y_test, labels=["y_train", "y_test"])[0]
st.pyplot(split_fig)
st.write("y_train.shape[0],y_test.shape[0]:", y_train.shape[0],
         y_test.shape[0])
# Import libraries
import pandas as pd
import numpy as np
import plotly.offline as py
import io
import matplotlib.pyplot as plt
plt.style.use("fivethirtyeight")  # apply the "fivethirtyeight" matplotlib style to the plots below

from sktime.datasets import load_airline
from sktime.forecasting.model_selection import temporal_train_test_split
from sktime.utils.plotting.forecasting import plot_ys

# Load the airline series, split it into train and test parts (no test size
# is passed, so the splitter's default applies) and plot both parts.
y = load_airline()
y_train, y_test = temporal_train_test_split(y)
plot_ys(y_train, y_test, labels=["y_train", "y_test"])

"""create naive baseline"""

import numpy as np
from sktime.datasets import load_airline
from sktime.forecasting.naive import NaiveForecaster
from sktime.forecasting.model_selection import temporal_train_test_split
from sktime.performance_metrics.forecasting import smape_loss

# Reload and split the airline series (no test size is passed, so the
# splitter's default applies).
y = load_airline()
y_train, y_test = temporal_train_test_split(y)
fh = np.arange(1, len(y_test) + 1)  # forecasting horizon: steps 1..len(y_test)

# Naive baseline configured with the "last" strategy, fitted on the training
# part only.
naive_forecaster_last = NaiveForecaster(strategy="last")
naive_forecaster_last.fit(y_train)