Example #1
def run():
    journal = ledger.read_journal("./secret/ledger.dat")
    last_post = None
    amount = 0

    for post in journal.query(""):
        if last_post is None or post.date == last_post.date:
            if str(post.amount.commodity) != "£":
                continue
            amount = amount + post.amount
        else:
            print(post.date, ",", amount)
            amount = 0
        last_post = post

    df = pd.read_csv('./testing.csv')
    df['y'] = np.multiply(100, df['y'])

    m = Prophet()
    m.fit(df)

    # Note: `future` is not defined in this snippet; an assumed one-year horizon:
    future = m.make_future_dataframe(periods=365)
    forecast = m.predict(future)
    forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail()

    m.plot(forecast)
    m.plot_components(forecast)
Example #2
df.head()

df['x'] = pd.to_datetime(df['Date'], format='%m/%d/%Y %I:%M:%S %p')

df['x2'] = df['x'].dt.strftime('%Y-%m-%d')

data = pd.DataFrame(df['x2'].value_counts(sort=False).reset_index())
data.columns = ['ds', 'y']
crimes = data.sort_values(by='ds')
crimes.reset_index(inplace=True)


# create a Prophet model
m = Prophet()

# fit the dataframe to the Prophet model
m.fit(crimes)

# create a new period to forecast
future = m.make_future_dataframe(periods=365)
future.head()

# making the forecast
forecast = m.predict(future)


# plotting the forecast
fig1 = m.plot(forecast)
fig2 = m.plot_components(forecast)
Example #3
            x=bikerides['ds'],
            y=bikerides['rain'],
            name='Rain',
        ))

if temp_con:
    fig.add_trace(
        go.Scatter(
            x=bikerides['ds'],
            y=bikerides['temp'],
            name='Temp',
        ))

st.write(fig)

st.write(m.plot(forecast))
st.write(m.plot_components(forecast))

st.write(getPerformanceMetrics(m).mean())

st.header('Forecasting Rides')
period = st.slider(
    'Forecast Periods in days',
    1,
    365,
)
st.write("Forecast Periods ", period, " days")

# We must create a data frame holding dates for our forecast. The periods
# parameter counts days as long as the frequency is 'D' for daily. Let's
# do a 180-day forecast, approximately half a year.
future = m.make_future_dataframe(periods=period, freq='D')
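
# A sketch of how the slider-driven horizon above would be used next
# (assumes the fitted Prophet model `m` from earlier in this app):
forecast = m.predict(future)
st.write(m.plot(forecast))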
def compare_models(data, variable, test_size):

    test_split = len(data) - test_size

    # simple model using mean electricity use by month, weekday and hour
    data_train = data[:test_split][[variable]]
    data_train_grouped = (data_train[[variable]].groupby([
        data_train.index.month, data_train.index.weekday, data_train.index.hour
    ]).mean())
    data_train_grouped.index.names = ["month", "weekday", "hour"]

    data_test = pd.DataFrame(
        data={
            "month": data[test_split:].index.month,
            "weekday": data[test_split:].index.weekday,
            "hour": data[test_split:].index.hour,
        },
        index=data[test_split:].index,
    )

    mean_grouped_predictions = data_test.join(data_train_grouped,
                                              how="left",
                                              on=["month", "weekday",
                                                  "hour"])[variable]

    # preparing data for prophet
    df = data[variable].reset_index(level=0)
    df.columns = ["ds", "y"]

    df_train = df[:test_split]
    df_test = df[test_split:]

    m_simple = Prophet()
    m_simple.fit(df_train)
    future_simple = m_simple.make_future_dataframe(periods=test_size, freq="H")
    forecast_simple = m_simple.predict(future_simple)
    # limiting low predictions to zero
    forecast_simple["yhat"] = np.where(forecast_simple["yhat"] < 0, 0,
                                       forecast_simple["yhat"])
    forecast_simple["yhat_lower"] = np.where(forecast_simple["yhat_lower"] < 0,
                                             0, forecast_simple["yhat_lower"])
    forecast_simple["yhat_upper"] = np.where(forecast_simple["yhat_upper"] < 0,
                                             0, forecast_simple["yhat_upper"])
    global forecast_plot_simple
    global component_plot_simple

    forecast_plot_simple = m_simple.plot(forecast_simple)
    component_plot_simple = m_simple.plot_components(forecast_simple)

    # using inbuilt holidays because this automatically applies to predictions also
    m_holiday = Prophet()
    m_holiday.add_country_holidays(country_name="FRA")
    m_holiday.fit(df_train)
    future_holiday = m_holiday.make_future_dataframe(periods=test_size,
                                                     freq="H")
    forecast_holiday = m_holiday.predict(future_holiday)
    # limiting low predictions to zero
    forecast_holiday["yhat"] = np.where(forecast_holiday["yhat"] < 0, 0,
                                        forecast_holiday["yhat"])
    forecast_holiday["yhat_lower"] = np.where(
        forecast_holiday["yhat_lower"] < 0, 0, forecast_holiday["yhat_lower"])
    forecast_holiday["yhat_upper"] = np.where(
        forecast_holiday["yhat_upper"] < 0, 0, forecast_holiday["yhat_upper"])
    global forecast_plot_holiday
    global component_plot_holiday

    forecast_plot_holiday = m_holiday.plot(forecast_holiday)
    component_plot_holiday = m_holiday.plot_components(forecast_holiday)

    m_temp = Prophet()
    m_temp.add_regressor("temperature")
    m_temp.add_regressor("temperature2")
    m_temp.add_regressor("temperature_lag")
    m_temp.add_regressor("temperature2_lag")
    df_train["temperature"] = data["temperature"][:test_split].to_numpy()
    df_train["temperature2"] = df_train["temperature"]**2
    df_train["temperature_lag"] = df_train["temperature"].shift(
        1, fill_value=df_train["temperature"].mean())
    df_train["temperature2_lag"] = df_train["temperature2"].shift(
        1, fill_value=df_train["temperature"].mean()**2)
    m_temp.fit(df_train)
    future_temp = m_temp.make_future_dataframe(periods=test_size, freq="H")
    future_temp["temperature"] = data["temperature"][-len(future_temp
                                                          ):].to_numpy()
    future_temp["temperature2"] = future_temp["temperature"]**2
    future_temp["temperature_lag"] = future_temp["temperature"].shift(
        1, fill_value=future_temp["temperature"].mean())
    future_temp["temperature2_lag"] = future_temp["temperature2"].shift(
        1, fill_value=future_temp["temperature"].mean()**2)
    forecast_temp = m_temp.predict(future_temp)
    # limiting low predictions to zero
    forecast_temp["yhat"] = np.where(forecast_temp["yhat"] < 0, 0,
                                     forecast_temp["yhat"])
    forecast_temp["yhat_lower"] = np.where(forecast_temp["yhat_lower"] < 0, 0,
                                           forecast_temp["yhat_lower"])
    forecast_temp["yhat_upper"] = np.where(forecast_temp["yhat_upper"] < 0, 0,
                                           forecast_temp["yhat_upper"])

    global forecast_plot_temp
    global component_plot_temp

    forecast_plot_temp = m_temp.plot(forecast_temp)
    component_plot_temp = m_temp.plot_components(forecast_temp)

    # calculate rmse

    df_test.y.describe()
    print(
        "Mean RMSE: ",
        np.sqrt(
            mean_squared_error(df_test.y,
                               np.repeat(df_train.y.mean(), len(df_test)))),
    )
    print("Mean grouped RMSE: ",
          np.sqrt(mean_squared_error(df_test.y, mean_grouped_predictions)))
    print(
        "Simple Prophet: ",
        np.sqrt(
            mean_squared_error(df_test.y, forecast_simple.yhat[test_split:])),
    )
    print(
        "Holiday Prophet: ",
        np.sqrt(
            mean_squared_error(df_test.y, forecast_holiday.yhat[test_split:])),
    )
    print(
        "Temperature Prophet: ",
        np.sqrt(
            mean_squared_error(df_test.y, forecast_temp.yhat[test_split:])),
    )
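
# Hypothetical usage sketch of compare_models(); the column name and test size
# below are illustrative only:
# compare_models(data, variable="consumption", test_size=24 * 7)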
Example #5
    model_fit = mod.fit(disp=0)
    ret = pd.DataFrame(model_fit.predict())

    f, axarr = plt.subplots(1, sharex=True)
    arg.y.plot(color='b', linestyle='-', ax=axarr)
    ret.plot(color='r', linestyle='--', ax=axarr)
    plt.show()


if __name__ == "__main__":
    data = pd.read_csv(INPUT_FILE_PATH)
    data["y"] = data["1"] + data["2"] + data["3"] + data["4"] + data[
        "5"] + data["6"]
    train_data = pd.DataFrame([data['ds'], data['y']]).T.sort_values(by="ds")

    # max 369 min 42
    m = Prophet(seasonality_mode='multiplicative',
                changepoint_prior_scale=100,
                interval_width=1)  # Default growth='linear'
    m.fit(train_data)
    future = m.make_future_dataframe(periods=1, freq='w')
    forecast = m.predict(future)
    fig1 = m.plot(forecast, uncertainty=False)
    plt.show()

    train_data.index = train_data["ds"]
    train_data = train_data["y"]

    print("Predict Value : {}".format(math.floor(
        forecast.yhat[-1:].values[0])))
#3: PREPARING THE DATA
chicago_prophet = chicago_df.resample('M').size().reset_index()

print(chicago_prophet)

chicago_prophet.columns = ['Date', 'Crime Count']

chicago_prophet_df = pd.DataFrame(chicago_prophet)
print(chicago_prophet_df)

#4: MAKE PREDICTIONS

print(chicago_prophet_df.columns)

chicago_prophet_df_final = chicago_prophet_df.rename(columns={
    'Date': 'ds',
    'Crime Count': 'y'
})
print(chicago_prophet_df_final)

m = Prophet()
m.fit(chicago_prophet_df_final)

# Forecasting into the future
future = m.make_future_dataframe(periods=365)
forecast = m.predict(future)
print(forecast)

figure = m.plot(forecast, xlabel='Date', ylabel='Crime Rate')
figure3 = m.plot_components(forecast)
Example #7
organic_df = df[df['type']=='organic']
organic_df

plt.figure(figsize=(20,5))
plt.plot(organic_df['Date'], organic_df['AveragePrice'])
plt.xticks([])

organic_df = organic_df.rename(columns={'Date':'ds', 'AveragePrice':'y'})

m = Prophet()
m.fit(organic_df)
future = m.make_future_dataframe(periods=365)
forecast = m.predict(future)

figure = m.plot(forecast, xlabel='Date', ylabel='Price')

decomposed_figure = m.plot_components(forecast)

"""##Price Prediction with Conventional Type"""

conventional_df = df[df['type']=='conventional']
conventional_df

plt.figure(figsize=(20,5))
plt.plot(conventional_df['Date'], conventional_df['AveragePrice'])
plt.xticks([])

conventional_df = conventional_df.rename(columns={'Date':'ds', 'AveragePrice':'y'})

m = Prophet()
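
# A sketch of the remaining steps for the conventional subset, mirroring the
# organic flow above:
m.fit(conventional_df)
future = m.make_future_dataframe(periods=365)
forecast = m.predict(future)
figure = m.plot(forecast, xlabel='Date', ylabel='Price')
decomposed_figure = m.plot_components(forecast)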
Example #8
confirmed.drop(confirmed[confirmed.y == 0].index, inplace=True)

length = int(confirmed.shape[0] * 0.80)
#Split to Train set and test set
train_set = confirmed.iloc[:length]
test_set = confirmed.iloc[length:]

model = Prophet(interval_width=0.95)

model.fit(train_set)
#predict 30 days
future = model.make_future_dataframe(periods=30)
forecast = model.predict(future)

#plot
confirmed_forecast_plot = model.plot(forecast)

forecast_components = model.plot_components(forecast)

#Prepare
test_forecast = forecast[forecast.index >= length]

test_score = test_forecast.set_index('ds')[['yhat']].join(
    test_set.set_index('ds').y).reset_index()

#Drop missing values
test_score.dropna(inplace=True)

from sklearn.metrics import mean_squared_error, mean_absolute_error
print('MAE:', mean_absolute_error(test_score.yhat, test_score.y))
print('RMSE:', np.sqrt(mean_squared_error(test_score.yhat, test_score.y)))
Example #9
def grid_search_worker(event, context={}):
    # Time series model settings
    parameter_list = event['parameters']
    parameters = {}
    holidays_dict = None  # stays None when no 'holidays' key is supplied
    for key in parameter_list:
        # Special case for holidays, because it is a json string
        if key == 'holidays':
            holidays_dict = json.loads(event['data'][key])
            continue
        parameters[key] = event['data'][key]

    forecast = event['forecast']
    print('=====Parameters=======')
    print(parameters)

    # Read the dataset from S3 bucket
    df = read_csv_s3(parameters['dataset'])
    # df = pd.read_csv("./datasets/prophet/example_wp_log_peyton_manning.csv")

    # Transfer holiday to data frame
    if holidays_dict is not None:
        parameters['holidays'] = pd.DataFrame({
            'holiday':
            holidays_dict['holiday'],
            'ds':
            pd.to_datetime(holidays_dict['ds']),
            'lower_window':
            holidays_dict['lower_window'],
            'upper_window':
            holidays_dict['upper_window'],
        })
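
    # Illustrative shape of the 'holidays' JSON parsed above; the values below
    # are hypothetical:
    # event['data']['holidays'] = '{"holiday": ["product_launch"],
    #                               "ds": ["2020-01-01"],
    #                               "lower_window": [0], "upper_window": [1]}'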

    # Fit the model
    df['cap'] = parameters['cap']
    df['floor'] = parameters['floor']
    model = Prophet(
        growth=parameters['growth'],
        changepoint_prior_scale=parameters['changepoint_prior_scale'],
        holidays=parameters['holidays'],
        holidays_prior_scale=parameters['holidays_prior_scale'],
        seasonality_mode=parameters['seasonality_mode'],
        interval_width=parameters['interval_width'])

    model.add_seasonality(name='yearly',
                          period=365,
                          fourier_order=parameters['fourier_order'],
                          prior_scale=parameters['seasonality_prior_scale'])
    model.add_country_holidays(country_name=parameters['country_holidays'])

    # Truncate the time series

    df.loc[(df['ds'] <= parameters['left_bound']) &
           (df['ds'] >= parameters['right_bound']), 'y'] = None

    print("=====Fit the Model=======")
    model.fit(df)

    if forecast == 0:
        # Cross validation the model
        print("=====Cross Validation=======")
        average_metric = cross_validation_worker(model, parameters['initial'],
                                                 parameters['period'],
                                                 parameters['horizon'],
                                                 parameters['metric'])
        print("Metric {0}: {1}".format(parameters['metric'], average_metric))
        return {'average_metric': average_metric, 'event': event}

    else:
        future = model.make_future_dataframe(periods=int(forecast))
        forecast = model.predict(future)
        time_series = model.plot(forecast)
        components = model.plot_components(forecast)
        time_series.savefig(local_repo + '/time_series.png')
        upload_csv_s3(local_repo + '/time_series.png')
        components.savefig(local_repo + '/components.png')
        upload_csv_s3(local_repo + '/components.png')
        return "Graphs are uploaded to S3"
Example #10
def main():
    st.header('Online Stock Price Ticker')

    # run yfinance

    symbol = st.text_input(
        'Enter a symbol: '
    )  # whatever symbol the user types here is looked up, so they can view any company's stock
    # symbol = 'MSFT'  # MSFT is Microsoft; change this line to hard-code a different ticker
    data = yf.Ticker(symbol)

    today = datetime.now().date().isoformat(
    )  # date() keeps only year-month-day (time() would keep only the time); isoformat() turns it into a string
    print(today)

    df = data.history(start='2010-06-01', end='2021-03-22')
    st.dataframe(df)

    st.subheader('Close')  # draw the closing-price chart
    st.line_chart(df['Close'])

    st.subheader('Volume')  # draw the trading-volume chart
    st.line_chart(df['Volume'])

    # information available only through the yfinance library
    # data.info  # general information about the company
    # data.calendar
    # data.major_holders  # who the major shareholders are
    # data.institutional_holders  # which institutions hold the stock
    # data.recommendations  # which institutions bought, over time
    # data.dividends  # dividend information

    div_df = data.dividends
    st.dataframe(div_df.resample('Y').sum())  # dividends summed per year  # resample !!

    new_df = div_df.reset_index(
    )  # turn the date index into a regular column  # so we can chart dividends by year  # this shape is also easy to reuse with prophet later
    new_df['Year'] = new_df['Date'].dt.year
    # chart the dividends
    fig = plt.figure()
    plt.bar(new_df['Year'], new_df['Dividends'])
    st.pyplot(fig)

    # show several stocks' data at once
    favorites = ['msft', 'tsla', 'nvda', 'aapl', 'amzn']  # these could also come from the symbol input
    f_df = pd.DataFrame()  # collect each ticker's closing prices (Close) into an empty dataframe
    for stock in favorites:
        f_df[stock] = yf.Ticker(stock).history(
            start='2010-01-01', end=today)['Close']  # could also be split over a couple of lines with a variable
        # put these values into a column named after the ticker in the empty f_df
    st.dataframe(f_df)
    # chart everything above at once
    st.line_chart(f_df)  # this part would work nicely as a multiselect

    # the library for API calls needs to be imported first
    # (same as when we fetched URLs from Google)
    # pip install requests, then pass in the stocktwits URL
    res = requests.get(
        'https://api.stocktwits.com/api/2/streams/symbol/{}.json'.format(
            symbol)
    )  # to look at the data for any company  # with the text_input at the top, the user chooses which one
    # the response is JSON, so use .json()
    res_data = res.json()  # comes back as lists and dictionaries
    # it can be used as a combination of Python dicts and lists
    #st.write(res_data)

    for message in res_data['messages']:
        # grab the contents stored under the message key
        #st.write(message)  # would show the full contents of all 30 messages

        col1, col2 = st.beta_columns([1, 4])  # lay the columns out in a 1 : 4 ratio

        with col1:
            st.image(message['user']['avatar_url'])
        with col2:
            st.write('User name: ' + message['user']['username'])
            st.write('Tweet: ' + message['body'])
            st.write('Posted at: ' + message['created_at'])

        # st.image( message['user']['avatar_url'] )   # without the col/with blocks, putting just these in the loop stacks everything in one column; the layout above is tidier
        # st.write( message['user']['username'] )
        # st.write( message['body'] )
        # st.write( message['created_at'] )

    # from here on, Prophet !!!
    p_df = df.reset_index()
    p_df.rename(columns={'Date': 'ds', 'Close': 'y'}, inplace=True)
    # st.dataframe(p_df)

    # now we can forecast!
    m = Prophet()
    m.fit(p_df)
    future = m.make_future_dataframe(periods=365)  # prepare one year of future dates
    forecast = m.predict(future)  # >>> and predict over them
    st.dataframe(forecast)

    # plot the chart above
    fig1 = m.plot(forecast)
    st.pyplot(fig1)

    fig2 = m.plot_components(forecast)
    st.pyplot(fig2)
Example #11
#otherwise similar to sk-learn
predictions = model.predict(future_timestamps)

#let's take a look at our model's predictions
print(predictions.head())

#the prediction dataframe contains a lot of columns; to learn more, read up on models such as ARIMA
#but here are some important takeaways
#ds: the datestamp
#yhat: what statistics calls the forecasted value for y at time ds
#yhat_lower: lower bound of our forecast at ds
#yhat_upper: upper bound of our forecast at ds
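
#for example, looking at just the forecast columns described above (a sketch):
print(predictions[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail())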

#conveniently, Prophet has built-in plotting abilities
#which will plot what it believes to be the important things
model.plot(predictions, uncertainty=True)
plt.ylabel('Monthly Number of Airline Passengers')
plt.xlabel('Date')
plt.show()

#the black dots are the observed values from the training set
#the thick blue line is yhat
#and the light blue is the area between yhat_upper and yhat_lower

#and also, wow that is some serious predictive power!

#another cool feature is that Prophet can easily show you what it is using to build its model
#or in other words, the components of the model
model.plot_components(predictions)
plt.show()
Example #12
from fbprophet import Prophet
# import matplotlib.pyplot as plt
# get_ipython().run_line_magic('matplotlib', 'inline')

# Read in the dataset
train = pd.read_csv('./train.csv')
print(train.head())
print(train.tail())

train['Datetime'] = pd.to_datetime(train.Datetime, format='%d-%m-%Y %H:%M')
train.index = train.Datetime
print(train.head())

train.drop(['ID', 'Datetime'], axis=1, inplace=True)
print(train.head())
daily_train = train.resample('D').sum()
daily_train['ds'] = daily_train.index
daily_train['y'] = daily_train.Count
daily_train.drop(['Count'], axis=1, inplace=True)
print(daily_train.head())

m = Prophet(yearly_seasonality=True, seasonality_prior_scale=0.1)
m.fit(daily_train)
future = m.make_future_dataframe(periods=213)
forecast = m.predict(future)
print(forecast)

m.plot(forecast)

m.plot_components(forecast)
holidays = pd.concat(
    (easter, samsung_preorder, samsung_launch, apple_preorder, apple_launch,
     black_friday, cyber_monday, thanksgiving, christmas, independence_day))

# Define and fit the model
m = Prophet(holidays=holidays,
            daily_seasonality=True,
            yearly_seasonality=6,
            changepoint_range=0.9,
            changepoint_prior_scale=0.01)
# changepoints = ['2018-01-01']
# m.add_country_holidays(country_name = 'US')
m.train_holiday_names
m.fit(df)

# Create a dataframe to hold predictions
future = m.make_future_dataframe(periods=243)
future.tail()

# Make predictions
forecast = m.predict(future)
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail(31)
forecast.to_csv('jan_aug_forecast.csv')

# Plot forecasts
fig = m.plot(forecast)
add_changepoints_to_plot(fig.gca(), m, forecast)

# Plot forecast components
m.plot_components(forecast)
Example #14
import matplotlib.pyplot as plt

# Read in the dataset
df = pd.read_csv('./train.csv')
df['Datetime'] = pd.to_datetime(df.Datetime, format='%d-%m-%Y %H:%M')
df.index = df.Datetime
df.drop(['ID', 'Datetime'], axis=1, inplace=True)
# Aggregate by day
daily_df = df.resample('D').sum()
# Rename the time-ordered columns to match Prophet's requirements
daily_df['ds'] = daily_df.index
daily_df['y'] = daily_df.Count
daily_df.drop('Count', axis=1, inplace=True)
print(daily_df.head())

# Fit the Prophet model
model = Prophet(yearly_seasonality=True, seasonality_prior_scale=0.1)
model.fit(daily_df)
# Forecast the next seven months (213 days)
future = model.make_future_dataframe(periods=213)
forecast = model.predict(future)
# print(forecast)

# Inspect the forecast components
model.plot_components(forecast)

# Plot the forecast
model.plot(forecast)
plt.show()

#print(forecast1[['ds','yhat','yhat_lower','yhat_upper']])

#%%
## This works and retained the dataframe type
forecast2 = np.exp(forecast1[['yhat','yhat_lower','yhat_upper']])

## Now merge to bring the ds back into the df
## Without the "on" keyword the join key is implicitly the index which is what we're doing here
forecast2 = forecast2.join(forecast1['ds'], how='inner')



#%%
## This works
## This will create a plot that includes Forecasted, C.I.'s, and Actual values
m.plot(forecast)

#%%
## Save a copy of the plot
fig = m.plot(forecast)  
fig.savefig("/home/steve/forecast_raw.jpeg")

#%%
## I think it is unnecessary to review exponentiated components
## Plus the complexity of joining forecast2 with forecast1
m.plot_components(forecast1);

#%%
## It was necessary, in the fill_between, to use a datetime index associated with 
## the first parameter of the function.
## This necessitated converting the existing ds datetime element to an index
Example #16
import pandas as pd
import numpy as np
from fbprophet import Prophet

# Prep the dataset

data = pd.read_csv("/home/dusty/Econ8310/DataSets/chicagoBusRiders.csv")
route3 = data[data.route=='3'][['date','rides']]
route3.date = pd.to_datetime(route3.date, infer_datetime_format=True)
route3.columns = ['ds', 'y']

# Initialize Prophet instance and fit to data

m = Prophet()
m.fit(route3)

# Create timeline for 1 year in future, then generate predictions based on that timeline

future = m.make_future_dataframe(periods=365)
forecast = m.predict(future)

# Create plots of forecast and truth, as well as component breakdowns of the trends

fig = m.plot(forecast)
fig.show()

comp = m.plot_components(forecast)
comp.show()
def show_trend(df,
               name='Ontario',
               typeToVis="contracted",
               n_changepoints=2,
               startDate=None,
               returnForcast=False,
               typeOfFit='linear'):
    """
    Show trend of log10(@variable) using fbprophet package.
    @ncov_df <pd.DataFrame>: the clean data
    @variable <str>: variable name to analyse
        - if Confirmed, use Infected + Recovered + Deaths
    @n_changepoints <int>: max number of change pointsimport matplotlib.pyplot as plt
    from fbprophet import Prophet
    from fbprophet.plot import add_changepoints_to_plot
    import numpy as np
    import pandas as pd
    from datetime import date, timedelta
    @kwargs: keword arguments of select_area()
    """

    # get only typeToVis and convert the data to log
    # note that we are replacing anything with inf - -inf to 0 there are nans basically
    # or the crit points of log
    if startDate is None:
        df = df[typeToVis]
    else:
        # split the incoming date string (expected format: YYYY-MM-DD)
        splitDate = startDate.split('-')
        # create a time index
        startDate = date(int(splitDate[0]), int(splitDate[1]),
                         int(splitDate[2]))
        endDate = date.today()
        df = df.loc[startDate:endDate, typeToVis]

    if typeOfFit == 'linear':
        df = np.log10(df).replace([np.inf, -np.inf], 0)

    # massage the data to be passed into fbprophet
    # the data frame must contain columns "ds" and "y"
    # these are the date stamps and values respectively
    dfProphetIn = pd.DataFrame({'ds': df.index, 'y': df}).reset_index()
    dfProphetIn = dfProphetIn.drop(columns='date')
    if typeOfFit == 'logistic':
        dfProphetIn['cap'] = 15000
    #dfProphetIn.tail()

    # fbprophet
    # note that the growth in linear here because we transformed the data into log scale
    # pipeline is as follows:
    # model generation ---> model fit ----> model generate future ----> forecast
    if typeOfFit == 'linear':
        model = Prophet(growth="linear",
                        daily_seasonality=False,
                        n_changepoints=n_changepoints)
    else:
        model = Prophet(growth="logistic",
                        daily_seasonality=False,
                        n_changepoints=n_changepoints)

    model.fit(dfProphetIn)
    future = model.make_future_dataframe(periods=3)
    if typeOfFit == 'logistic':
        future['cap'] = 15000
    forecast = model.predict(future)

    # Printing the change points of the model
    print(model.changepoints)

    # Create figure
    # plot the model using prophet
    fig = model.plot(forecast)
    _ = add_changepoints_to_plot(fig.gca(), model, forecast)
    plt.title(f"{name} log10({typeToVis}) over time and change points")
    plt.ylabel(f"log10(the number of cases)")
    plt.xlabel("")
    plt.show()

    if returnForcast:
        return forecast
Example #18
from fbprophet import Prophet
import pandas as pd

df = pd.read_csv('./Datasets/Google_Stock_Price_Train.csv')
open_data = df.groupby('Date').sum()['Open'].reset_index()
open_data.columns = ['ds', 'y'] # renaming the columns as required by model

open_data['ds'] = pd.to_datetime(open_data['ds']) # formatting the date into the form required by the model



m = Prophet(interval_width = 0.96)
m.fit(open_data)

future_predict = m.make_future_dataframe(periods = 50)

my_forecast = m.predict(future_predict)

predicted_data = my_forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']]


#############################################################################
# graphical visualization
import matplotlib
matplotlib.use('Agg')

fig = m.plot(predicted_data)

fig.savefig('./v2.eps', format = 'eps')

Example #19
            )
        else:
            st.markdown(
                f"# PARABÉNS! Você está no caminho certo para fechar {today_year} com as contas em dia!"
            )

        st.write("\nA previsão solicitada se encontra a seguir:")

        st.header("Receitas")
        fcst['mes'] = fcst.ds.apply(lambda x: pd.to_datetime(x).month)
        fcst['ano'] = fcst.ds.apply(lambda x: pd.to_datetime(x).year)
        st.table(fcst[['mes', 'ano', 'yhat']].rename(columns={
            'mes': 'Month',
            'ano': 'Year',
            'yhat': 'Forecast revenue'
        }).tail(n_periods))
        fig = m1.plot(fcst)
        st.pyplot(fig)

        st.header("Despesas")
        fcst2['mes'] = fcst2.ds.apply(lambda x: pd.to_datetime(x).month)
        fcst2['ano'] = fcst2.ds.apply(lambda x: pd.to_datetime(x).year)
        st.table(fcst2[['mes', 'ano',
                        'yhat']].rename(columns={
                            'mes': 'Month',
                            'ano': 'Year',
                            'yhat': 'Forecast expenses'
                        }).tail(n_periods))
        fig2 = m2.plot(fcst2)
        st.pyplot(fig2)
  if column in ('prn', 'epoch_time', 'index'):
      continue

  if column in ('sv_clock_bias', 'sv_clock_drift', 'sv_clock_drift_rate',
                'mean_motion', 'essentricity', 'sqrt_semi_major_axis', 'OMEGA',
                'inclination', 'omega', 'OMEGA_dot', 'inclination_rate', 'codes',
                'gps_week', 'l2_p_data_flag', 'sv_accuracy', 'sv_health', 'tgd',
                'fit_interval'):
    df_m = pd.DataFrame()
    df_m['ds'] = df_prn['epoch_time']
    df_m['y'] = df_prn[column]

    model = Prophet()
    model.fit(df_m)

    future = model.make_future_dataframe(periods=days_to_predict, freq='H')
    forecast=model.predict(future)
    df_predicted[column] = forecast['yhat']
    path_img = 'img/'+column+'.png'
    model.plot(forecast).savefig('img/'+column+'.png')
    #model.plot(forecast)
    #print('RMSE: %f' % np.sqrt(np.mean((forecast.loc[:df_m['y'].size-1, 'yhat']-df_m['y'])**2)) )
    mse = mean_absolute_percentage_error(forecast.loc[:df_m['y'].size-1, 'yhat'],df_m['y'])
    #print("MSE: "+str(mse))
    avg_acc = avg_acc + mse
    del df_m

  if column in ('iode', 'correction_radius_sine', 'correction_latitude_cosine',
                't_tx', 'correction_latitude_sine', 'correction_inclination_cosine',
                'correction_radius_cosine', 'mean_anomaly', 'correction_inclination_sine'):
    df_m = pd.DataFrame()
    df_m['ds'] = df_prn['epoch_time']
    df_m['y'] = df_prn[column]

    model = Prophet(changepoint_prior_scale=0.5)
    model.fit(df_m)
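
    # Sketch of the remaining steps for this branch, mirroring the first branch
    # above (days_to_predict and df_predicted come from the enclosing scope of
    # this snippet):
    future = model.make_future_dataframe(periods=days_to_predict, freq='H')
    forecast = model.predict(future)
    df_predicted[column] = forecast['yhat']
    del df_m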
Example #21
    def fit(self, ts_df: pd.DataFrame, target_col: str, cv: Optional[int], time_col: str) -> object:
        """
        Fits the model to the data

        :param ts_df: The time series data to be used for fitting the model
        :type ts_df pd.DataFrame

        :param target_col: The column name of the target time series that needs to be modeled.
        All other columns will be considered as exogenous variables (if applicable to method)
        :type target_col str

        :param cv: Number of folds to use for cross validation.
        Number of observations in the Validation set for each fold = forecast period
        If None, a single fold is used
        :type cv Optional[int]

        :param time_col: Name of the time column in the dataset (needed by Prophet)
        Time column can also be the index, in which case, this would be the name of the index
        :type time_col str

        :rtype object
        """
        # use all available threads/cores

        self.time_col = time_col
        self.original_target_col = target_col
        self.original_preds = [x for x in list(ts_df) if x not in [self.original_target_col]]

        if len(self.original_preds) == 0:
            self.univariate = True
        else:
            self.univariate = False

        # print(f"Prophet Is Univariate: {self.univariate}")

        ts_df = copy.deepcopy(ts_df)

        ##### if you are going to use matplotlib with prophet data, it gives an error unless you do this.
        pd.plotting.register_matplotlib_converters()

        #### You have to import Prophet if you are going to build a Prophet model #############
        actual = 'y'
        timecol = 'ds'

        data = self.prep_col_names_for_prophet(ts_df=ts_df, test=False)

        if self.univariate:
            dft = data[[timecol, actual]]
        else:
            dft = data[[timecol, actual] + self.original_preds]

        ##### For most Financial time series data, 80% conf interval is enough...
        if self.verbose >= 1:
            print('    Fit-Predict data (shape=%s) with Confidence Interval = %0.2f...' % (dft.shape, self.conf_int))
        ### Make Sure you lower your desired interval width from the normal 95% to a more realistic 80%
        start_time = time.time()

        if self.univariate is False:
            for name in self.original_preds:
                self.model.add_regressor(name)

        print("  Starting Prophet Fit")

        if self.seasonality:
            prophet_seasonality, prophet_period, fourier_order, prior_scale = get_prophet_seasonality(
                                        self.time_interval, self.seasonal_period)
            self.model.add_seasonality(name=prophet_seasonality,
                            period=prophet_period, fourier_order=fourier_order, prior_scale= prior_scale)
            print('       Adding %s seasonality to Prophet with period=%d, fourier_order=%d and prior_scale=%0.2f' %(
                                        prophet_seasonality, prophet_period, fourier_order, prior_scale))
        else:
            print('      No seasonality assumed since seasonality flag is set to False')


        if isinstance(dft, dask.dataframe.core.DataFrame):
            num_obs = dft.shape[0].compute()
        else:
            num_obs = dft.shape[0]

        ### Creating a new way to skip cross validation when trying to run auto-ts multiple times. ###
        if cv == 0:
            cv_in = 0
        else:
            cv_in = copy.deepcopy(cv)
        NFOLDS = self.get_num_folds_from_cv(cv)

        #########################################################################################
        # NOTE: This change to the FB recommendation will cause the cv folds from facebook to
        # be incompatible with the folds from the other models (in terms of periods of evaluation
        # as well as number of observations in each period). Hence the final comparison will
        # be biased since it will not compare the same folds.

        # The original implementation was giving issues under certain conditions, hence this change
        # to FB recommendation has been made as a temporary (short term) fix.
        # The root cause issue will need to be fixed eventually at a later point.
        #########################################################################################

        ### Prophet's Time Interval translates into frequency based on the following pandas date_range alias:
        #  Link: https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#timeseries-offset-aliases
        ## This is done using the get_prophet_time_interval() function later.
        if self.time_interval in self.list_of_valid_time_ints:
            time_int = copy.deepcopy(self.time_interval)
        else:
            time_int = self.get_prophet_time_interval(for_cv=False)

        # First  Fold -->
        #   Train Set: 0:initial
        #   Test Set: initial:(initial+horizon)
        # Second Fold -->
        #   Train Set: (period):(initial+period)
        #   Test Set: (initial+period):(initial+horizon+ period)
        # Format: '850 D'
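
        # For reference, the "FB recommendation" mentioned above is Prophet's own
        # rolling-origin cross validation; a sketch (the initial/period/horizon
        # values are illustrative only):
        # from fbprophet.diagnostics import cross_validation, performance_metrics
        # df_cv = cross_validation(self.model, initial='850 days',
        #                          period='180 days', horizon='365 days')
        # df_p = performance_metrics(df_cv)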

        print("  Starting Prophet Cross Validation")
        ################################################################################
        if self.forecast_period <= 5:
            #### Set a minimum of 5 for the number of rows in test!
            self.forecast_period = 5
        ### In case the number of forecast_period is too high, just reduce it so it can fit into num_obs
        if NFOLDS*self.forecast_period > num_obs:
            self.forecast_period = int(num_obs/(NFOLDS+1))
            print('Lowering forecast period to %d to enable cross_validation' %self.forecast_period)
        ###########################################################################################
        #cv = GapWalkForward(n_splits=NFOLDS, gap_size=0, test_size=self.forecast_period)
        max_trainsize = len(dft) - self.forecast_period
        try:
            cv = TimeSeriesSplit(n_splits=NFOLDS, test_size=self.forecast_period)  ### test_size is only available in sklearn >= 0.24
        except:
            cv = TimeSeriesSplit(n_splits=NFOLDS, max_train_size = max_trainsize)
        y_preds = pd.DataFrame()
        print('Max. iterations using expanding window cross validation = %d' %NFOLDS)
        start_time = time.time()
        rmse_folds = []
        norm_rmse_folds = []
        forecast_df_folds = []

        concatenated = pd.DataFrame()
        extra_concatenated = pd.DataFrame()

        if isinstance(dft, dask.dataframe.core.DataFrame):
            dft = dft.head(len(dft)) ### this converts dask into a pandas dataframe

        if  cv_in == 0:
            print('Skipping cross validation steps since cross_validation = %s' %cv_in)
            model = Prophet(growth="linear")
            kwargs = {'iter':1e2} ## this limits iterations and hence speeds up prophet
        else:
            for fold_number, (train_index, test_index) in enumerate(cv.split(dft)):
                dftx = dft.head(len(train_index)+len(test_index))
                train_fold = dftx.head(len(train_index)) ## now train will be the first segment of dftx
                test_fold = dftx.tail(len(test_index)) ### now test will be right after train in dftx

                horizon = len(test_fold)
                print(f"\nFold Number: {fold_number+1} --> Train Shape: {train_fold.shape[0]} Test Shape: {test_fold.shape[0]}")

                #########################################
                #### Define the model with fold data ####
                #########################################

                model = Prophet(growth="linear")

                ############################################
                #### Fit the model with train_fold data ####
                ############################################

                kwargs = {'iter':1e2} ## this limits iterations and hence speeds up prophet
                model.fit(train_fold, **kwargs)

                #################################################
                #### Predict using model with test_fold data ####
                #################################################

                future_period = model.make_future_dataframe(freq=time_int, periods=horizon)
                forecast_df = model.predict(future_period)
                ### Now compare the actuals with predictions ######

                y_pred = forecast_df['yhat'][-horizon:]

                concatenated = pd.DataFrame(np.c_[test_fold[actual].values,
                            y_pred.values], columns=['original', 'predicted'],index=test_fold.index)

                if fold_number == 0:
                    extra_concatenated = copy.deepcopy(concatenated)
                else:
                    extra_concatenated = pd.concat([extra_concatenated, concatenated])

                rmse_fold, rmse_norm = print_dynamic_rmse(concatenated['original'].values, concatenated['predicted'].values,
                                            concatenated['original'].values)

                print('Cross Validation window: %d completed' %(fold_number+1,))
                rmse_folds.append(rmse_fold)
                norm_rmse_folds.append(rmse_norm)

            ######################################################
            ### This is where you consolidate the CV results #####
            ######################################################
            fig = model.plot(forecast_df)
            #rmse_mean = np.mean(rmse_folds)
            #print('Average CV RMSE over %d windows (macro) = %0.5f' %(fold_number+1,rmse_mean))


            #cv_micro = np.sqrt(mean_squared_error(y_trues.values, y_preds.values))
            #print('Average CV RMSE of all predictions (micro) = %0.5f' %cv_micro)

            try:
                print_ts_model_stats(extra_concatenated['original'], extra_concatenated['predicted'], "Prophet")
            except:
                print('Error: Not able to plot Prophet CV results')

            forecast_df_folds = extra_concatenated['predicted'].values
            #print("  End of Prophet Cross Validation")
            print('Time Taken = %0.0f seconds' %((time.time()-start_time)))

        #### Now you need to fit Prophet on the whole train data set ##########
        dftx = dft.head(len(dft))
        model = Prophet(growth="linear")
        self.model = model
        self.model.fit(dftx, **kwargs)
        print("  End of Prophet Fit")

        #num_obs_folds = df_cv.groupby('cutoff')['ds'].count()

        # https://stackoverflow.com/questions/54405704/check-if-all-values-in-dataframe-column-are-the-same
        #a = num_obs_folds.to_numpy()
        #all_equal = (a[0] == a).all()

        #if not all_equal:
            #print("WARNING: All folds did not have the same number of observations in the validation sets.")
            #print("Num Test Obs Per fold")
            #print(num_obs_folds)

        #rmse_folds = []
        #norm_rmse_folds = []
        #forecast_df_folds = []

        #df_cv_grouped = df_cv.groupby('cutoff')
        #for (_, loop_df) in df_cv_grouped:
        #    rmse, norm_rmse = print_dynamic_rmse(loop_df['y'], loop_df['yhat'], dft['y'])
        #    rmse_folds.append(rmse)
        #    norm_rmse_folds.append(norm_rmse)
        #    forecast_df_folds.append(loop_df)

        # print(f"RMSE Folds: {rmse_folds}")
        # print(f"Norm RMSE Folds: {norm_rmse_folds}")
        # print(f"Forecast DF folds: {forecast_df_folds}")

        # forecast = self.predict(simple=False, return_train_preds=True)

        # ####  We are going to plot Prophet's forecasts differently since it is better
        # dfa = plot_prophet(dft, forecast);
        # # Prophet makes Incredible Predictions Charts!
        # ###  There can't be anything simpler than this to make Forecasts!
        # #self.model.plot(forecast);  # make sure to add semi-colon in the end to avoid plotting twice
        # # Also their Trend, Seasonality Charts are Spot On!
        # try:
        #     self.model.plot_components(forecast)
        # except:
        #     print('Error in FB Prophet components forecast. Continuing...')

        #rmse, norm_rmse = print_dynamic_rmse(dfa['y'], dfa['yhat'], dfa['y'])

        #return self.model, forecast, rmse, norm_rmse
        return self.model, forecast_df_folds, rmse_folds, norm_rmse_folds
avocado_df = avocado_df.sort_values("Date")
plt.plot(avocado_df["Date"], avocado_df["AveragePrice"])

sns.countplot(y="region", data=avocado_df)
sns.countplot(y="year", data=avocado_df)

avocado_prophet = avocado_df[["Date", "AveragePrice"]]
avocado_prophet.columns = ["ds", "y"]

m = Prophet()
m.fit(avocado_prophet)
future = m.make_future_dataframe(periods=365)
forecast = m.predict(future)

m.plot(forecast,
       xlabel="Date",
       ylabel="Average Price of Avocado",
       figsize=(10, 10))
m.plot_components(forecast)

avocado_df["region"]

### REGION SPECIFIC ###

avocado_df = avocado_df[avocado_df["region"] == "California"]
avocado_df = avocado_df.sort_values("Date")
plt.plot(avocado_df["Date"], avocado_df["AveragePrice"])

sns.countplot(y="year", data=avocado_df)

avocado_prophet = avocado_df[["Date", "AveragePrice"]]
avocado_prophet.columns = ["ds", "y"]
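
# A sketch of the remaining region-specific steps, mirroring the nationwide
# avocado flow above:
m = Prophet()
m.fit(avocado_prophet)
future = m.make_future_dataframe(periods=365)
forecast = m.predict(future)
m.plot(forecast, xlabel="Date", ylabel="Average Price of Avocado")
m.plot_components(forecast)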
# Prophet Model

data01_model = Prophet(interval_width=0.95)

data01 = data01.rename(columns={'M_Y': 'ds', 'Qty': 'y'})

data01_model.fit(data01)

data_forecast = data01_model.make_future_dataframe(periods=12, freq='W')
data_forecast_pred = data01_model.predict(data_forecast)

import matplotlib.pyplot as plt

plt.figure(figsize=(18, 6))
data01_model.plot(data_forecast_pred, xlabel='Date', ylabel='Qty')
plt.title('Qty Forecast')

#fig = px.line(data01, x='ds', y='y')
#fig.show()
#
#data.info()

data01['ds'] = pd.to_datetime(data01['ds'], format='%Y-%m-%d').dt.date

data01.info()

fdata = data01[['ds', 'y']]

fdata.set_index('ds', inplace=True)
import pandas as pd
import numpy as np
from fbprophet import Prophet

df = pd.read_csv('./examples/example_wp_R.csv')
df['y'] = np.log(df['y'])
df['cap'] = 8.5

m = Prophet(growth='logistic')
m.fit(df)

future = m.make_future_dataframe(periods=2)
future['cap'] = 8.5

forecast = m.predict(future)
print(forecast)

m.plot(forecast)
from matplotlib import pyplot as plt
plt.show()
Example #25
def predict_each_satellite(prn):
    print(prn)
    df_temp = pd.DataFrame()
    avg_acc = 0.0

    df_prn = df_final.loc[df_final['prn'] == prn].reset_index()
    df_expect2 = df_expect.loc[df_expect['prn'] == prn].reset_index()
    future = pd.DataFrame()
    future['ds'] = df_expect2['epoch_time']
    #print(future)

    df_m = pd.DataFrame()
    df_m['ds'] = df_prn['epoch_time']
    df_m['y'] = df_prn['epoch_time']
    model = Prophet()
    model.fit(df_m)
    #future = model.make_future_dataframe(periods=30, freq='H')
    df_temp['prn'] = prn
    df_temp['epoch_time'] = df_expect2['epoch_time']
    df_temp['prn'] = prn

    for column in list(df_prn.columns.values):
        #print('--------column--------- :'+ str(column))
        if column in ('prn', 'epoch_time', 'index'):
            continue

        if column in ('sv_clock_bias', 'sv_clock_drift', 'sv_clock_drift_rate',
                      'mean_motion', 'essentricity', 'sqrt_semi_major_axis',
                      'OMEGA', 'inclination', 'omega', 'OMEGA_dot',
                      'inclination_rate', 'codes', 'gps_week', 'l2_p_data_flag',
                      'sv_accuracy', 'sv_health', 'tgd', 'fit_interval'):
            df_m = pd.DataFrame()
            df_m['ds'] = df_prn['epoch_time']
            df_m['y'] = df_prn[column]

            model = Prophet()
            model.fit(df_m)

            forecast = model.predict(future)
            model.plot(forecast)
            #print('RMSE: %f' % (np.sqrt(np.mean((forecast.loc[:df2['y'].size-1, 'yhat']-df2['y'])**2))/np.abs(np.max(forecast.loc[:df2['y'].size-1, 'yhat']-df2['y']))))

            mse = mean_absolute_percentage_error(forecast['yhat'],
                                                 df_expect2[column])
            #print("MSE: "+str(mse))
            avg_acc = avg_acc + mse
            df_temp[column] = forecast['yhat']
            del df_m

        if column in ('iode', 'correction_radius_sine', 'correction_latitude_cosine',
                      't_tx', 'correction_latitude_sine', 'correction_inclination_cosine',
                      'correction_radius_cosine', 'mean_anomaly',
                      'correction_inclination_sine'):
            df_m = pd.DataFrame()
            df_m['ds'] = df_prn['epoch_time']
            df_m['y'] = df_prn[column]

            model = Prophet(changepoint_prior_scale=0.5)
            model.fit(df_m)

            forecast = model.predict(future)
            model.plot(forecast)
            #print('RMSE: %f' % (np.sqrt(np.mean((forecast.loc[:df2['y'].size-1, 'yhat']-df2['y'])**2))/np.abs(np.max(forecast.loc[:df2['y'].size-1, 'yhat']-df2['y']))))

            mse = mean_absolute_percentage_error(forecast['yhat'],
                                                 df_expect2[column])
            #print("MSE: "+str(mse))
            avg_acc = avg_acc + mse
            df_temp[column] = forecast['yhat']
            del df_m

        if column in ('time_of_ephemeris', 'iodc'):
            df_m = pd.DataFrame()
            df_m['ds'] = df_prn['epoch_time']
            df_m['y'] = df_prn[column]

            model = Prophet(changepoint_prior_scale=1.0)
            model.fit(df_m)

            forecast = model.predict(future)
            model.plot(forecast)

            #model.plot(forecast)
            #print('RMSE: %f' % (np.sqrt(np.mean((forecast.loc[:df2['y'].size-1, 'yhat']-df2['y'])**2))/np.abs(np.max(forecast.loc[:df2['y'].size-1, 'yhat']-df2['y']))))

            mse = mean_absolute_percentage_error(forecast['yhat'],
                                                 df_expect2[column])
            #print("MSE: "+str(mse))
            avg_acc = avg_acc + mse
            df_temp[column] = forecast['yhat']
            del df_m

    #df_predicted = df_predicted.append(df_temp, ignore_index=True)
    print(df_temp.shape)

    print(" PRN : " + str(prn) + "  acc : " + str(avg_acc / 30))

    return (df_temp, avg_acc, prn)
Example #26
fig = px.line(d_test, x='ds', y='y')  # visualize the data used as the test set
fig.show()

fig = px.line(d_train, x='ds', y='y')  # visualize the data used as the training set
fig.show()

"""#membuat model prophet tanpa melalukan tune pada seasonality parameter"""

m = Prophet( interval_width=0.95,) # the Prophet class (model)

m.fit(d_train) # fit the model using all data

future = m.make_future_dataframe(periods=1336, freq='H')  # we need to specify the number of future periods (hours here)
prediction = m.predict(future)

m.plot(prediction)  # as can be seen, the model tries to fit but cannot yet follow the seasonality of the dataset

import statsmodels.api as sm
sm.graphics.tsa.plot_acf(d_train['y'].values.squeeze(),lags=162)
plt.ylim(-1,1)
plt.xlim(72,75)
plt.show()

"""# membuat model dengan melakukan  tune pada parameter modelnya
parameter yang akan sangat berdampak adalah parameter seasonality disini seperti berdasarkan yang kita lihat dari preproses untuk melihat tren dan seasonality dari dataset dataset memiliki seasonality selama 2 hari sekali, 1 bulan sekali dan 2 bulan sekali hal itu juga di tunjukan berdasarkan autocorelasion dari datanya
"""

m = Prophet( interval_width=0.95, weekly_seasonality=False,daily_seasonality=False,changepoint_prior_scale=0.07) # the Prophet class (model)
m.add_seasonality(name='daily',period=2,fourier_order=4,prior_scale=20,mode='additive')
m.add_seasonality(name='monthly',period=29.5,fourier_order=100,prior_scale=15,mode='additive')
m.add_seasonality(name='weekly',period=7,fourier_order=20,prior_scale=6,mode='additive')
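
# A sketch of fitting and forecasting with the tuned model, following the same
# pattern as the untuned model above:
m.fit(d_train)
future = m.make_future_dataframe(periods=1336, freq='H')
prediction = m.predict(future)
m.plot(prediction)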
Example #27
df.columns.values[0]


#log-transform the y variable
#to linearize a relationship
#simplify the number and complexity of interaction terms
df['y'] = np.log(df['y'])
df.head()

type(df['ds'][0])

#------------------------------#
m = Prophet()
m.fit(df)


#Select the days to make a new dataframe
future = m.make_future_dataframe(periods=50)

#This is the prediction, literally speaking 
forecast = m.predict(future)
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail()

#Plot the results 
%matplotlib inline
c = m.plot(forecast);

#Plot the components
m.plot_components(forecast);
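
#Since y was log-transformed above, a sketch of putting the forecast back on
#the original scale:
forecast[['yhat', 'yhat_lower', 'yhat_upper']] = np.exp(
    forecast[['yhat', 'yhat_lower', 'yhat_upper']])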
#------------------------------#
Example #28
    if (Year_End_Train == Year) & (Month == Month_End_Train + 1):
        split_index = index
        break
data_train = data.iloc[:split_index].copy()
# log-transform the training target (vectorized; equivalent to the original loop)
data_train['y'] = np.log(data_train['y'])
data_test = data.iloc[split_index:]
predict_length = (data_test.shape[0])

#%% Model training and prediction
model = Prophet(daily_seasonality=True)
model.fit(data_train)
predictor = model.make_future_dataframe(periods=predict_length)
data_prediction = model.predict(predictor)

#%% Plot result
plt.figure()
model.plot(data_prediction)

data_test.set_index('ds', inplace=True)
# undo the log transform on the predictions (vectorized)
data_prediction['yhat'] = np.exp(data_prediction['yhat'])
data_prediction.index = list(data['ds'])
data_prediction = data_prediction.drop(['ds'], axis=1).tail(predict_length)

fig, ax1 = plt.subplots(figsize=(10, 8))
ax1.plot(data_test['y'], label='True Price')
ax1.plot(data_prediction['yhat'], 'r-', label='Predicted Price')
ax1.legend()
Example #29
import pandas as pd
from fbprophet import Prophet
import matplotlib.pyplot as plt

df = pd.read_csv('../data/GOOGL.csv')
df.columns = ['ds', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'y']
model = Prophet()
model.fit(df)
future = model.make_future_dataframe(periods=100)
forecast = model.predict(future)
fig1 = model.plot(forecast)
fig2 = model.plot_components(forecast)
plt.show()
Example #30
                        how ='left',left_index=True, right_index=True)


# In[78]:


#move ds from the index back into a column
prophet_pred = prophet_pred.reset_index('ds')


# In[79]:


#plot prophet yhat, yhat_lower, and yhat_upper

pro.plot(prophet_pred)
plt.savefig(r'/home/songy4/Documents/prophet_log_range.png')


# In[80]:


pro.plot_components(prophet_pred)
plt.savefig(r'/home/songy4/Documents/prophet_components.png')


# In[81]:


#calculate rmse, mse errors and mean value
office = office.rename(columns={'Order Date': 'ds', 'Sales': 'y'})
office_model = Prophet(interval_width=0.95)
office_model.fit(office)

# In[73]:

furniture_forecast = furniture_model.make_future_dataframe(periods=36,
                                                           freq='MS')
furniture_forecast = furniture_model.predict(furniture_forecast)

# In[74]:

office_forecast = office_model.make_future_dataframe(periods=36, freq='MS')
office_forecast = office_model.predict(office_forecast)
plt.figure(figsize=(18, 6))
furniture_model.plot(furniture_forecast, xlabel='Date', ylabel='Sales')
plt.title('Furniture Sales')

# In[75]:

plt.figure(figsize=(18, 6))
office_model.plot(office_forecast, xlabel='Date', ylabel='Sales')
plt.title('Office Supplies Sales')

# # Compare Forecasts
# We already have the forecasts for three years for these two categories into the future. We will now join them together to compare their future forecasts.

# In[76]:

furniture_names = [
    'furniture_%s' % column for column in furniture_forecast.columns
]
print(forecast['yhat_upper'][0:5])

#%%
## This works and retained the dataframe type
forecast2 = np.exp(forecast1[['yhat','yhat_lower','yhat_upper']])

## Now merge to bring the ds back into the df
## Without the "on" keyword the join key is implicitly the index
forecast2 = forecast2.join(forecast1['ds'], how='inner')

print(forecast1)

#%%
## This works
## This will create a plot that includes Forecasted, C.I.'s, and Actual values
m.plot(forecast2)

#%%
## I think it is unnecessary to review exponentiated components
## Plus the complexity of joining forecast2 with forecast1
m.plot_components(forecast1);

#%%
## It was necessary, in the fill_between, to use a datetime index associated with 
## the first parameter of the function.
## This necessitated converting the existing ds datetime element to an index
pplt.subplots(figsize=(30,10))
forecast2.set_index('ds',inplace=True)

pplt.plot(view_hour['distinct_freq_sum'], label='Original', color='black');
pplt.plot(forecast2.yhat, color='red', label='Forecast');
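
## Sketch of the fill_between step described above (assumes forecast2 is now
## indexed by the ds datetimes and pplt is matplotlib.pyplot):
pplt.fill_between(forecast2.index, forecast2['yhat_lower'], forecast2['yhat_upper'],
                  color='red', alpha=0.2, label='Forecast interval');
pplt.legend();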
Example #33
def main():
    st.header("Online Stock Price Ticker")

    # symbol = st.text_input('Enter a symbol: ')
    symbol = 'AMZN'

    data = yf.Ticker(symbol)

    today = datetime.now().date().isoformat()  #as a string
    print(today)

    #------------------------------------------------------
    df = data.history(start='2010-06-01', end=today)

    st.dataframe(df)
    # -------------------- Close chart -----------------
    st.subheader('Close')
    st.line_chart(df['Close'])

    #------------------------------------------------
    st.subheader('Volume')
    st.line_chart(df['Volume'])

    # information specific to the yfinance library
    # data.info
    # data.calendar
    # data.major_holders
    # data.institutional_holders
    # data.recommendations
    div_df = data.dividends  #dividend information
    st.dataframe(div_df.resample('Y').sum())

    new_df = div_df.reset_index()
    new_df['Year'] = new_df['Date'].dt.year

    st.dataframe(new_df)

    fig = plt.figure()
    plt.bar(new_df['Year'], new_df['Dividends'])
    st.pyplot(fig)

    # show several stocks at once

    favorites = ['msft', 'aapl', 'amzn', 'tsla', 'nvda']

    f_df = pd.DataFrame()
    for stock in favorites:
        f_df[stock] = yf.Ticker(stock).history(start='2010-01-01',
                                               end=today)['Close']

    st.dataframe(f_df)
    # draw the chart
    st.line_chart(f_df)
    #=============================stocktwits===================================
    # call the StockTwits API
    res = requests.get(
        'https://api.stocktwits.com/api/2/streams/symbol/{}.json'.format(
            symbol))
    # the response is JSON, so use .json()
    res_data = res.json()
    # work with it as Python dicts and lists
    # st.write(res_data)

    for message in res_data['messages']:

        col1, col2 = st.beta_columns([1, 4])  #lay out the columns in a 1:4 ratio

        with col1:
            st.image(message['user']['avatar_url'])  #avatar image
        with col2:
            st.write('User name: ' + message['user']['username'])
            st.write('Tweet: ' + message['body'])
            st.write('Posted at: ' + message['created_at'])
    p_df = df.reset_index()

    p_df.rename(columns={'Date': 'ds', 'Close': 'y'}, inplace=True)

    st.dataframe(p_df)
    #now we can forecast
    m = Prophet()
    m.fit(p_df)

    future = m.make_future_dataframe(periods=365)
    forecast = m.predict(future)

    st.dataframe(forecast)

    fig1 = m.plot(forecast)
    st.pyplot(fig1)

    fig2 = m.plot_components(forecast)
    st.pyplot(fig2)

    pass
#print(forecast1[['ds','yhat','yhat_lower','yhat_upper']])

#%%
## This works and retained the dataframe type
forecast2 = np.exp(forecast1[['yhat','yhat_lower','yhat_upper']])

## Now merge to bring the ds back into the df
## Without the "on" keyword the join key is implicitly the index which is what we're doing here
forecast2 = forecast2.join(forecast1['ds'], how='inner')



#%%
## This works
## This will create a plot that includes Forecasted, C.I.'s, and Actual values
m.plot(forecast1)

#%%
## I think it is unnecessary to review exponentiated components
## Plus the complexity of joining forecast2 with forecast1
m.plot_components(forecast1);

#%%
## It was necessary, in the fill_between, to use a datetime index associated with 
## the first parameter of the function.
## This necessitated converting the existing ds datetime element to an index
pplt.subplots(figsize=(30,10))
forecast2.set_index('ds',inplace=True)

## If using the view_hour data it will be REQUIRED to exponentiate the forecasts (i.e., forecast2)
pplt.plot(view_hour['distinct_freq_sum'], label='Original', color='black');
Example #35
daily_train["ds"] = daily_train.index
daily_train["y"] = daily_train.Count  #设置为ds、y的保留字
print(daily_train.head())
daily_train.drop(["Count"], axis=1, inplace=True)
print(daily_train.head())

# Fit and train the Prophet model
model = Prophet(yearly_seasonality=True,
                daily_seasonality=True,
                seasonality_prior_scale=0.1)
model.fit(daily_train)
# Forecast the next 7 months (213 days)
future = model.make_future_dataframe(periods=213)
forecast = model.predict(future)
print(forecast)
model.plot(forecast)
# Inspect the individual components
model.plot_components(forecast)

# Model the holidays
# Treat each holiday like a bell curve: the holiday period is the peak, and the lower_window / upper_window windows control how far its effect spreads
chinese_seasons = pd.DataFrame({
    'holiday':
    'chinese_season',
    'ds':
    pd.to_datetime([
        '2012-01-01', '2012-05-01', '2012-10-01', '2013-01-01', '2013-05-01',
        '2013-10-01', '2014-01-01', '2014-05-01', '2014-10-01', '2015-01-01',
        '2015-05-01', '2015-10-01'
    ]),
    'lower_window':