# --- Airline-passengers example -------------------------------------------
# Plot the monthly series, then forecast 1960 with Facebook Prophet and
# report the RMSE for a single month (May 1960).
# NOTE(review): `xt`, `labels` and `may_1960_index` are defined earlier in
# the file — presumably tick positions/labels and the row index of May
# 1960; confirm against the preceding section.
plt.plot(y['y'])
locs, labs = plt.xticks()  # current ticks (captured but not reused below)
plt.xticks(xt, labels)
plt.xlabel('Year')
plt.ylabel('Number of Passengers')
# Dashed marker at row 131 — the boundary between train (1949-1959) and
# the 1960 test year used below.
plt.axvline(x=131, c='k', linestyle='dashed', linewidth=0.5)
print(
    "Monthly totals of international airline passengers between 1949 to 1960 in thousands from which we would like to predict the year 1960. We will use Facebook Prophet. In this example, we will concentrate on may 1960."
)
input("Press Enter to continue...")
plt.show()

# Generate forecasts for 1960 by using Facebook Prophet.
# Train on the first 132 rows; hold out the remainder (1960) for testing.
y_train = y.iloc[0:132, :]
y_test = y.iloc[132:, :]
m = Prophet(mcmc_samples=1000, seasonality_mode='multiplicative')
m.fit(y_train)
future = m.make_future_dataframe(periods=y_test.shape[0], freq='MS')
forecast = m.predict(future)

# Calculate the RMSE for the Facebook Prophet forecast for may 1960.
# (A single point, so the RMSE reduces to the absolute error.)
RMSE_May_FBP = np.sqrt(
    np.square(forecast['yhat'][may_1960_index] - y['y'][may_1960_index]))
print("The Facebbook Prophet RMSE for may 1960 (blue circle) is:",
      RMSE_May_FBP)

# Extracting the point forecasts of Facebook Prophet
forecast_new = forecast['yhat'].copy()

# Plot the forecasts (last ten years of actuals)
y_p = y['y'][120:]
Ejemplo n.º 2
0
                             name='Stock Open'))
    fig.add_trace(
        go.Scatter(x=data['Date'], y=data['Close'], name='Stock Close'))
    fig.layout.update(title_text='Cyrpto Open/Close',
                      xaxis_rangeslider_visible=True)
    st.plotly_chart(fig)


# Render the raw-data chart, then forecast future crypto prices with
# Prophet and display the results in the Streamlit app.
plot_raw_data()

#CYRPTO PREDICTIONS

# Prophet requires exactly two columns: ds (datestamp) and y (value).
df_train = data[['Date', 'Close']]
df_train = df_train.rename(columns={"Date": "ds", "Close": "y"})

# NOTE(review): random.randint(0, 1) returns the int 0 or 1, so roughly
# half the runs pass changepoint_range=0 (no changepoint window at all).
# Prophet expects a float in (0, 1] here — this looks unintentional;
# confirm whether random.random() or a fixed value was meant.
m = Prophet(changepoint_range=random.randint(0, 1))
#m = Prophet(changepoint_prior_scale = 0.5)
m.fit(df_train)
future = m.make_future_dataframe(periods=period)  # `period` comes from the app's UI state — confirm
forecast = m.predict(future)

st.subheader("CYRPTO PREDICTIONS")
st.subheader("(Future prices calculated by complex super math jargon-o-tron")
st.write(forecast.tail())

#PREDICTIONS PLOT

st.write("Prediction Data")
fig1 = plot_plotly(m, forecast)
st.plotly_chart(fig1)
Ejemplo n.º 3
0
def show_trend(df, country, place, metric, n_changepoints=20, events=None):
    """Plot the cumulative count of ``metric`` over time with a 14-day
    Prophet forecast, detected trend changepoints, and optional event
    markers, then save the figure as a PDF.

    Args:
        df: Clean dataframe in Prophet layout — columns ``ds`` (date) and
            ``y`` (cumulative count of ``metric``).
        country: Country name; used only to build the output path.
        place: Place name; used in the plot title and output filename.
        metric: Metric name (e.g. "cases"); used in labels and filename.
        n_changepoints: Maximum number of trend changepoints for Prophet.
        events: Optional list of dicts with ``date`` and ``event`` keys;
            each is drawn as a labelled vertical line.

    Side effects:
        Saves ``../img/{country}/{place}_{metric}_trend.pdf`` and clears
        the current matplotlib figure.
    """
    # Reset any warning filters installed by earlier sections.  (The
    # original code also held a disabled log10 transform of df["y"] here;
    # it was a no-op self-assignment and has been removed.)
    warnings.resetwarnings()

    # Fit a linear-growth Prophet model and forecast two weeks ahead.
    model = Prophet(growth="linear",
                    daily_seasonality=False,
                    n_changepoints=n_changepoints)
    model.fit(df)
    future = model.make_future_dataframe(periods=14)
    forecast = model.predict(future)

    # Base figure from Prophet's own plot.
    fig = model.plot(forecast, figsize=(15, 5))

    ax = plt.gca()
    # Draw a (currently empty) label at each detected changepoint.
    # NOTE(review): the label text is '', so nothing visible is rendered;
    # kept for parity with the original — presumably a date label like
    # "m/d/yy" was intended here.
    for i, v in model.changepoints.items():
        text(v, (df["y"].max() - df["y"].min()) / 2.,
             '',
             rotation=90,
             fontsize=8,
             color='gray')

    # Vertical position for event labels: one quarter of the y-range.
    middle = (df["y"].max() - df["y"].min()) / 2. - (df["y"].max() -
                                                     df["y"].min()) / 4.
    if events:
        # Plot each event as a grey vertical line with a rotated caption.
        for evt in events:
            ax.axvline(x=evt['date'], linewidth=1, color='lightgrey')
            text(evt['date'],
                 middle,
                 f'{evt["date"]}: {evt["event"]}',
                 rotation=90,
                 fontsize=8,
                 color='gray')
    # Overlay Prophet's changepoint markers on the existing axes.
    _ = add_changepoints_to_plot(fig.gca(), model, forecast, cp_color='tomato')
    name = f"{place}: "
    plt.title(
        f"{name} Cumulative number of {metric} over time and change points")
    plt.ylabel(f"Cumulative number of {metric}")
    plt.xlabel("")
    ax.xaxis.set_major_formatter(DateFormatter('%b,%Y'))
    ax.grid(False)

    # Use tight layout
    fig.tight_layout()

    plt.savefig(f'../img/{country}/{place}_{metric}_trend.pdf', dpi=600)
    plt.clf()
Ejemplo n.º 4
0
)
# --- EUR/USD forecast from MongoDB ----------------------------------------
# Pull EUR/USD quotes from Mongo, fit Prophet, and forecast the next hour
# at one-minute granularity.
db = client.forex
collection = db.EURUSD
data = collection.find()
date = []  # unused below — values are actually collected in A and B
rate = []
df = []
A = []  # rate values -> Prophet 'y'
B = []  # date strings -> Prophet 'ds'
for X in data:
    A.append(X['EURUSD']['rate'])
    B.append(str(X['EURUSD']['date']))
Data = {'ds': B, 'y': A}
df = pd.DataFrame(Data)

model = Prophet()
model.fit(df)
future = model.make_future_dataframe(periods=60, freq='min')
print(df.tail())
print(future.tail())
forecast = model.predict(future)
print(forecast[['ds', 'yhat_lower', 'yhat_upper', 'yhat']])

model.plot(forecast)
plt.savefig('Forecast 1.png')
model.plot_components(forecast)
plt.savefig('Forecast 3.png')

# Second model trained on only the last 150 rows.
# NOTE(review): model2.plot() is handed `forecast`, which was produced by
# the first model, not by model2 — likely a copy/paste slip; confirm.
model2 = Prophet()
model2.fit(df[-150:])
model2.plot(forecast[-180:])
def UpdateForecasts(credsfile = 'data/credentials.txt'):

    """
    Updates all time series forecasts of the number of respondents in each DMA (using facebook's prophet) and writes
    all forecasts to CSV.  These forecasts are used in this generative model.  Note this takes about an hour, so it
    should only be updated occasionally.  The function does not return anything.  It merely computes the
    forecasts and writes the results to 'data/all_intab_forecasts.csv'.

    credsfile: path to a text file whose lines hold the redshift connection
        credentials in the order consumed by redshift_connect below
        (presumably host, password, user, database, port — confirm against
        redshift_connect's signature).
    """

    ### read in credentials (with-block guarantees the handle is closed)
    with open(credsfile, "r") as f:
        creds = f.read().splitlines()

    ### connect to redshift
    cur, con = redshift_connect(creds[0], '\\' + creds[1], creds[2], creds[3], creds[4])

    ### fetch data of number of people intab for all DMAs across time

    command = \
      "select date, RTRIM(dma_name) as dma_name, count(distinct(pid)) as total_intab \
        from( \
             (select A.date, A.pid, B.household_number \
                from dev.nielsen_in_tab A \
              INNER JOIN dev.nielsen_market_breaks B \
              ON A.pid = B.pid) AS C \
      INNER JOIN dev.l5m_dmas \
      ON dev.l5m_dmas.hhid =c.household_number) \
      group by date, dma_name \
      order by date, dma_name;"

    df = pd_df(cur, command)

    ### close up connections
    cur.close()
    con.close()

    ## strip white space around DMA names
    df['dma_name'] = df['dma_name'].apply(lambda x: x.strip())

    ### cut off data before 2016
    df['year'] = df['date'].apply(lambda x: x.year)
    df = df[df.year > 2015]
    del df['year']

    ### use prophet to forecast all DMAs and collect one frame per DMA

    forecasts = []

    for name in set(df.dma_name):

        # One ds/y frame per DMA; interval_width=0.68 ~ +/- one sigma.
        df_to_fit = df[df.dma_name == name].reset_index(drop=True)
        del df_to_fit['dma_name']
        df_to_fit.columns = ['ds', 'y']
        m = Prophet(interval_width=0.68)
        m.fit(df_to_fit)

        future = m.make_future_dataframe(periods=365)

        forecast = m.predict(future)
        stuff = forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].copy()
        stuff['DMA'] = name
        forecasts.append(stuff)

    # DataFrame.append was removed in pandas 2.0; concatenating the empty
    # schema frame with the per-DMA frames is the supported equivalent and
    # preserves the original column order even if `forecasts` is empty.
    df_empty = pd.DataFrame({'ds':[], 'yhat':[], 'yhat_lower':[], 'yhat_upper':[], 'DMA':[]})
    final_df = pd.concat([df_empty] + forecasts).reset_index(drop=True)

    ## write all forecasted time series to file
    final_df.to_csv('data/all_intab_forecasts.csv')

    return 
Ejemplo n.º 6
0
def makePredictionFrame(cubes_frames, last_timestamp):
    """Forecast the next `forecast_steps` 15-minute request counts for every
    server "cube" and assemble them into one prediction dataframe.

    Args:
        cubes_frames: iterable of per-server dataframes, each carrying
            'timestamp' and 'count' plus the metadata columns copied below
            ('server', 'cluster', 'dc', 'perm', 'instanz', 'verfahren',
            'service').
        last_timestamp: unused — it is overwritten per cube from the cube's
            own first timestamp; kept only for signature compatibility.

    Returns:
        A single concatenated DataFrame with one forecast row per future
        timestamp and zeroed statistics columns.

    Note: relies on the module-level `forecast_steps` global.
    """
    prediction_frames = []
    count_forecasted = 1
    count_forecasts_todo = len(cubes_frames)
    for cube in cubes_frames:

        # extracting information from the current cube
        last_timestamp = cube['timestamp'].iloc[0]
        server_name = cube['server'].iloc[0]
        cluster = cube['cluster'].iloc[0]
        dc = cube['dc'].iloc[0]
        perm = cube['perm'].iloc[0]
        instanz = cube['instanz'].iloc[0]
        verfahren = cube['verfahren'].iloc[0]
        service = cube['service'].iloc[0]

        # keep only timestamp and count
        # (keyword axis: the positional form was removed in pandas 2.0)
        dataset = cube.drop(cube.columns.difference(['timestamp', 'count']),
                            axis=1)

        # removing timezone from timestamp
        dataset['timestamp'] = pd.to_datetime(dataset['timestamp'])
        dataset['timestamp'] = dataset['timestamp'].dt.tz_localize(None)

        # renaming columns for prophet input requirements
        dataset.rename(columns={"timestamp": "ds", "count": "y"}, inplace=True)

        # predict
        m = Prophet()
        m.fit(dataset)
        future = m.make_future_dataframe(periods=forecast_steps, freq='15min')
        forecast = m.predict(future)
        # NOTE(review): tail(672) assumes forecast_steps == 672 (one week of
        # 15-minute slots); confirm, or derive the tail size from
        # forecast_steps.
        forecast = forecast['yhat'].tail(672)
        forecast = forecast.astype(int, copy=True)
        forecast = forecast.clip(lower=0)  # negative request counts are meaningless

        # future timestamps start one step after the cube's first timestamp
        # (closed='right' excludes the start point; renamed to `inclusive`
        # in pandas >= 1.4)
        next_timestamps = pd.date_range(
            start=last_timestamp, periods=forecast_steps+1, freq='15min',  closed='right')

        # create the prediction dataframe for the current server; the
        # statistics columns are placeholders zeroed out below.
        d = {'timestamp': next_timestamps, 'cluster': cluster, 'dc': dc,
             'perm': perm, 'instanz': instanz,  'verfahren': verfahren, 'service': service, 'response': 200}
        pred_df = pd.DataFrame(data=d)
        pred_df['count'] = forecast.values
        pred_df['minv'] = 0
        pred_df['maxv'] = 0
        pred_df['avg'] = 0
        pred_df['var'] = 0
        pred_df['dev_upp'] = 0
        pred_df['dev_low'] = 0
        pred_df['perc90'] = 0
        pred_df['perc95'] = 0
        pred_df['perc99.9'] = 0
        pred_df['sum'] = 0
        pred_df['sum_of_squares'] = 0
        pred_df['server'] = server_name

        pred_df['timestamp'] = pred_df['timestamp'].dt.strftime(
            '%Y-%m-%dT%H:%M:00Z')

        prediction_frames.append(pred_df)
        print("predicted: ", count_forecasted, " of ", count_forecasts_todo, flush=True)
        count_forecasted += 1
    print("Made predictions", flush=True)
    return pd.concat(prediction_frames, ignore_index=True)
Ejemplo n.º 7
0
# --- Arosa ozone example ---------------------------------------------------
# Forecast the monthly ozone-column series three years ahead with Prophet.
import pandas as pd
from fbprophet import Prophet

import matplotlib.pyplot as plt
#plt.style.use('')

# Load the monthly observations and rename to Prophet's ds/y layout.
df = pd.read_csv('monthly.csv')

df['Month'] = pd.DatetimeIndex(df['Month'])
df = df.rename(columns={'Month': 'ds', 'Monthly': 'y'})
print(df.head(3))
print(df.dtypes)

# Quick look at the raw series before modelling.
ax = df.set_index('ds').plot(figsize=(12, 8))
ax.set_ylabel(
    'Monthly mean thickness (Dobson units) ozone column Arosa, Switzerland 1926-1971'
)
ax.set_xlabel('Date')

my_model = Prophet(interval_width=0.95)  # 95% uncertainty interval

my_model.fit(df)

# 36 month-start periods => a three-year forecast horizon.
future_dates = my_model.make_future_dataframe(periods=36, freq='MS')

forecast = my_model.predict(future_dates)
print(forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']])
my_model.plot(forecast, uncertainty=True)
my_model.plot_components(forecast)
plt.show()
Ejemplo n.º 8
0
def chartTest():
    # Renders the forecast components into the page context.
    # NOTE(review): plt.show() returns None, so `name` is always None and
    # only the side effect (if any, in a server context) matters; `name2`
    # receives the components Figure.  Relies on the module-level `model`
    # and `forecast` defined elsewhere in this file.  Confirm what
    # plot.html actually expects for these two variables.
    return render_template('plot.html',
                           name=plt.show(),
                           name2=model.plot_components(forecast))


# Build the adults-occupancy training frame and fit a seasonal Prophet
# model with custom holidays.  `df` and `holidays` come from earlier in
# the file (not visible here) — presumably the raw bookings frame and a
# Prophet-style holidays frame; confirm.
prophetframe1 = df.filter(['Date', 'Occupancy Adults'])
prophetframe1['Date'] = pd.to_datetime(df['Date'])
prophetframe1 = prophetframe1.set_index('Date')

# Back to plain columns, drop missing rows, rename to Prophet's ds/y.
prophet_frame1 = prophetframe1.reset_index().dropna()
prophet_frame1.columns = ['ds', 'y']

model = Prophet(yearly_seasonality=True,
                weekly_seasonality=True,
                holidays=holidays)
model.fit(prophet_frame1)
# 36 month-start periods => a three-year forecast horizon.
future1 = model.make_future_dataframe(periods=36, freq='MS')
forecast = model.predict(future1)


@app.route("/adult_prediction", methods=["GET"])
def predict_adult():
    """Render the adults-occupancy forecast figure as a PNG response."""
    fig = create_adults()  # figure builder defined elsewhere in this file
    output = io.BytesIO()  # in-memory buffer; no temp file needed
    FigureCanvas(fig).print_png(output)
    return Response(output.getvalue(), mimetype='image/png')


def create_adults():
# --- Peyton Manning example ------------------------------------------------
# Load the example series, train on all of it, and forecast a year ahead.
filtered_data = pd.read_csv(
    'prophet-master/examples/example_wp_log_peyton_manning.csv')

print(len(filtered_data))

# NOTE(review): len * 1 keeps the full dataset — there is no holdout set
# here, so the "train" split is the whole series.
train_length = int(len(filtered_data) * 1)
print(train_length)
train_dataset = filtered_data[0:train_length]
print(train_dataset)

plt.plot(train_dataset['y'])
plt.show()

# Python
m = Prophet(yearly_seasonality=True,
            weekly_seasonality=True,
            changepoint_prior_scale=0.2,
            interval_width=0.95)
m.fit(train_dataset)

# 365 daily periods => one-year forecast horizon.
future = m.make_future_dataframe(periods=365, freq='D')
print(future.columns)

forecast = m.predict(future)
print(forecast.columns)
print(forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail())
fig1 = m.plot(forecast)
plt.show()
fig2 = m.plot_components(forecast)
plt.show()
Ejemplo n.º 10
0
 def test_added_regressors(self):
     """End-to-end check of extra regressors: registration, required-column
     validation at fit/predict time, standardization bookkeeping,
     feature-matrix construction, and the additive/multiplicative
     decomposition of the resulting forecast."""
     m = Prophet()
     m.add_regressor('binary_feature', prior_scale=0.2)
     m.add_regressor('numeric_feature', prior_scale=0.5)
     m.add_regressor('numeric_feature2',
                     prior_scale=0.5,
                     mode='multiplicative')
     m.add_regressor('binary_feature2', standardize=True)
     df = DATA.copy()
     df['binary_feature'] = [0] * 255 + [1] * 255
     df['numeric_feature'] = range(510)
     df['numeric_feature2'] = range(510)
     with self.assertRaises(ValueError):
         # Require all regressors in df
         m.fit(df)
     df['binary_feature2'] = [1] * 100 + [0] * 410
     m.fit(df)
     # Check that standardizations are correctly set
     self.assertEqual(
         m.extra_regressors['binary_feature'],
         {
             'prior_scale': 0.2,
             'mu': 0,
             'std': 1,
             'standardize': 'auto',
             'mode': 'additive',
         },
     )
     self.assertEqual(m.extra_regressors['numeric_feature']['prior_scale'],
                      0.5)
     self.assertEqual(m.extra_regressors['numeric_feature']['mu'], 254.5)
     self.assertAlmostEqual(m.extra_regressors['numeric_feature']['std'],
                            147.368585,
                            places=5)
     self.assertEqual(m.extra_regressors['numeric_feature2']['mode'],
                      'multiplicative')
     self.assertEqual(m.extra_regressors['binary_feature2']['prior_scale'],
                      10.)
     self.assertAlmostEqual(m.extra_regressors['binary_feature2']['mu'],
                            0.1960784,
                            places=5)
     self.assertAlmostEqual(m.extra_regressors['binary_feature2']['std'],
                            0.3974183,
                            places=5)
     # Check that standardization is done correctly
     df2 = m.setup_dataframe(df.copy())
     self.assertEqual(df2['binary_feature'][0], 0)
     self.assertAlmostEqual(df2['numeric_feature'][0], -1.726962, places=4)
     self.assertAlmostEqual(df2['binary_feature2'][0], 2.022859, places=4)
     # Check that feature matrix and prior scales are correctly constructed
     seasonal_features, prior_scales, component_cols, modes = (
         m.make_all_seasonality_features(df2))
     self.assertEqual(seasonal_features.shape[1], 30)
     names = ['binary_feature', 'numeric_feature', 'binary_feature2']
     true_priors = [0.2, 0.5, 10.]
     for i, name in enumerate(names):
         self.assertIn(name, seasonal_features)
         self.assertEqual(sum(component_cols[name]), 1)
         self.assertEqual(
             sum(np.array(prior_scales) * component_cols[name]),
             true_priors[i],
         )
     # Check that forecast components are reasonable
     future = pd.DataFrame({
         'ds': ['2014-06-01'],
         'binary_feature': [0],
         'numeric_feature': [10],
         'numeric_feature2': [10],
     })
     with self.assertRaises(ValueError):
         # predict must also require every registered regressor
         m.predict(future)
     future['binary_feature2'] = 0
     fcst = m.predict(future)
     self.assertEqual(fcst.shape[1], 37)
     self.assertEqual(fcst['binary_feature'][0], 0)
     self.assertAlmostEqual(
         fcst['extra_regressors_additive'][0],
         fcst['numeric_feature'][0] + fcst['binary_feature2'][0],
     )
     self.assertAlmostEqual(
         fcst['extra_regressors_multiplicative'][0],
         fcst['numeric_feature2'][0],
     )
     self.assertAlmostEqual(
         fcst['additive_terms'][0],
         fcst['yearly'][0] + fcst['weekly'][0] +
         fcst['extra_regressors_additive'][0],
     )
     self.assertAlmostEqual(
         fcst['multiplicative_terms'][0],
         fcst['extra_regressors_multiplicative'][0],
     )
     self.assertAlmostEqual(
         fcst['yhat'][0],
         fcst['trend'][0] * (1 + fcst['multiplicative_terms'][0]) +
         fcst['additive_terms'][0],
     )
     # Check works if constant extra regressor at 0
     df['constant_feature'] = 0
     m = Prophet()
     m.add_regressor('constant_feature')
     m.fit(df)
     self.assertEqual(m.extra_regressors['constant_feature']['std'], 1)
Ejemplo n.º 11
0
 def test_fit_changepoint_not_in_history(self):
     """A user-supplied changepoint date that falls in a gap of the
     training history (all of 2013 is excluded here) must not break
     fitting or predicting."""
     train = DATA[(DATA['ds'] < '2013-01-01') | (DATA['ds'] > '2014-01-01')]
     future = pd.DataFrame({'ds': DATA['ds']})
     forecaster = Prophet(changepoints=['2013-06-06'])
     forecaster.fit(train)
     forecaster.predict(future)
Ejemplo n.º 12
0
    def test_holidays(self):
        """Holiday feature construction: column counts driven by the
        lower/upper windows, default vs. per-holiday prior scales, and
        rejection of conflicting priors for the same holiday."""
        holidays = pd.DataFrame({
            'ds': pd.to_datetime(['2016-12-25']),
            'holiday': ['xmas'],
            'lower_window': [-1],
            'upper_window': [0],
        })
        model = Prophet(holidays=holidays)
        df = pd.DataFrame({'ds': pd.date_range('2016-12-20', '2016-12-31')})
        feats, priors, names = model.make_holiday_features(df['ds'])
        # window [-1, 0] -> two holiday feature columns
        self.assertEqual(feats.shape, (df.shape[0], 2))
        self.assertEqual((feats.sum(0) - np.array([1.0, 1.0])).sum(), 0)
        self.assertEqual(priors, [10., 10.])  # Default prior
        self.assertEqual(names, ['xmas'])

        holidays = pd.DataFrame({
            'ds': pd.to_datetime(['2016-12-25']),
            'holiday': ['xmas'],
            'lower_window': [-1],
            'upper_window': [10],
        })
        m = Prophet(holidays=holidays)
        feats, priors, names = m.make_holiday_features(df['ds'])
        # 12 columns generated even though only 8 overlap the frame's range
        self.assertEqual(feats.shape, (df.shape[0], 12))
        self.assertEqual(priors, list(10. * np.ones(12)))
        self.assertEqual(names, ['xmas'])
        # Check prior specifications
        holidays = pd.DataFrame({
            'ds':
            pd.to_datetime(['2016-12-25', '2017-12-25']),
            'holiday': ['xmas', 'xmas'],
            'lower_window': [-1, -1],
            'upper_window': [0, 0],
            'prior_scale': [5., 5.],
        })
        m = Prophet(holidays=holidays)
        feats, priors, names = m.make_holiday_features(df['ds'])
        self.assertEqual(priors, [5., 5.])
        self.assertEqual(names, ['xmas'])
        # 2 different priors
        holidays2 = pd.DataFrame({
            'ds':
            pd.to_datetime(['2012-06-06', '2013-06-06']),
            'holiday': ['seans-bday'] * 2,
            'lower_window': [0] * 2,
            'upper_window': [1] * 2,
            'prior_scale': [8] * 2,
        })
        holidays2 = pd.concat((holidays, holidays2))
        m = Prophet(holidays=holidays2)
        feats, priors, names = m.make_holiday_features(df['ds'])
        self.assertEqual(priors, [8., 8., 5., 5.])
        self.assertEqual(set(names), {'xmas', 'seans-bday'})
        holidays2 = pd.DataFrame({
            'ds':
            pd.to_datetime(['2012-06-06', '2013-06-06']),
            'holiday': ['seans-bday'] * 2,
            'lower_window': [0] * 2,
            'upper_window': [1] * 2,
        })
        holidays2 = pd.concat((holidays, holidays2))
        # Holidays without an explicit prior fall back to holidays_prior_scale
        feats, priors, names = Prophet(
            holidays=holidays2,
            holidays_prior_scale=4).make_holiday_features(df['ds'])
        self.assertEqual(priors, [4., 4., 5., 5.])
        # Check incompatible priors
        holidays = pd.DataFrame({
            'ds':
            pd.to_datetime(['2016-12-25', '2016-12-27']),
            'holiday': ['xmasish', 'xmasish'],
            'lower_window': [-1, -1],
            'upper_window': [0, 0],
            'prior_scale': [5., 6.],
        })
        with self.assertRaises(ValueError):
            Prophet(holidays=holidays).make_holiday_features(df['ds'])
df_humidity = dfall_humidity_1

# Min-max normalize the Shanghai daily pressure series into [0, 1] and
# rename its columns to Prophet's ds/y convention.
dfall_pressure_1 = dfall_shanghai_day_pressure.rename(columns={
    'date': 'ds',
    'pressure': 'y'
})
#dfall['y'] = np.log(dfall['y'])
dfall_pressure_1['y'] = (dfall_pressure_1['y'] - dfall_pressure_1['y'].min()
                         ) / (dfall_pressure_1['y'].max() -
                              dfall_pressure_1['y'].min())
dfall_pressure_1['ds'] = pd.to_datetime(dfall_pressure_1['ds'])
# NOTE(review): set_index is not in-place and its result is discarded —
# either assign it back or drop this line.
dfall_pressure_1.set_index('ds')
df_pressure = dfall_pressure_1

# One Prophet model per weather variable — daily/weekly seasonality off
# (leaving yearly), with a conservative changepoint prior.
m_temperature = Prophet(daily_seasonality=False,
                        weekly_seasonality=False,
                        changepoint_prior_scale=0.01)
m_temperature.fit(df_temperature)

m_humidity = Prophet(daily_seasonality=False,
                     weekly_seasonality=False,
                     changepoint_prior_scale=0.01)
m_humidity.fit(df_humidity)

m_pressure = Prophet(daily_seasonality=False,
                     weekly_seasonality=False,
                     changepoint_prior_scale=0.01)
m_pressure.fit(df_pressure)

# 180 daily periods => roughly a six-month horizon for temperature.
future_temperature = m_temperature.make_future_dataframe(periods=180)
future_temperature.tail()
Ejemplo n.º 14
0
    def grid_search(self):
        """Grid-search the enumerable Prophet parameters with
        cross-validation, then refine the interval-valued parameters by
        coordinate descent, and finally fit the model on the full history
        with the best setup.

        Returns:
            (params, mape, history_model, model_tag): best parameter dict,
            its CV MAPE, the fitted Prophet model, and the 'started' tag.
        """
        parameters_grid = prophet_model.parameters_grid
        parameters_add = prophet_model.parameters_add
        self.__clean_data()

        # Stage 1: exhaustive grid search over every combination of the
        # enumerable parameters, keeping the lowest-MAPE combination.
        if len(parameters_grid) > 0:
            ll = []
            for _, value in enumerate(parameters_grid):
                ll.append(parameters_grid[value])
            length_parms = len(parameters_grid)
            keys = list(parameters_grid.keys())
            for i in product(*ll):
                params = {}
                for j in range(length_parms):
                    params.setdefault(keys[j], i[j])
                mape = self.__cv_run(params)
                if self.mape > mape:
                    self.mape = mape
                    self.params = params
            #print(self.params,self.mape)  # for testing only
        # Stage 2: seed with the grid-search winner, then tune the
        # interval-valued parameters one at a time (coordinate descent)
        # under CV until MAPE is low enough or no parameter can improve.
        """
        然后,利用网格搜索的结果,对区间参数再单个参数使用坐标下降的方法结合Cv找最优
        """
        if len(parameters_add) > 0:
            # Wrap each fixed grid winner in a single-value list so it can
            # be merged with the interval parameters to sweep.
            a = []
            for i in list(self.params.values()):
                a.append([i])
            parameters = dict(dict(zip(list(self.params.keys()), a)),
                              **parameters_add)

            names = parameters.keys()
            setups = [list(param) for param in parameters.values()]
            i = len(setups) - 1  # index of the parameter currently tuned
            j = 0                # index of the candidate value for it
            current_setup = [s[0] for s in setups]
            while self.mape > 0.001:
                if i >= 0 and j >= len(
                        setups[i]):  # all values of this param have been tried
                    j = 1
                    i -= 1
                    if len(setups[i]) == 1:
                        continue
                if i < 0:  # no param could adjust
                    break
                current_setup[i] = setups[i][j]
                params = dict(zip(names, current_setup))
                #print(params, i, j)

                mape = self.__cv_run(params)
                print(f"mape: {mape}")
                if mape < self.mape:
                    self.mape = mape
                    self.params = params
                if mape > self.last_mape * 1.2:  # cp_cut, consider all parameter is linear correlated with mape
                    i -= 1
                    j = 1
                    continue
                self.last_mape = mape
                j += 1
            #print(self.mape, self.params)  # for testing only

        # Fit the final model on the full history with the best parameters.
        self.history_model = Prophet(**self.params, holidays=self.holidays)
        self.history_model.fit(self.data)
        self.model_tag = 'started'

        # the block below is for testing only
        '''
        future = self.history_model.make_future_dataframe(freq=self.freq,periods=self.predict_freq_num)  
        forecast = self.history_model.predict(future)
        self.history_model.plot(forecast).show()
        self.history_model.plot_components(forecast).show()
        print(f'score:{self.mape}\nparams:{self.params}')
        '''
        return self.params, self.mape, self.history_model, self.model_tag
# Word-cloud of the 150 most frequent title tokens, then a visitor-count
# forecast with Prophet.
title_counts = ko.vocab().most_common(150)

# NOTE(review): most_common() returns (word, count) tuples, which have no
# .split method — these three lines would raise AttributeError if the
# results were used; they look like dead/leftover code. Confirm.
for i in range(0, len(title_counts)):
    title_counts[i].split(',')
title_counts[0].split(',')

wordcloud = WordCloud(
    font_path='c:\\Windows\\Fonts\\malgun.ttf',
    relative_scaling=0.2,
    background_color='white',
).generate_from_frequencies(dict(title_counts))

plt.figure(figsize=(12, 8))
plt.imshow(wordcloud)
plt.axis('off')
# 2. Forecast the number of visitors six months ahead
# (columns: '날짜' = date, '방문' = visits)
# NOTE(review): periods=120 daily steps is ~4 months, not 6 — confirm.
df = pd.DataFrame({'ds': data['날짜'], 'y': data['방문']})
df.head()

m = Prophet(yearly_seasonality=True)
m.fit(df)
future = m.make_future_dataframe(periods=120)
future.tail()
forecast = m.predict(future)
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail()
m.plot(forecast)
forecast.tail(1)
m.plot_components(forecast)

# 3. Which articles would be worth publishing going forward?
import pandas as pd
from fbprophet import Prophet

# Load the station dataset, keep only the measurement columns, and fit a
# Prophet model to the 2018 ZGZ_NR series.
dataset_filename = './ftp/ENTRADA/datos.csv'
df = pd.read_csv(dataset_filename, index_col=0)
df.index = pd.to_datetime(df.index)

label_columns = ['pred_24h', 'pred_48h', 'pred_72h']  # unused below
data_columns = [
    'ALAGON_NR', 'GRISEN_NR', 'NOVILLAS_NR', 'TAUSTE_NR', 'TUDELA_NR', 'ZGZ_NR'
]

data = df.loc[:, data_columns]
del df

# %%

df = data[data.index.year == 2018]['ZGZ_NR'].reset_index()
# NOTE(review): reset_index yields [date, value], so naming the columns
# ['y', 'ds'] puts the dates in 'y' and the measurements in 'ds'; the log
# below is then applied to the measurements, but Prophet will receive the
# columns swapped. This looks like a bug — confirm ['ds', 'y'] was meant.
df.columns = ['y', 'ds']
df.head()

import numpy as np

df['ds'] = np.log(df['ds'])
df = df.fillna(0)
m = Prophet(changepoint_prior_scale=0.01).fit(df)

# 365 hourly periods => ~15-day horizon.
future = m.make_future_dataframe(periods=365, freq='H')
forecast = m.predict(future)
fig2 = m.plot_components(forecast)
Ejemplo n.º 17
0
        time_series.columns = ['ds', 'y', 'weekends', 'snap']
        
    #time_max = np.max(time_series['y']) * 1.1
    #time_series['cap'] = time_max

    time_series['cum7'] = cum7[i, (start_date-1):-28]
    # time_series['cum14'] = cum14[i, (start_date-1):-28]
    # time_series['cum28'] = cum28[i, (start_date-1):-28]
    # time_series['cum56'] = cum56[i, (start_date-1):-28]
    time_series['cum_max'] = cum_max[i, (start_date-1):-28]
    time_series['cum_zero'] = cum_zero[i, (start_date-1):-28]
    
    end_train = len(time_series) - 28
    time_series.loc[:, 'ds'] = pd.datetime(2011,1,29) + pd.to_timedelta(time_series['ds'] - 1, unit = 'd')
    
    m = Prophet(uncertainty_samples = 0, holidays = holidays, changepoint_prior_scale=0.9, holidays_prior_scale=0.05, yearly_seasonality = 5) #growth='logistic')

    # m.add_country_holidays(country_name='US')
    
    if price_regressor == True:
        m.add_regressor('price')
    
    m.add_regressor('weekends')
    m.add_regressor('snap')

    m.add_regressor('cum7', standardize = False)
    # m.add_regressor('cum14')
    # m.add_regressor('cum28')
    # m.add_regressor('cum56')
    m.add_regressor('cum_max', standardize = False)
    m.add_regressor('cum_zero', standardize = False)
Ejemplo n.º 18
0
def _mask_outliers(frame):
    """In-place spike suppression for a ds/y frame: if the series max is
    far above the median ((max - median) >= 2 * Q75), null out all values
    >= 2 * Q75 so Prophet treats those spikes as missing data."""
    q50 = frame.y.median()
    q100 = frame.y.quantile(1)
    q75 = frame.y.quantile(.75)
    if (q100 - q50) >= (2 * q75):
        frame.loc[frame.y >= (2 * q75), 'y'] = None


def create_prophet_m(app_name, z1, delay=24):
    """Fit Prophet bandwidth models for one app: a realtime forecast of
    the next `delay` periods on the full history, plus a backtest that
    holds out the last `delay` observations and reports RMSE/MAPE on them.

    app_name: label used in log output and as the metrics-row index.
    z1: object exposing a `bw` bandwidth series (hourly index assumed —
        confirm with callers; freq='H' is used below).
    delay: forecast horizon in periods; also the backtest holdout size.

    Returns (df, model, forecast, pred_df, pred_r):
        df       -- one-row metrics frame (empty if the backtest joined no rows)
        model    -- the backtest Prophet model
        forecast -- the backtest forecast frame, indexed by ds
        pred_df  -- test actuals merged with predictions plus error columns
        pred_r   -- realtime predictions for the next `delay` periods
    """

    ### --- For realtime pred ---###

    full_df = z1.bw.iloc[0:len(z1)]
    full_df = full_df.reset_index()
    full_df.columns = ['ds', 'y']

    #removing outliers
    _mask_outliers(full_df)

    #-- Realtime prediction --##
    #model
    model_r = Prophet(yearly_seasonality=False, changepoint_prior_scale=.2)
    model_r.fit(full_df)
    future_r = model_r.make_future_dataframe(periods=delay, freq='H')
    forecast_r = model_r.predict(future_r)
    forecast_r.index = forecast_r['ds']
    #forecast: keep only the horizon beyond the history
    pred_r = pd.DataFrame(forecast_r['yhat'][len(z1):(len(z1) + delay)])
    pred_r = pred_r.reset_index()
    #--- completes realtime pred ---#

    # Backtest split: train on everything except the last `delay` rows.
    train_end_index = len(z1.bw) - delay
    train_df = z1.bw.iloc[0:train_end_index]

    test_df = z1.bw.iloc[train_end_index:len(z1)]

    train_df = train_df.reset_index()
    test_df = test_df.reset_index()
    train_df.columns = ['ds', 'y']

    #--- removing outliers in trainset (same rule as above) ---#
    _mask_outliers(train_df)

    test_df.columns = ['ds', 'y']

    #model
    model = Prophet(yearly_seasonality=False, changepoint_prior_scale=.2)
    model.fit(train_df)
    future = model.make_future_dataframe(periods=len(test_df), freq='H')
    forecast = model.predict(future)
    forecast.index = forecast['ds']
    #forecast for the holdout window only
    pred = pd.DataFrame(forecast['yhat'][train_end_index:len(z1)])
    pred = pred.reset_index()
    pred_df = pd.merge(test_df, pred, on='ds', how='left')
    pred_df.dropna(inplace=True)

    df = pd.DataFrame()

    if len(pred_df) > 0:

        # Error metrics on the holdout window.
        pred_df['error_test'] = pred_df.y - pred_df.yhat

        MSE = mse(pred_df.y, pred_df.yhat)
        RMSE = math.sqrt(MSE)
        pred_df['APE'] = abs(pred_df.error_test * 100 / pred_df.y)
        MAPE = pred_df.APE.mean()
        print("App name:", app_name)
        #print("MSE  :",MSE)
        print("RMSE :", RMSE)
        print("MAPE :", MAPE)

        # MAPE restricted to the best 98% of points (drops extreme tails).
        mape_q98 = pred_df['APE'][
            pred_df.APE < pred_df['APE'].quantile(0.98)].mean()

        df = pd.DataFrame(
            {
                'length': len(z1),
                'test_rmse': RMSE,
                'test_mape': MAPE,
                'test_mape_98': mape_q98
            },
            index=[app_name])

    return (df, model, forecast, pred_df, pred_r)
        df_d_trn = df_d.loc[df_d.index[:int(train_prop * len(df_d.index))]]

        df_d_tst = df_d.loc[df_d.index[int(train_prop * len(df_d.index)):]]

        df_d_trn = prophet_inputize(df_d_trn, i)
        df_d_tst = prophet_inputize(df_d_tst, i)

        #        df_d_trn.plot(x='ds',y='y')
        #        df_d_tst.plot(x='ds',y='y')

        return (df_d_trn, df_d_tst)

    sample_freq = 'D'
    trn, tst = do_something(df_d, sample_freq)
    m = Prophet(yearly_seasonality=True,
                weekly_seasonality=True,
                daily_seasonality=True)
    m.fit(trn)
    future = m.make_future_dataframe(periods=len(tst), freq=sample_freq)
    forecast = m.predict(future)
    df_o[v] = forecast['yhat']
    #    df_o['ds']=forecast['ds']
    #    fig1 = m.plot(forecast)
    #    print(forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail())
    #    print(tst.tail())
    y_actual_tst = tst['y']
    y_predicted_tst = df_o[v][df_o.index[int(0.8 * len(df_o.index)):]]
    mse_tst.append(sqrt(mean_squared_error(y_actual_tst, y_predicted_tst)))

    y_actual_trn = trn['y']
    y_predicted_trn = df_o[v][df_o.index[:int(0.8 * len(df_o.index))]]
Ejemplo n.º 20
0
import pandas as pd
from fbprophet import Prophet

if __name__ == '__main__':
    # Daily MSFT prices; the CSV's 'index'/'close' columns are renamed to
    # Prophet's required ds/y layout.
    sales_df = pd.read_csv('training_data.csv')
    print(
        "-----------Stock market price of MSFT on daily basis-------------------"
    )
    print(sales_df)
    print(sales_df, sales_df.columns, sales_df.index)
    sales_df = sales_df.rename(columns={'index': 'ds', 'close': 'y'})
    # changepoint_range=1 allows trend changepoints across the entire
    # history (the default restricts them to the first 80%).
    model = Prophet(changepoint_range=1)  #instantiate Prophet
    model.fit(sales_df)
    #fit the model with your dataframe
    future_dates = model.make_future_dataframe(periods=16, freq='d')
    forecast_data = model.predict(future_dates).rename(columns={
        'ds': 'Date',
        'yhat': 'Closing Price'
    })
    print(
        "-----------Prediction of stock price on daily basis-------------------"
    )
    # NOTE(review): the 4634 slice start is hard-coded to the tail of this
    # particular dataset; confirm it still matches training_data.csv.
    print(forecast_data[['Date', 'Closing Price']][4634:])
Ejemplo n.º 21
0
def prophet_base_forecasts(boxcoxT, cap, capF, changepoint_prior_scale,
                           changepoints, daily_seasonality, freq, h, holidays,
                           holidays_prior_scale, include_history,
                           interval_width, mcmc_samples, method,
                           n_changepoints, nodes, seasonality_prior_scale,
                           sumMat, uncertainty_samples, weekly_seasonality, y,
                           yearly_seasonality):
    """Fit one Prophet model per hierarchy node and collect its base forecast.

    For each node, fits a Prophet model on column ``node + 1`` of *y*
    (column 0 holds the dates), forecasts *h* periods at frequency *freq*,
    and records the in-sample residuals and MSE.  ``cap``/``capF``,
    ``changepoints`` and ``n_changepoints`` may be given globally or as
    per-node values (DataFrame column / list entry).  When *capF* is given,
    logistic growth is used and the forecasts are exponentiated so they can
    be aggregated additively; when *boxcoxT* is given, the per-node inverse
    Box-Cox transform is applied to every returned component column.

    Returns:
        (forecastsDict, mse_dict): dicts keyed by node index containing the
        Prophet forecast DataFrame and the in-sample mean squared error.
    """
    # TODO entrypoint for plugging in custom univariate forecasting method
    forecastsDict = {}  # node index -> Prophet forecast DataFrame
    mse_dict = {}       # node index -> in-sample mean squared error
    resids_dict = {}    # node index -> in-sample residual series
    nForecasts = sumMat.shape[0]

    if method == 'FP':
        # Full hierarchy: one forecast per node at every level, plus the total.
        nForecasts = sum(list(map(sum, nodes))) + 1

    for node in range(nForecasts):
        # Dates (column 0) plus this node's series.
        nodeToForecast = pd.concat([y.iloc[:, [0]], y.iloc[:, node + 1]],
                                   axis=1)
        # Resolve per-node overrides for cap / future cap / changepoints.
        if isinstance(cap, pd.DataFrame):
            cap1 = cap.iloc[:, node]
        else:
            cap1 = cap
        if isinstance(capF, pd.DataFrame):
            cap2 = capF.iloc[:, node]
        else:
            cap2 = capF
        if isinstance(changepoints, pd.DataFrame):
            # BUG FIX: positional column selection on a DataFrame needs .iloc;
            # plain ``changepoints[:, node]`` raises.
            changepoints1 = changepoints.iloc[:, node]
        else:
            changepoints1 = changepoints
        if isinstance(n_changepoints, list):
            n_changepoints1 = n_changepoints[node]
        else:
            n_changepoints1 = n_changepoints
        ##
        # Put the forecasts into a dictionary of dataframes
        ##
        # Use a context-managed devnull so the handle is closed (the previous
        # bare open() leaked one file descriptor per node).
        with open(os.devnull, "w") as devnull, \
                contextlib.redirect_stdout(devnull):
            # Prophet requires the columns to be named 'ds' and 'y'.
            nodeToForecast = nodeToForecast.rename(
                columns={nodeToForecast.columns[0]: 'ds'})
            nodeToForecast = nodeToForecast.rename(
                columns={nodeToForecast.columns[1]: 'y'})
            # Logistic growth needs a carrying capacity; otherwise linear.
            growth = 'linear' if capF is None else 'logistic'
            # BUG FIX: the logistic branch previously passed the *global*
            # changepoints / n_changepoints instead of the per-node values.
            m = Prophet(growth=growth,
                        changepoints=changepoints1,
                        n_changepoints=n_changepoints1,
                        yearly_seasonality=yearly_seasonality,
                        weekly_seasonality=weekly_seasonality,
                        daily_seasonality=daily_seasonality,
                        holidays=holidays,
                        seasonality_prior_scale=seasonality_prior_scale,
                        holidays_prior_scale=holidays_prior_scale,
                        changepoint_prior_scale=changepoint_prior_scale,
                        mcmc_samples=mcmc_samples,
                        interval_width=interval_width,
                        uncertainty_samples=uncertainty_samples)
            if capF is not None:
                nodeToForecast['cap'] = cap1
            m.fit(nodeToForecast)
            future = m.make_future_dataframe(periods=h,
                                             freq=freq,
                                             include_history=include_history)
            if capF is not None:
                future['cap'] = cap2
            ##
            # Base Forecasts, Residuals, and MSE
            ##
            forecastsDict[node] = m.predict(future)
            # In-sample residuals: drop the h forecast-only rows from yhat.
            resids_dict[node] = y.iloc[:, node + 1] - forecastsDict[
                node].yhat[:-h].values
            mse_dict[node] = np.mean(np.array(resids_dict[node])**2)
            ##
            # If logistic use exponential function, so that values can be added correctly
            ##
            if capF is not None:
                forecastsDict[node].yhat = np.exp(forecastsDict[node].yhat)
            if boxcoxT is not None:
                # Undo the per-node Box-Cox transform on every component that
                # Prophet returned.  BUG FIX: the original "holidays" branch
                # mistakenly overwrote the 'yearly' column instead.
                for col in ('yhat', 'trend', 'seasonal', 'daily', 'weekly',
                            'yearly', 'holidays'):
                    if col in forecastsDict[node].columns:
                        forecastsDict[node][col] = inv_boxcox(
                            forecastsDict[node][col], boxcoxT[node])

    return forecastsDict, mse_dict
def cast_prediction(arg_data):
    """Fit a default Prophet model on *arg_data* and plot a 365-day forecast."""
    model = Prophet()
    model.fit(arg_data)
    horizon = model.make_future_dataframe(periods=365)
    prediction = model.predict(horizon)
    model.plot(prediction)
Ejemplo n.º 23
0
plt.show()

# Reshape train_ind2 into Prophet's expected layout: a 'ds' date column and
# a 'y' target column built from the 'Confirmed' counts.
new_colname = 'y'  # NOTE(review): unused in the visible code — verify against the rest of the script
train_ind2.index.rename('ds', inplace=True)
train_ind2.rename(columns = {'Confirmed' : 'y'},inplace=True)
train_ind2.reset_index(level=0, inplace=True)
train_ind2.head()

from fbprophet import Prophet

# instantiate the model and set parameters
# The lockdown window (2020-03-24 .. 2020-04-14) is modelled as a one-off
# holiday so the trend is not distorted by it.
# NOTE(review): '2020-04-05' is listed twice in the holiday dates — likely a
# copy-paste slip; confirm and deduplicate.
model = Prophet(
    interval_width=0.95,
    holidays = pd.DataFrame({'holiday': 'lockdown','ds': pd.to_datetime(['2020-03-24','2020-03-25','2020-03-26','2020-03-27','2020-03-28','2020-03-29','2020-03-30','2020-03-31','2020-04-01'
    ,'2020-04-02','2020-04-03','2020-04-04','2020-04-05','2020-04-05','2020-04-06','2020-04-07','2020-04-08','2020-04-09','2020-04-10','2020-04-11','2020-04-12','2020-04-13','2020-04-14'])}),
    growth='linear',
    daily_seasonality=False,
    weekly_seasonality=True,
    yearly_seasonality=True,
    seasonality_mode='multiplicative'
)

# fit the model to historical data

model.fit(train_ind2)

# Extend the timeline 60 days past the training data (history included).
future_pd = model.make_future_dataframe(
    periods=60,
    freq='d',
    include_history=True
)
def cast_complex_prediction(arg_data):
    """Fit a Prophet model with yearly seasonality on *arg_data* and plot a 300-day forecast."""
    model = Prophet(yearly_seasonality=True)
    model.fit(arg_data)
    horizon = model.make_future_dataframe(periods=300)
    prediction = model.predict(horizon)
    model.plot(prediction)
Ejemplo n.º 25
0
import pandas as pd
import numpy as np
from GTSFutur import GTSPredictor
import matplotlib.pyplot as plt
from fbprophet import Prophet
import time

# Hold back the last 3 weeks of hourly data (3 * 168 samples) as a horizon.
past = -3 * 168

look_back = 168  # NOTE(review): unused by the active code — only the commented GTSPredictor runs below use it
len_pred = 200  # NOTE(review): unused by the active code — only the commented GTSPredictor runs below use it
df = pd.read_csv("test.csv", sep=",")
df = df[:past]  # drop the trailing hold-out window
period = 24  # forecast horizon in hours

# Prophet baseline with a very rigid trend (tiny changepoint prior).
m = Prophet(changepoint_prior_scale=0.001)
m.fit(df)
future = m.make_future_dataframe(periods=period, freq='H')
forecast = m.predict(future)
# Alternative models kept for comparison (disabled):
#model=GTSPredictor(
#model=model.fit(df,look_back=look_back,freq_period=period,seq2seq=True,len_pred=len_pred)
#pred=model.make_prediction(df["y"])
"""
model=GTSPredictor()
prediction_3= model.fit_predict_ES(df,period,len_pred)"""
#model=GTSPredictor()
#prediction_xgb=model.fit_predict_XGBoost( df, "D", "%Y-%m-%d",steps=len_pred, early_stopping_rounds=300, test_size=0.01,nb_estimators=1000)
"""model=GTSPredictor()
model.fit(df,look_back=look_back,freq_period=period)
prediction_2,lower,upper=model.predict(steps=len_pred)"""
def default_prophet_model(df):
    """Fit a Prophet model with all-default settings on *df* and return it."""
    fitted = Prophet()
    fitted.fit(df)
    return fitted
print(df.groupby('type').mean())

# Avocado subset: conventional type, aggregated over the whole US.
df = df.loc[(df.type == 'conventional') & (df.region == 'TotalUS')]

# BUG FIX: sort_values returns a new frame; the original call discarded the
# result, leaving the data unsorted.
df = df.sort_values(by=['Date'])

# string type -> datetime
df['Date'] = pd.to_datetime(df['Date'])

# Keep only the columns Prophet needs, with a fresh index.
data = df[['Date', 'AveragePrice']].reset_index(drop=True)

print(data.head())
# Prophet requires the columns to be named 'ds' (date) and 'y' (target).
data = data.rename(columns={'Date': 'ds', 'AveragePrice': 'y'})

# BUG FIX: the (16, 8) tuple is a figure size, not a valid plot format
# argument; it belongs in plt.figure(figsize=...).
plt.figure(figsize=(16, 8))
plt.plot(data.ds, data.y)

model = Prophet()
model.fit(data)

# Forecast one year (365 daily periods) beyond the observed data.
future = model.make_future_dataframe(periods=365)
forecast = model.predict(future)
print(forecast.tail())

fig1 = model.plot(forecast)
#plt.show()

fig2 = model.plot_components(forecast)
#plt.show()
def multiplicative_prophet_model(df):
    """Fit a Prophet model with multiplicative seasonality on *df* and return it."""
    fitted = Prophet(seasonality_mode='multiplicative')
    fitted.fit(df)
    return fitted
Ejemplo n.º 29
0
# Python
import pandas as pd
from fbprophet import Prophet

# Prophet quick-start: forecast the Peyton Manning Wikipedia page-view log.
history = pd.read_csv('example_wp_log_peyton_manning.csv')
history.head()

model = Prophet()
model.fit(history)

# Extend the timeline one year past the training data.
future = model.make_future_dataframe(periods=365)
future.tail()

forecast = model.predict(future)
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail()

# Save the forecast and its components to the Graphs/ directory.
forecast_fig = model.plot(forecast)
components_fig = model.plot_components(forecast)

forecast_fig.savefig("Graphs/fig1.png")
components_fig.savefig("Graphs/fig2.png")
Ejemplo n.º 30
0
def PlotSeries():
    """Render the 'previsao' page with a Prophet forecast of crime occurrences.

    Reads the year/municipality/precinct/crime selections posted by the page,
    queries the matching monthly series from the database, fits a Prophet
    model with Brazilian holidays, forecasts 12 months per selected year,
    saves the plot image under static/, and passes its path to the template.
    Falls back to the bare template when the selects are empty.
    """

    # Read the values of the page's select inputs.
    select_ano = request.form.get("Anos", None)
    select_mun = request.form.get("Municipios", None)
    select_dp = request.form.get("Delegacias", None)
    select_crime = request.form.get("Crimes", None)

    if select_mun != None and select_mun != "" and select_dp != None and select_dp != "" and select_crime != None and select_crime != "":

        # Build a selection-specific file name for the plot image.
        img = 'static/plot' + select_ano + select_mun + select_dp + select_crime + '.png'

        # Fetch the occurrence series for this selection from the database.
        df = getDataAtDB(select_mun, select_dp, select_crime)
        df['datas'] = pd.to_datetime(df['datas'])

        # Rename the columns to Prophet's expected 'ds'/'y'.
        # NOTE(review): set_index here is a no-op (result discarded); the
        # column rename below relies on 'datas' still being a column.
        df.set_index('datas')
        df.columns = ["ds", "y"]

        # Build the model, including Brazilian national holidays.
        m = Prophet(changepoint_prior_scale=0.05, changepoint_range=0.8)
        m.add_country_holidays(country_name='BR')
        m.fit(df)

        # Forecast 12 months (month-start frequency) per selected year.
        future = m.make_future_dataframe(periods=12 * int(select_ano),
                                         freq='MS')
        forecast = m.predict(future)

        # Render the forecast plot to the image file.
        m.plot(forecast, figsize=(8, 4))
        plt.xlabel('Data')
        plt.ylabel('Ocorrencias')
        plt.gca().set_ylim(bottom=0)
        plt.title("Série temporal das ocorrências de " + select_crime +
                  " registradas no " + select_dp)
        plt.savefig(img, bbox_inches='tight')

        plt.clf()  # clear the current figure

        # df_cv = cross_validation(m, initial='3600 days', horizon = '1200 days', parallel="processes")
        # df_p = performance_metrics(df_cv)
        # print(df_p.head())

        # Hyper-parameter optimisation (kept for reference, disabled):
        # params_df = create_param_combinations(**param_grid)
        # print(len(params_df.values))
        # for param in params_df.values:
        #     param_dict = dict(zip(params_df.keys(), param))
        #     cv_df = single_cv_run(df, metrics, param_dict, parallel="processes")
        #     results.append(cv_df)
        # results_df = pd.concat(results).reset_index(drop=True)
        # best_param = results_df.loc[results_df['rmse'] == min(results_df['rmse']), ['params']]
        # print(f'\n The best param combination is {best_param.values[0][0]}')
        # print(results_df)

        return render_template("previsao.html", image=img)

    return render_template("previsao.html")