from fbprophet import Prophet
from neuralprophet import NeuralProphet

from pathlib import Path
import sys

# Make the sibling "utilities" directory (next to the current working
# directory) importable. Path.cwd() is used instead of os.getcwd() because
# `os` is not imported in this file.
sys.path.insert(0, str(Path.cwd().parent / "utilities"))
import utilities
import data
# %% [markdown]
# The data used for this experiment is weather data - daily temperatures over a few years with no other covariates.
# We format the data appropriately for the Facebook Prophet API,
# with a datetime column `ds` and an output column `y`.
# We split the data into the usual train and test sets.
# %% Generate data
# Load the weather series and partition it into train/test frames.
df = data.get_weather_data()
df_train, df_test = utilities.split_ts(df)

# Draw both partitions on one shared axis so the split point is visible.
ax = df_train["y"].plot(figsize=(10, 6), label="train")
df_test["y"].plot(ax=ax, label="test")
ax.legend()
ax.set(xlabel="Date", ylabel="Temp (°C)")

# Turn the index back into a regular column on both frames.
df_train, df_test = (part.reset_index() for part in (df_train, df_test))

# Last expression of the cell: preview the first ten training rows.
df_train.head(10)
# %% [markdown]
# ## How fast is `neuralprophet`?
# We train a default `fbprophet` model 10 times and measure the computation time for each.
Ejemplo n.º 2
0
def _add_hourly_delay_columns(frame):
	"""Add the ``diff``, ``time`` and ``datetime`` columns to *frame* in place.

	* ``diff``     -- ``ist_an_von - soll_an_von``
	* ``time``     -- ``soll_an_von`` read as a number of seconds and formatted
	                  as ``HH:MM`` (unparseable values become missing)
	* ``datetime`` -- ``datum_von`` combined with ``time``, rounded to 60 minutes
	"""
	frame['diff'] = frame.ist_an_von - frame.soll_an_von
	# errors='coerce' yields NaT for unparseable values instead of raising.
	scheduled = pandas.to_datetime(frame.soll_an_von.astype(float), errors='coerce', unit='s')
	frame['time'] = scheduled.dt.strftime('%H:%M')
	stamp = pandas.to_datetime(frame.datum_von.astype(str) + ' ' + frame['time'])
	frame['datetime'] = stamp.dt.round('60min')


def _weekly_detrended_delay(frame):
	"""Return the per-``datetime`` summed ``diff`` minus its value 7 days earlier.

	Only the ``diff`` column is aggregated: it is the only one used afterwards,
	and summing the remaining (string / datetime) columns is at best wasted work.
	Rows without a counterpart one week earlier are dropped; gaps that remain
	are filled with ``interpolate()``.
	"""
	indexed = frame.set_index('datetime', drop=True)
	indexed.index = pandas.to_datetime(indexed.index)
	total = indexed.groupby(indexed.index)['diff'].sum()
	detrended = total - total.shift(freq=datetime.timedelta(days=7))
	return detrended.dropna().interpolate()


def analyze_weather_delays():
	"""Load bus-delay and weather data, plot week-differenced delays, aggregate hourly.

	Steps visible in this function:
	1. fetch the bus 69 / bus 94 delay frames and two weather frames via ``dt``;
	2. drop weather rows with ``rain >= 3``;
	3. keep only line 69 rows after 2018-02-04 in the bus 69 frame;
	4. derive ``diff`` / ``time`` / ``datetime`` columns for both bus frames;
	5. plot the week-over-week differenced delay of each line (``plt.show()``
	   blocks until the windows are closed on interactive backends);
	6. compute hourly sum/mean aggregates of delays and weather.

	Nothing is returned by the code shown here.
	"""
	logger = logging.getLogger(__name__)

	# === Read DELAYS and WEATHER data
	logger.info('<--Fetching data-->')
	delays = dt.get_lineie_69_data()
	flat = dt.get_linie_94_data()
	weather = dt.get_iac_weather_data()
	oldWeather = dt.get_weather_data()

	# === Check for outliers/errors in weather data
	rainCeiling = 3 #weather.rain.quantile(0.99975)
	# .copy() so the column assignment further down works on an independent
	# frame rather than on a filtered view of the loaded data.
	weather = weather[weather.rain < rainCeiling].copy()

	# === Focus on BUS 69
	logger.info('<--Prepare bus 69 data-->')
	delays = delays[delays.linie == 69].reset_index(drop=True)

	# ==== Remove NaN where there is no public transport data
	delays = delays[delays.betriebsdatum > datetime.datetime(2018, 2, 4)].copy()

	# ==== Flat line - bus 94
	logger.info('<--Prepare bus 94 delay data-->')
	_add_hourly_delay_columns(flat)

	# === Extract exact time delays
	logger.info('<--Prepare bus 69 delay data-->')
	_add_hourly_delay_columns(delays)

	# === Try to remove WEEKLY SEASONALITY by subtracting the previous week's value
	logger.info('<--Compute de-seasoning for bus lines-->')
	weeklyDetrendedBus69 = _weekly_detrended_delay(delays)
	weeklyDetrendedBus94 = _weekly_detrended_delay(flat)

	plt.figure()
	weeklyDetrendedBus69.plot(title='de-seasoned delay (bus 69) data (diff-of-diff)')

	plt.figure()
	weeklyDetrendedBus94.plot(title='de-seasoned delay (bus 94) data (diff-of-diff)')

	plt.show()

	# === Extract weather measures
	weather['datetime'] = weather.index.round('60min')

	# === GROUPBY and RESAMPLE
	# numeric_only=True: both bus frames carry the string column 'time', which
	# cannot be averaged and should not be concatenated by sum().
	groupSumDelaysByHour = delays.groupby('datetime').sum(numeric_only=True)
	groupMeanDelaysByHour = delays.groupby('datetime').mean(numeric_only=True)
	groupSumFlatByHour = flat.groupby('datetime').sum(numeric_only=True)
	groupMeanFlatByHour = flat.groupby('datetime').mean(numeric_only=True)
	# NOTE(review): `weather` now also holds the datetime64 column 'datetime';
	# confirm the resample aggregations below cope with it on the pandas
	# version in use.
	resampleSumWeatherByHour = weather.resample('H').sum()
	resampleMeanWeatherByHour = weather.resample('H').mean()
Ejemplo n.º 3
0
@author: tjcombs

"""

import pandas as pd
import dash
import dash_core_components as dcc
import dash_html_components as html
import plotly.express as px
from data import get_generation_data, get_weather_data

# Load generation readings and keep the 2020-05-15 .. 2020-06-17 window
# (both bounds inclusive).
generation = get_generation_data()
generation = generation[
    generation['DATE_TIME'].between('2020-05-15', '2020-06-17')
]
weather = get_weather_data()

# Rows belonging to one specific source key.
panel = generation.loc[generation['SOURCE_KEY'] == 'vOuJvMaM2sgwLmb']

external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

app = dash.Dash(__name__, external_stylesheets=external_stylesheets)

# Mean AC power per plant and timestamp, joined with the weather readings
# taken for the same plant at the same timestamp.
generation_agg = (
    generation
    .groupby(by=['PLANT_ID', 'DATE_TIME'])
    .agg(MEAN_AC_POWER=('AC_POWER', 'mean'))
    .reset_index()
    .merge(weather, on=['DATE_TIME', 'PLANT_ID'])
)

fig1 = px.line(data_frame=generation_agg,
               x='DATE_TIME',
Ejemplo n.º 4
0
def main():
	"""Relate bus 69 delays to weather data and write diagnostic plots.

	Reads the delay and weather frames through ``dt``, derives a per-stop delay
	(``diff``) bucketed to the hour, merges it with the weather frame, writes the
	merged table to ``weather_delays_merged.csv`` under ``dt.data_dir()`` and
	saves these figures to the current working directory:

	* ``delay_vs_time-of-day.png``
	* ``seasoned_vs_de-seasoned_delay_data.png``
	* ``seasoned_vs_rainfall_data.png``
	* ``corr_rain_vs_delay_-_with_de-seasoning.png``
	* ``corr_rain_vs_delay_-_no_de-seasoning.png``
	"""

	# === Read DELAYS and WEATHER data
	delays = dt.get_lineie_69_data()
	weather = dt.get_weather_data()

	# === Focus on BUS 69
	mask = delays.linie == 69
	# .copy() so the column assignments below act on an independent frame
	# instead of a filtered view of the loaded data.
	delays = delays[mask].copy()
	delays.reset_index(drop=True, inplace=True)

	# === Extract exact time delays
	delays.loc[:, 'diff'] = delays.ist_an_von - delays.soll_an_von
	# soll_an_von is read as a number of seconds; errors='coerce' gives NaT
	# for unparseable values instead of raising.
	delays.loc[:, 'time'] = pandas.to_datetime(delays.soll_an_von.copy().astype(float), errors='coerce', unit='s')
	delays.time = delays.time.dt.strftime('%H:%M')
	delays.loc[:, 'datetime'] = pandas.to_datetime(delays.datum_von.astype(str) + ' ' + delays.time)
	delays.datetime = delays.datetime.dt.round('60min')

	# === Show delay pattern as a function of time of day
	temp = delays.copy()
	temp.loc[:, 'hour_of_day'] = pandas.to_datetime(temp.time).dt.hour
	# numeric_only=True: the frame holds the string column 'time', which
	# cannot be averaged.
	temp = temp.groupby('hour_of_day').mean(numeric_only=True)
	fig, ax = plt.subplots(1)
	ax.plot(temp.index, temp['diff'])
	ax.set_ylabel('Average delay [s]')
	ax.set_xlabel('Time of Day [HH:MM]')

	for tick in ax.get_xticklabels():
		tick.set_rotation(90)

	plt.savefig('delay_vs_time-of-day.png')

	# === Merge with WEATHER data
	weatherDelays = weather.merge(delays, right_on='datetime', left_index=True, how='inner')
	weatherDelays.to_csv(os.path.join(dt.data_dir(), 'weather_delays_merged.csv'))

	# ==== Remove NaN where there is no public transport data
	mask = weatherDelays.datetime > datetime.datetime(2018,2,4)
	weatherDelays = weatherDelays[mask]
	del mask

	# NOTE(review): both aggregates below are means; if "cumulative" is meant
	# to be a per-hour total this should probably be sum() -- confirm.
	cumulativeWeatherDelays = weatherDelays.groupby('datetime').mean(numeric_only=True)
	averageWeatherDelays = weatherDelays.groupby('datetime').mean(numeric_only=True)

	# === Estimate DAILY SEASONALITY using Fourier transform
	# Fourier transform of time-series data in the time domain (yt, xt) to the
	# frequency domain (yf, xf):
	#   n: number of data points / observations
	#   T: described by the original author as the "maximum frequency of data
	#      i.e. 1H, 1m, 1s"; the code sets it to 1/n.
	n, m = cumulativeWeatherDelays.shape
	T = 1/n
	# NOTE(review): yf is computed from the un-aggregated weatherDelays, while n
	# counts rows of the hourly aggregate, and `1/2.0 * T` evaluates to T/2
	# rather than 1/(2*T) -- confirm the intended frequency axis.
	yf = scipy.fftpack.fft(weatherDelays['diff'].values)
	# numpy.linspace needs an integer sample count; n/2 is a float on Python 3.
	xf = numpy.linspace(0, 1/2.0 * T, n//2)

	fig, ax = plt.subplots()
	ax.plot(xf, 2.0/n * numpy.abs(yf[:n//2]))

	# TODO Build function that 1. Picks data window, 2. FFT, 3. Removes frequency in xf domain, 4. IFFT, 5. Corr

	# === Try to remove DAILY SEASONALITY by subtracting previous day's value
	timeDelta = datetime.timedelta(days=1)
	temp = cumulativeWeatherDelays.copy() - cumulativeWeatherDelays.shift(freq=timeDelta)
	dailySeasoned = temp.dropna(how='all', axis=0)
	dailySeasoned = dailySeasoned.interpolate()
	del timeDelta, temp

	plt.figure()
	dailySeasoned['diff'].plot()

	# === Try to remove WEEKLY SEASONALITY by subtracting the previous week's value
	timeDelta = datetime.timedelta(days=7)
	temp = cumulativeWeatherDelays.copy() - cumulativeWeatherDelays.shift(freq=timeDelta)
	weeklySeasoned = temp.dropna(how='all', axis=0)
	weeklySeasoned = weeklySeasoned.interpolate()
	del timeDelta, temp

	plt.figure()
	weeklySeasoned['diff'].plot()

	# === Plot data with and without seasoning treatment
	fig, axes = plt.subplots(2, sharex=True, figsize=(15, 10))
	matplotlib.rc('xtick', labelsize=24)
	matplotlib.rc('ytick', labelsize=24)
	axis=0
	axes[axis].plot(weeklySeasoned.index, cumulativeWeatherDelays.reindex(weeklySeasoned.index)['diff'])
	axes[axis].set_xlabel('Without de-seasoning')
	axes[axis].set_ylabel('Delay [s]')

	axis+=1
	axes[axis].plot(weeklySeasoned.index, weeklySeasoned['diff'])
	axes[axis].set_xlabel('With de-seasoning')
	axes[axis].set_ylabel('Delay [s]')

	fig.savefig('seasoned_vs_de-seasoned_delay_data.png')

	# === Plot data without de-seasoning and rainfall data
	fig, axes = plt.subplots(2, sharex=True, figsize=(15, 10))

	axis=0
	axes[axis].plot(cumulativeWeatherDelays.index, cumulativeWeatherDelays['diff'])
	axes[axis].set_xlabel('Without de-seasoning')
	axes[axis].set_ylabel('Delay [s]')

	axis+=1
	# NOTE(review): weeklySeasoned is the week-differenced frame, so this plots
	# the differenced rainfall, not the raw measurement -- confirm intent.
	axes[axis].plot(weeklySeasoned.index, weeklySeasoned['niederschlag_mm'])
	axes[axis].set_xlabel('Rain data')
	axes[axis].set_ylabel('Rainfall [mm]')

	fig.savefig('seasoned_vs_rainfall_data.png')


	# === Plot delay-vs-weather graphs for de-seasoned data

	# Scatter plot between CUMULATIVE MM RAIN and DELAYS, restricted to
	# timestamps with non-zero precipitation.
	mask = cumulativeWeatherDelays.reindex(index=weeklySeasoned.index)['niederschlag_mm'] > 0
	xData = cumulativeWeatherDelays.reindex(index=weeklySeasoned.index)['niederschlag_mm'][mask]
	yData = weeklySeasoned['diff'].loc[xData.index]
	corrMat = numpy.corrcoef(xData, yData)
	# The patch only serves as a legend entry carrying the coefficient text.
	corrCoefPatch = mpatches.Patch(color='blue', label='Correlation coefficient := %.2f' %corrMat[0][1])
	plt.figure()
	plt.scatter(x=xData, y=yData, marker='x')
	plt.xlabel('Precipitation (mm)')
	plt.ylabel('De-seasoned delay (s)')
	plt.legend(handles=[corrCoefPatch])
	plt.tight_layout()
	plt.savefig('corr_rain_vs_delay_-_with_de-seasoning.png')

	del xData, yData

	# Same scatter plot, using the delays without de-seasoning.
	mask = cumulativeWeatherDelays['niederschlag_mm'] > 0
	xData = cumulativeWeatherDelays['niederschlag_mm'][mask]
	yData = cumulativeWeatherDelays['diff'].loc[xData.index]
	corrMat = numpy.corrcoef(xData, yData)
	corrCoefPatch = mpatches.Patch(color='blue', label='Correlation coefficient := %.2f' %corrMat[0][1])
	plt.figure()
	plt.scatter(x=xData, y=yData, marker='x')
	plt.xlabel('Precipitation [mm]')
	plt.ylabel('Delay [s]')
	plt.legend(handles=[corrCoefPatch])
	plt.tight_layout()
	plt.savefig('corr_rain_vs_delay_-_no_de-seasoning.png')

	del xData, yData

	# Time-series plot of the hourly delay (top) and rainfall (bottom); both
	# series come from cumulativeWeatherDelays, i.e. without de-seasoning.
	xData = cumulativeWeatherDelays['niederschlag_mm']
	print(xData)
	yData = cumulativeWeatherDelays['diff']

	fig, ax = plt.subplots(2, sharex=True, figsize=(15, 10))

	axis=0
	ax[axis].plot(yData.index, yData)
	ax[axis].set_ylabel('Delay [s]')

	axis+=1
	ax[axis].bar(xData.index, height=xData, width=0.05, color='green')
	ax[axis].set_xlabel('YYYY-MM-DD:HH')