async def main():
    """
    Main function of the application.

    :return: Nothing.
    """
    print_header()
    timer_main = Timer()
    config = default_config()

    logger.info("start predicting new time")
    config["influx"]["drops"] = '["pm1", "pm4.0", "result", "table", "_time"]'
    config["influx"]["limit"] = "10000"
    with InfluxSensorData(config=config, name="influx") as client:
        # Load the data from the server
        data = client.get_data()
        imputed_data = impute_simple_imputer(data)  # Impute
        avg_data = moving_average(imputed_data)  # Average input
        logger.info(f"data len {len(avg_data)}")

        sns.set_theme(style="darkgrid")
        g = sns.jointplot(x="pm2.5", y="pm10", data=avg_data, kind="reg",
                          truncate=False, xlim=(0, 40), ylim=(0, 40), color="m", height=7)
        g2 = sns.jointplot(x="temperature", y="humidity", data=avg_data, kind="reg",
                           truncate=False, color="m", height=7)
        g3 = sns.jointplot(x="humidity", y="pm10", data=avg_data, kind="reg",
                           truncate=False, color="m", height=7)
        g4 = sns.jointplot(x="temperature", y="pm10", data=avg_data, kind="reg",
                           truncate=False, color="m", height=7)
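        # Note: the JointGrid objects above are created but never rendered or written out.
        # A minimal sketch for persisting them, assuming the same savefig pattern as the
        # other plotting helpers in this module (the file names are hypothetical):
        for grid, fname in [(g, "joint_pm25_pm10.png"), (g2, "joint_temp_humidity.png"),
                            (g3, "joint_humidity_pm10.png"), (g4, "joint_temp_pm10.png")]:
            grid.savefig(fname)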
def test_lstm_model():
    n = 100000
    start = 0
    stop = 140
    # Produce some test data: X contains sin, cos and y contains 1/(1+cos^2).
    # Random noise is also added to both.
    data_sin = np.sin(np.linspace(start, stop, n)) + (np.random.random(n) * 0.4 - 0.2)
    data_cos = np.cos(np.linspace(start, stop, n)) + (np.random.random(n) * 0.4 - 0.2)
    data_res = 1.0 / (1.0 + np.cos(np.linspace(start, stop, n)) ** 2) + (np.random.random(n) * 0.2 - 0.1)
    data = DataFrame([data_sin, data_cos, data_res], index=["sin", "cos", "res"]).transpose()

    # For fitting, we need x.shape == [n, v, f] and y.shape == [n, v, f] where:
    # - n is the number of samples
    # - v is the number of feature vectors in each sample
    # - f is the number of features
    # The sequence y[i, :-1, :] should equal x[i, 1:, :], i.e. is offset by one.
    conf = default_config()
    conf['lstm']['features_in'] = '["sin", "cos", "res"]'
    conf['lstm']['features_out'] = '["sin", "cos", "res"]'
    model = train_lstm_model_predict(conf['lstm'], data)

    x_test, y_test = prepare_window_off_by_1(data[-50:], 20)
    y_pred = np.array([model.predict_next_n(x_test[i]) for i in range(x_test.shape[0])])  # model.predict(x_test, fh=10)
    y_pred_seq = model.predict_sequence_n(x_test[0], y_test.shape[0])
    plt.plot(y_test[:, -1, 0])   # ground truth (blue with the default palette)
    plt.plot(y_pred[:, 0])       # step-wise prediction (orange with the default palette)
    plt.plot(y_pred_seq[:, 0])   # sequence prediction (green with the default palette)
    plt.show()
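# prepare_window_off_by_1 is defined elsewhere in the project; below is a minimal sketch
# of the windowing contract described in the comment above (an illustrative
# reimplementation under those assumptions, not necessarily the project's actual code):
def prepare_window_off_by_1_sketch(df, window: int):
    values = df.to_numpy()            # shape [t, f]
    n = values.shape[0] - window      # number of complete windows
    # x[i] covers rows i .. i+window-1, y[i] covers rows i+1 .. i+window,
    # so y[i, :-1, :] == x[i, 1:, :], as required by the fitting routine.
    x = np.stack([values[i:i + window] for i in range(n)])
    y = np.stack([values[i + 1:i + 1 + window] for i in range(n)])
    return x, y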
    def __init__(self, config: SectionProxy = None):
        if config is None:
            config = default_config()['normalization']
        self.pm_max = float(config['pm_max'])
        self.humidity_max = float(config['humidity_max'])
        self.pressure_min = float(config['pressure_min'])
        self.pressure_max = float(config['pressure_max'])
        self.temperature_min = float(config['temperature_min'])
        self.temperature_max = float(config['temperature_max'])
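    # The bounds above suggest straightforward min-max scaling. A hypothetical sketch of
    # companion methods (names and behaviour are assumptions, not the project's actual API):
    def normalize_temperature(self, value: float) -> float:
        # Map [temperature_min, temperature_max] onto [0, 1].
        return (value - self.temperature_min) / (self.temperature_max - self.temperature_min)

    def denormalize_temperature(self, value: float) -> float:
        # Inverse of normalize_temperature.
        return value * (self.temperature_max - self.temperature_min) + self.temperature_min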
def graph_comparison_our_vs_hist():
    """
    Graphs a few days of our data against the same period of historical data.
    """
    _from = "2021-05-10 00:00:00"
    _to = "2021-05-15 00:00:00"
    config = default_config()
    adapter_hist = HistDataAdapter(config, "graph_comparison_our_hist_hist")
    adapter_influx = InfluxSensorData(config, "graph_comparison_our_hist_influx")

    hist_data = adapter_hist.get_data(output=False)
    our_data = adapter_influx.get_data_8610()
    our_data.columns = ["Date", "Humidity [%] (Our)", "PM10 [µg/m³] (Our)", "Temperature [°C] (Our)"]
    our_data.set_index("Date", inplace=True)

    # select some days
    our_data = our_data[our_data.index.to_series() < _to]
    our_data = our_data[our_data.index.to_series() >= _from]

    # prepare historical data
    hist_data.set_index("date", inplace=True)
    hist_data = hist_data[hist_data.index.to_series() < _to]
    hist_data = hist_data[hist_data.index.to_series() >= _from]
    hist_data.drop(labels=["Zch_Stampfenbachstrasse.Pressure", "Zch_Stampfenbachstrasse.PM2.5"],
                   axis=1, inplace=True)
    hist_data.columns = ["PM10 [µg/m³] (Official)", "Humidity [%] (Official)", "Temperature [°C] (Official)"]

    our_data.index = our_data.index.tz_localize(None)
    data = our_data.join(hist_data, how="outer")
    data.sort_index(axis=1, inplace=True)

    # plot our data
    palette = sns.color_palette(["#2222ff", "#0000ff", "#777777", "#666666", "#ff2222", "#ff0000"])
    sns.set_theme(style="darkgrid")
    plot = sns.relplot(kind="line", palette=palette, data=data,
                       dashes=[(2, 2), "", (2, 2), "", (2, 2), ""])
    plot.tight_layout()
    plot.ax.set_title("Comparison Our Data vs Official Data")
    plot.fig.autofmt_xdate()
    plot.savefig("comparison_our_hist.png")
def upload_influx_db(con: Connection, table: str, start_time: datetime = None):
    from influxdb_client import InfluxDBClient
    from influxdb_client.client.write_api import SYNCHRONOUS

    client = InfluxDBClient.from_config_file(default_config()['influx']['config'])
    write_api = client.write_api(write_options=SYNCHRONOUS)

    cols = set(con.execute(f'SELECT * FROM {table} LIMIT 1').keys())
    places = set(p.split('.', 1)[0] for p in cols)
    places_pm10 = set(p for p in places if f'{p}.PM10' in cols)
    places_pm2_5 = set(p for p in places if f'{p}.PM2.5' in cols)
    places_temperature = set(p for p in places if f'{p}.Temperature' in cols)
    places_rainfall = set(p for p in places if f'{p}.Rainfall' in cols)
    places_humidity = set(p for p in places if f'{p}.Humidity' in cols)
    places_pressure = set(p for p in places if f'{p}.Pressure' in cols)
    places = sorted(set(chain(places_pm10, places_pm2_5, places_temperature, places_rainfall)))

    # Build one SQL expression per place that renders two InfluxDB line-protocol records
    # per row (measurement "pollution" and measurement "weather"); SUBSTR(..., 2) strips
    # the leading comma off the assembled field list.
    q = " || '\n' || ".join(
        f"""'pollution,place={p} ' || SUBSTR(""" +
        (f"""IFNULL(',pm10=' || "{p}.PM10", '') || """ if p in places_pm10 else '') +
        (f"""IFNULL(',pm2.5=' || "{p}.PM2.5", '') || """ if p in places_pm2_5 else '') +
        f"""'', 2) || STRFTIME(' %s000000000', date) || '\n' || """ +
        f"""'weather,place={p} ' || SUBSTR(""" +
        (f"""IFNULL(',temperature=' || "{p}.Temperature", '') || """ if p in places_temperature else '') +
        (f"""IFNULL(',rainfall=' || "{p}.Rainfall", '') || """ if p in places_rainfall else '') +
        (f"""IFNULL(',pressure=' || "{p}.Pressure", '') || """ if p in places_pressure else '') +
        (f"""IFNULL(',humidity=' || "{p}.Humidity", '') || """ if p in places_humidity else '') +
        f"""'', 2) || STRFTIME(' %s000000000', date)"""
        for p in places
    )
    query = f"""SELECT {q} AS line_data FROM {table}"""
    if start_time is not None:
        query += f" WHERE date >= '{start_time.isoformat()}'"
    res = con.execute(query)
    # Keep only records that actually carry fields: rows where every field was NULL render
    # with a double space between the tag set and the timestamp, so filter those out.
    sequence = [l for row in res for l in row['line_data'].split('\n') if '  ' not in l]
    print('SQL query:', query)
    # print('\n'.join(sequence))
    write_api.write(INFLUXDB_HIST_DATA_BUCKET, records=sequence)
    print('Done!')
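# For illustration, a single row for place Zch_Stampfenbachstrasse with PM10=12.3 and
# temperature=4.5 (hypothetical values) would yield line-protocol records such as:
#
#   pollution,place=Zch_Stampfenbachstrasse pm10=12.3 1620000000000000000
#   weather,place=Zch_Stampfenbachstrasse temperature=4.5 1620000000000000000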
def main():
    global_config = dict(lr=[0.001, 0.0001], schedule=[np.inf], batch_size=64, dev_every=1,
                         seed=0, model=None, use_nesterov=False, gpu_no=0, cache_size=32768,
                         momentum=0.9, weight_decay=0.00001)
    builder = ConfigBuilder(default_config(), global_config)
    parser = builder.build_argparse()
    # parser.add_argument("--no_cuda", type=str2bool, nargs='?', const=True)
    config = builder.config_from_argparse(parser)

    if config["model_type"] == "EdgeCRNN":
        model = EdgeCRNN(width_mult=config["width_mult"])
        model = torch.nn.DataParallel(model)
    elif config["model_type"] == "shuffleNet":
        from nets.ShuffleNetV2 import shufflenetv2
        model_shuffle = shufflenetv2(width_mult=config["width_mult"])
        model = torch.nn.DataParallel(model_shuffle)
    elif config["model_type"] == "mobileNet":
        from nets.MobileNetV2 import MobileNetV2
        model = MobileNetV2(width_mult=config["width_mult"])
    elif config["model_type"] == "mobileNetV3-Small":
        from nets.MobileNetV3 import MobileNetV3_Small
        model = MobileNetV3_Small()
    elif config["model_type"] == "mobileNetV3-Large":
        from utils.MobileNetV3 import MobileNetV3_Large
        model = MobileNetV3_Large()
    elif config["model_type"] == "Tpool2":
        from nets.Tpool2 import CNN
        model = CNN()
    else:
        # NOTE: if model_type matches none of the branches, `model` is undefined and the
        # assignment below raises a NameError.
        pass
    config["model"] = model

    set_seed(config)
    if config["type"] == "train":
        train(config)
    elif config["type"] == "eval":
        evaluate(config)
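# A main-guard so the training script can be invoked directly. The exact CLI flags are
# assumptions, since ConfigBuilder derives them from the config keys; something like
# `python train.py --model_type EdgeCRNN --type train` (script name hypothetical).
if __name__ == '__main__':
    main()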
async def main():
    """
    Main function of the application.

    :return: Nothing.
    """
    print_header()
    timer_main = Timer()
    config = default_config()

    # read and prepare dataset for training
    # df_timeseries_complete = load_dataset("zurich_adapter", config)
    with InfluxSensorData(config=config, name="influx") as client:
        # Load the data from the server
        df_timeseries_complete = client.get_data().rename(columns={
            "humidity": "Live.Humidity",
            "pm10": "Live.PM10",
            "temperature": "Live.Temperature",
            "_time": "date"
        })

    df_timeseries = chop_first_fringe(df_timeseries_complete)  # Chop first improperly filled rows
    imputed_timeseries = impute_simple_imputer(df_timeseries)
    smooth_timeseries = moving_average(imputed_timeseries)
    smooth_timeseries.dropna(inplace=True)  # Make sure there really is no empty cell anymore, else drop the row

    # Split training/testing data 80%/20%
    df_train_val, df_test = temporal_train_test_split(smooth_timeseries, test_size=.20)

    # Define all models at our disposal
    models = [
        ModelHolder(name="arima", trainer=train_or_load_ARIMA, config=config),
        ModelHolder(name="autoarima", trainer=train_or_load_autoARIMA, config=config),
        ModelHolder(name="expsmooting", trainer=train_or_load_expSmoothing, config=config),
        ModelHolder(name="lstm", trainer=train_or_load_LSTM, config=config),
        ModelHolder(name="lstm_seq", trainer=train_or_load_LSTM, config=config)
    ]

    # Train the models
    trained_models = await gather(*[to_thread(train_model, model=model, data=df_train_val) for model in models])
    [model.model.store(model.config) for model in trained_models]  # Stores if not existing. Does NOT OVERWRITE!!!

    # Test the generalization performance of our models
    forecast_test = [model.model.predict(x=df_test, fh=5) for model in trained_models]
    all: DataFrame = df_test.copy()
    all["Arima.PM10"] = forecast_test[0].values
    all["AutoArima.PM10"] = forecast_test[1].values
    all["ExpSmoothing.PM10"] = forecast_test[2].values
    all["LSTM.PM10"] = forecast_test[3]['Live.PM10_Pred']
    all["LSTMSeq.PM10"] = forecast_test[4]['Live.PM10_Pred']
    # all.to_csv(PROJECT_DIR / 'pm10_predictions.csv')
    print(all)

    logger.info("start predicting new time")
    with InfluxSensorData(config=config, name="influx") as client:
        # Load the data from the server
        data = client.get_data().rename(columns={
            "humidity": "Live.Humidity",
            "pm10": "Live.PM10",
            "temperature": "Live.Temperature"
        })
        imputed_data = impute_simple_imputer(data)  # Impute
        avg_data = moving_average(imputed_data)  # Average input
        logger.debug("Forecasting")
        forecast_list = [model.model.predict(x=avg_data, fh=5) for model in trained_models]  # Make predictions
        logger.info(forecast_list)
        forecast_dict = {
            "arima": forecast_list[0],
            "autoarima": forecast_list[1],
            "expsmoothing": forecast_list[2],
            "lstm": forecast_list[3].iloc[:, forecast_list[3].columns.get_loc("Live.PM10_Pred")],
            "lstm_seq": forecast_list[4].iloc[:, forecast_list[4].columns.get_loc("Live.PM10_Pred")]
        }
        forecast = pd.DataFrame(data=forecast_dict)
        logger.debug(forecast)
        forecast = forecast.mean(axis=1).head(n=50)
        logger.info(f"Forecasting finished with forecast value\n {forecast}")
        sns.set_theme(style="darkgrid")
        sns.lineplot(data=forecast)

    logger.info(f"Script completed in {timer_main}.")
    logger.info("Terminating gracefully...")
from pathlib import Path

import numpy as np
import seaborn as sns

from models import autoarima
from models import expsmoothing
from models import arima
from models import lstm
from preprocessing.imputing import impute_simple_imputer
from preprocessing.moving_average import moving_average
from utils.sqlite_utils import get_engine, get_time_series
from utils.config import default_config

# TODO: determine some sane forecasting horizon here
fh = 48
config = default_config()
from_excel = False


def write_model_graph(y_train, y_test, y_pred, name):
    # Stack the training, test and prediction series into one [t, 3] array, padding with
    # NaN so each series occupies its own column over the full timeline.
    data = np.array([
        np.append(y_train, [np.nan] * y_test.shape[0]),
        np.append([np.nan] * y_train.shape[0], y_test),
        np.append([np.nan] * y_train.shape[0], y_pred)
    ]).transpose()
    # Connect the three curves at the train/test boundary.
    idx = y_train.shape[0] - 1
    data[idx][1] = data[idx][0]
    data[idx][2] = data[idx][0]
    palette = sns.color_palette(["#0000ff", "#00bb00", "#ff0000"])
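    # The function body is truncated here in the source. A hypothetical continuation,
    # following the relplot/tight_layout/savefig pattern of the other plotting helpers
    # (kept commented out, since the original code is not available):
    # sns.set_theme(style="darkgrid")
    # plot = sns.relplot(kind="line", palette=palette, data=data)
    # plot.set(title=name)
    # plot.tight_layout()
    # plot.savefig(f"{name}.png")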
            '|> pivot(columnKey: ["_field"], rowKey: ["_time"], valueColumn: "_value") // Instead of having a sequence of fields, get a table of entries\n'
            '|> filter(fn: (r) => r.device == "device3")'
            f'|> limit(n: {self.limit})  // debug: remove me after testing\n'
            '|> sort(columns: ["_time"])  // Before exit, always sort, as we want to have a timeline\n'
            '|> yield()\n').drop(labels=self.drops, axis=1)

    def send_data(self, value):
        point = Point("prediction").field("prediction", value).time(datetime.now())
        with self.client.write_api(write_options=SYNCHRONOUS) as write_api:
            write_api.write(point)

    def __enter__(self):
        self.client = InfluxDBClient.from_config_file(
            self.config["influx"]["config"],
            debug=self.config[self.name]["debug"].lower() in ['true', 't', 'yes'])
        return self

    def __exit__(self, type, value, traceback):
        self.client.close()


if __name__ == '__main__':
    conf = default_config()
    conf['DEFAULT']['bucket'] = 'hist_data'
    conf['DEFAULT']['start'] = '-60d'
    adapt = InfluxSensorData(conf, 'DEFAULT')
    data = adapt.get_data()
    print(data)
async def main():
    """
    Main function of the application.

    :return: Nothing.
    """
    print_header()
    timer_main = Timer()
    config = default_config()

    # read and prepare dataset for training
    df_timeseries_complete = load_dataset("zurich_adapter", config)
    df_timeseries = chop_first_fringe(df_timeseries_complete)  # Chop first improperly filled rows
    imputed_timeseries = impute_simple_imputer(df_timeseries)
    smooth_timeseries = moving_average(imputed_timeseries)
    smooth_timeseries.dropna(inplace=True)  # Make sure there really is no empty cell anymore, else drop the row

    # Split training/testing data 80%/20%
    df_train_val, df_test = temporal_train_test_split(smooth_timeseries, test_size=.20)

    # Define all models at our disposal
    models = [
        ModelHolder(name="arima", trainer=train_or_load_ARIMA, config=config),
        ModelHolder(name="autoarima", trainer=train_or_load_autoARIMA, config=config),
        ModelHolder(name="expsmooting", trainer=train_or_load_expSmoothing, config=config),
        ModelHolder(name="lstm", trainer=train_or_load_LSTM, config=config),
        ModelHolder(name="lstm_seq", trainer=train_or_load_LSTM, config=config)
    ]

    # Train the models
    trained_models = await gather(*[to_thread(train_model, model=model, data=df_train_val) for model in models])
    [model.model.store(model.config) for model in trained_models]  # Stores if not existing. Does NOT OVERWRITE!!!

    # Test the generalization performance of our models
    forecast_test = [model.model.predict(x=df_test, fh=5) for model in trained_models]
    print(forecast_test)
    # plt.plot(forecast_test[0][['Zch_Stampfenbachstrasse.PM10', 'Zch_Stampfenbachstrasse.PM10_Pred']])
    # plt.plot(forecast_test[0][['Zch_Stampfenbachstrasse.Humidity', 'Zch_Stampfenbachstrasse.Temperature']])
    # plt.show()

    logger.info("start predicting new time")
    forecast_dict = {
        "arima": pd.Series(),
        "autoarima": pd.Series(),
        "expsmoothing": pd.Series(),
        "lstm": pd.Series(),
        "lstm_seq": pd.Series()
    }
    with InfluxSensorData(config=config, name="influx") as client:
        # Load the data from the server
        data = client.get_data().rename(columns={
            "humidity": "Live.Humidity",
            "pm10": "Live.PM10",
            "temperature": "Live.Temperature"
        })
        imputed_data = impute_simple_imputer(data)  # Impute
        avg_data = moving_average(imputed_data)  # Average input
        logger.debug("Forecasting")
        forecast_list = [model.model.predict(x=avg_data, fh=5) for model in trained_models]  # Make predictions
        logger.info(forecast_list)
        forecast_dict = {
            "arima": forecast_list[0],
            "autoarima": forecast_list[1],
            "expsmoothing": forecast_list[2],
            "lstm": forecast_list[0].iloc[:, forecast_list[0].columns.get_loc("Live.PM10_Pred")],  # was item 3
            "lstm_seq": forecast_list[1].iloc[:, forecast_list[1].columns.get_loc("Live.PM10_Pred")]  # was item 4
        }
        forecast = pd.DataFrame(data=forecast_dict)
        logger.debug(forecast)
        forecast = forecast.mean(axis=1).head(n=50)
        forecast.name = "forecast"
        logger.info(f"Forecasting finished with forecast value\n {forecast}")

    config["influx"]["limit"] = "150"
    config["influx"]["drops"] = '["pm1", "pm4.0", "pm2.5", "result", "table", "_time", "humidity", "temperature"]'
    with InfluxSensorData(config=config, name="influx") as client:
        # Load the data from the server
        data = client.get_data().tail(n=50)
        data.index = range(len(data))
        data = data.iloc[:, 0]
        print(f"data {data}")
        sns.set_theme(style="darkgrid")
        sns.lineplot(data=[forecast, data])

    logger.info(f"Script completed in {timer_main}.")
    logger.info("Terminating gracefully...")
def graph_typical_day():
    """
    Graphs the typical day of PM10, humidity and temperature with a standard-deviation band.
    """
    _from = "2020-05-04 00:00:00"
    _to = "2021-05-03 00:00:00"
    config = default_config()
    adapter_hist = HistDataAdapter(config, "graph_typical_day")

    hist_data = adapter_hist.get_data(output=False).drop(labels=["Zch_Stampfenbachstrasse.PM2.5"], axis=1)
    hist_data.set_index("date", inplace=True)
    hist_data = hist_data[hist_data.index.to_series() <= _to]
    hist_data = hist_data[hist_data.index.to_series() >= _from]

    # Collect a year of observations, bucketed by hour of day.
    data_pm10 = {i: [] for i in range(24)}
    data_humidity = {i: [] for i in range(24)}
    data_temperature = {i: [] for i in range(24)}
    for line in hist_data.to_records():
        hour = pd.Timestamp(line["date"]).hour
        hum = line["Zch_Stampfenbachstrasse.Humidity"]
        temp = line["Zch_Stampfenbachstrasse.Temperature"]
        pm10 = line["Zch_Stampfenbachstrasse.PM10"]
        if not np.isnan(pm10):
            data_pm10[hour].append(pm10)
        if not np.isnan(hum):
            data_humidity[hour].append(hum)
        if not np.isnan(temp):
            data_temperature[hour].append(temp)

    # Flatten the per-hour buckets into x/y pairs for seaborn.
    x_pm10 = []
    y_pm10 = []
    x_humidity = []
    y_humidity = []
    x_temperature = []
    y_temperature = []
    for x in range(24):
        for y in data_pm10[x]:
            x_pm10.append(x)
            y_pm10.append(y)
        for y in data_humidity[x]:
            x_humidity.append(x)
            y_humidity.append(y)
        for y in data_temperature[x]:
            x_temperature.append(x)
            y_temperature.append(y)

    sns.set_theme(style="darkgrid")
    plot = sns.relplot(x=x_pm10, y=y_pm10, kind="line", ci="sd")
    plot.set(xlabel="Time of Day", ylabel="PM10 [µg/m³]", title="Typical Day of PM10")
    plot.tight_layout()
    plot.savefig("typical_pm10.png")

    sns.set_theme(style="darkgrid")
    plot = sns.relplot(x=x_humidity, y=y_humidity, kind="line", ci="sd")
    plot.set(xlabel="Time of Day", ylabel="Humidity [%]", title="Typical Day of Humidity")
    plot.tight_layout()
    plot.savefig("typical_humidity.png")

    sns.set_theme(style="darkgrid")
    plot = sns.relplot(x=x_temperature, y=y_temperature, kind="line", ci="sd")
    plot.set(xlabel="Time of Day", ylabel="Temperature [°C]", title="Typical Day of Temperature")
    plot.tight_layout()
    plot.savefig("typical_temperature.png")
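# A hypothetical, more compact alternative to the manual bucketing in graph_typical_day:
# seaborn can aggregate directly over the hour of day and drops NaN values itself. Sketch
# only, reusing the same adapter and column names as above; shown here for PM10.
def graph_typical_day_pm10_sketch():
    config = default_config()
    adapter_hist = HistDataAdapter(config, "graph_typical_day")
    hist_data = adapter_hist.get_data(output=False)
    hist_data.set_index("date", inplace=True)
    hours = hist_data.index.to_series().dt.hour.to_numpy()
    sns.set_theme(style="darkgrid")
    plot = sns.relplot(x=hours, y=hist_data["Zch_Stampfenbachstrasse.PM10"].to_numpy(),
                       kind="line", ci="sd")
    plot.set(xlabel="Time of Day", ylabel="PM10 [µg/m³]", title="Typical Day of PM10")
    plot.tight_layout()
    plot.savefig("typical_pm10_sketch.png")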