def highWinds():
    """Print how often the ERE Eday turbine recorded wind above 25 m/s.

    Reports the total number of data points, the count of points with
    "Wind Mean (M/S)" > 25, and that count as a percentage of the total.
    """
    df = pp.getEdayData()
    # Renamed from `all` — the original shadowed the `all()` builtin.
    total = len(df)
    high_wind = len(df[df["Wind Mean (M/S)"] > 25])
    print("All data points", total)
    print("Data points with wind over 25 m/s", high_wind)
    print("Which is {:.2f}%".format(high_wind / total * 100))
def calculatePowerCurve():
    """Print the mean measured power output (kW) per rounded wind-speed bin.

    Rounds wind speed and power to integers, groups by wind speed and
    averages the power column, then prints one mean value per bin.
    """
    # Removed unused locals from the original: df_full (a second dataframe
    # that was loaded but never read) and start/stop datetime bounds that
    # were parsed but never applied.
    df = pp.getEdayData()
    df = df[["Wind Mean (M/S)",
             "Power Mean (Kw)"]].round().groupby("Wind Mean (M/S)").mean()
    for r in df.values:
        print(r[0])
def buildEdayWindOrkneyGenScatter(start_limit=0,
                                  stop_limit=0,
                                  zones=0,
                                  save_to_pdf=False,
                                  filename="eday-scatter",
                                  curtail_code=0,
                                  color="k",
                                  wind_limit=40):
    """Scatter ERE turbine wind speed against total Orkney generation,
    overlaid with an estimated power curve (median generation per rounded
    wind-speed bin) and an R^2 score for that curve.

    start_limit/stop_limit: 'YYYY-MM-DD' strings, or 0 for no limit.
    zones: unused here — NOTE(review): kept for signature parity with the
        other build* functions; confirm before removing.
    curtail_code: 0 = all rows, 1/2 = Zone 1 active/inactive,
        3/4 = Curtailment active/inactive.
    wind_limit: rows at or above this wind speed (m/s) are discarded.
    save_to_pdf: write ./plots/<filename>.pdf instead of showing the plot.
    """
    if start_limit != 0:
        start_limit = datetime.strptime(start_limit, '%Y-%m-%d')
    if stop_limit != 0:
        stop_limit = datetime.strptime(stop_limit, '%Y-%m-%d')
    # NOTE(review): start_limit/stop_limit are parsed but never applied to
    # the dataframes below — presumably filtering was intended; confirm.
    df = pp.getEdayData()
    df_full = pp.getSingleDataframe(fromPickle=True, clean=True)
    df = df_full.join(df, how="inner")
    df = df[df["Wind Mean (M/S)"] < wind_limit]
    # Optional filtering on curtailment state.
    if curtail_code == 1:
        df = df[df["Zone 1"] == 1]
    elif curtail_code == 2:
        df = df[df["Zone 1"] == 0]
    elif curtail_code == 3:
        df = df[df["Curtailment"] == 1]
    elif curtail_code == 4:
        df = df[df["Curtailment"] == 0]
    # Median Orkney generation per integer wind-speed bin -> empirical curve.
    full_mean = df[["Wind Mean (M/S)", "Generation"
                    ]].round().groupby("Wind Mean (M/S)").median().values[:, 0]
    # Interpolate between bins so the curve can be evaluated at any speed.
    powercurve = interp1d(range(0, len(full_mean)),
                          full_mean,
                          fill_value="extrapolate")
    # How well the empirical curve predicts actual generation.
    r2 = r2_score(df[["Generation"]], df[["Wind Mean (M/S)"]].apply(powercurve))
    print(full_mean)
    print("R^2 score:", r2)
    plt.scatter(df["Wind Mean (M/S)"],
                df["Generation"],
                c=color,
                alpha=0.5,
                s=2,
                marker="x")
    plt.plot(full_mean, "bx-", markersize=4, linewidth=1)
    plt.xlabel("Wind Speed from ERE Turbine (M/S)")
    plt.xlim(0, 40)
    plt.xticks([0, 5, 10, 15, 20, 25, 30, 35, 40])
    plt.ylim(0, 40)
    plt.ylabel("Power Generated in Orkney (MW)")
    plt.legend(["Estimated Power Curve", "Data"])
    fig = plt.gcf()
    fig.set_size_inches(3.9, 3.2)
    fig.tight_layout()
    #plt.title("Relation between windspeeds and generation for Eday 900kW Turbine")
    if save_to_pdf:
        fig.savefig("./plots/" + filename + ".pdf")
    else:
        plt.show()
    plt.clf()
def makeDescriptiveDataset(start, stop, clean=True, eday=False):
    """Build the dataframe used by the descriptive analysis.

    Loads the pickled combined dataframe for [start, stop], optionally
    inner-joins the ERE Eday turbine data, adds the reduced column and
    drops NaN rows. When `clean` is set, additionally runs the cleaning
    pipeline (cleanData, clean reduced column, glitch removal).
    """
    frame = pp.getSingleDataframe(start, stop, fromPickle=True)
    if eday:
        frame = frame.join(pp.getEdayData(), how="inner")
    frame = pp.addReducedCol(frame)
    frame.dropna(inplace=True)  # Remove NaN entries.
    if not clean:
        return frame
    print("Cleaning data...")
    frame = pp.cleanData(frame)
    frame = pp.addReducedCol(frame, clean=True)
    return pp.removeGlitches(frame)
def buildEdayScatter(start_limit=0,
                     stop_limit=0,
                     zones=0,
                     save_to_pdf=False,
                     filename="eday-scatter",
                     curtail_code=0,
                     color="k",
                     width=4,
                     powercurve=False):
    """Scatter ERE turbine wind speed against its own power output,
    optionally overlaying ERE's published power curve.

    start_limit/stop_limit: 'YYYY-MM-DD' strings, or 0 for no limit.
    zones: unused here — kept for signature parity with sibling builders.
    curtail_code: 0 = all rows, 1/2 = Zone 1 active/inactive,
        3/4 = Curtailment active/inactive (same mapping as
        buildEdayWindOrkneyGenScatter).
    save_to_pdf: write ./plots/eday/<filename>.pdf instead of showing.
    """
    if start_limit != 0:
        start_limit = datetime.strptime(start_limit, '%Y-%m-%d')
    if stop_limit != 0:
        stop_limit = datetime.strptime(stop_limit, '%Y-%m-%d')
    df = pp.getEdayData()
    df = df.loc[start_limit:stop_limit][["Wind Mean (M/S)", "Power Mean (Kw)"]]
    df_full = pp.getSingleDataframe(fromPickle=True)
    df = df_full.join(df, how="inner")
    df = pp.addReducedCol(df, clean=True)
    if curtail_code == 1:
        df = df[df["Zone 1"] == 1]
    elif curtail_code == 2:
        df = df[df["Zone 1"] == 0]
    # BUG FIX: this branch was `elif curtail_code == 2:` (a duplicate), so
    # the Curtailment filter was unreachable. Codes 3/4 now match the
    # mapping used by buildEdayWindOrkneyGenScatter.
    elif curtail_code == 3:
        df = df[df["Curtailment"] == 1]
    elif curtail_code == 4:
        df = df[df["Curtailment"] == 0]
    # ERE's published power curve for the 900kW turbine, kW per m/s bin.
    eday_curve = [
        0, 0, 0.5, 4, 19, 60, 101, 160, 253, 404, 532, 687, 820, 870, 890,
        900, 900, 900, 900, 900, 900, 900, 900, 900, 900, 900
    ]
    plt.scatter(df["Wind Mean (M/S)"],
                df["Power Mean (Kw)"],
                c=color,
                alpha=0.5,
                s=2)
    if powercurve:
        plt.plot(eday_curve, "r-")
    plt.xlabel("Wind Speed (M/S)")
    plt.xlim(0, 40)
    plt.xticks([0, 5, 10, 15, 20, 25, 30, 35, 40])
    plt.ylabel("Power Generated (kW)")
    plt.yticks([0, 100, 200, 300, 400, 500, 600, 700, 800, 900])
    if powercurve:
        plt.legend(["Power curve"], loc=1)
    #plt.title("Relation between windspeeds and generation for Eday 900kW Turbine")
    fig = plt.gcf()
    fig.set_size_inches(width, width * 0.75)
    fig.tight_layout()
    if save_to_pdf:
        fig.savefig("./plots/eday/" + filename + ".pdf")
    else:
        plt.show()
    plt.clf()
def ANNCertainty(start="2019-04-01",
                 stop="2019-05-01",
                 fromPickle=False,
                 clean=True,
                 load_model=False):
    """Produce (and cache) per-row WT-FFNN curtailment predictions on the
    Met-forecast/ANM joined dataset, for the certainty plot.

    fromPickle: return the previously cached result frame directly.
    clean: use the cleaned ANM data and the "ANNCertainty" cache name
        (uncleaned data uses "ANNCertainty-uncleaned").
    load_model: load a previously saved model instead of retraining.
    Returns the joined dataframe with 'ere_wtnn_prediction' and
    'ere_wtnn_prediction_correct' (0/100) columns added.
    """
    if clean:
        filename = "ANNCertainty"
    else:
        filename = "ANNCertainty-uncleaned"
    if fromPickle:
        print("Loaded", filename)
        # NOTE(review): pickle.load on a cache file — only safe for
        # locally produced data; never point this at untrusted input.
        return pickle.load(open(config.DATA_PATH + "" + filename, "rb"))
    else:
        print("Making Met-ANM dataset for certaintyPlot", filename)
        met_df = pp.getMetData(start, stop).set_index("forecast_time")
        anm_df = pp.getSingleDataframe(start,
                                       "2019-05-31",
                                       fromPickle=True,
                                       clean=clean)
        df = anm_df.join(met_df, how="inner")
        if load_model:
            ere_wtnn = m.load(filename=filename)
        else:
            # Train on the ERE turbine's measured wind speed plus
            # weekday/hour features.
            df_train = pp.getEdayData()
            df_full = pp.getSingleDataframe(fromPickle=True, clean=clean)
            df_train = df_full.join(df_train, how="inner")
            ere_wtnn = m.train_and_save_simple(
                df_train[["Wind Mean (M/S)", "weekday", "hour"]].values,
                df_train[["Curtailment"]].values,
                kfold=False,
                filename=filename)
        print("Doing ERE WT-FFNN predictions...")
        # NOTE(review): the model is trained on "Wind Mean (M/S)" but fed
        # the forecast "wind_speed" at prediction time — this looks
        # deliberate (evaluating forecasts with a turbine-trained model),
        # but confirm the two speeds share units/scale.
        df["ere_wtnn_prediction"] = [
            ere_wtnn.predict([[d[["wind_speed", "weekday",
                                  "hour"]].values]])[0][0]
            for i, d in df.iterrows()
        ]
        # 100 if the rounded prediction matches the label, else 0 —
        # stored as a percentage so .mean() reads as accuracy.
        df["ere_wtnn_prediction_correct"] = [
            int(round(d["ere_wtnn_prediction"]) == d["Curtailment"]) * 100
            for i, d in df.iterrows()
        ]
        print(df["ere_wtnn_prediction_correct"].mean())
        pickle.dump(df, open(config.DATA_PATH + "" + filename, "wb"))
        return df
def buildWindWindScatter(start, stop, filename="wind-wind-scatter",
                         save_to_pdf=False):
    """Scatter OpenWeatherMap wind speed against the ERE turbine's own
    measurement, fit a linear regression between them, and plot the fit
    line with its equation in the legend. Prints the fitted coefficient,
    intercept and R^2 score.
    """
    eday_frame = pp.getEdayData()
    owm_frame = pp.getSingleDataframe(start, stop, fromPickle=True)
    start = datetime.strptime(start, '%Y-%m-%d')
    stop = datetime.strptime(stop, '%Y-%m-%d')
    joined = owm_frame.loc[start:stop].join(
        eday_frame.loc[start:stop], how="inner")[["speed", "Wind Mean (M/S)"]]
    joined = pp.removeGlitches(joined)
    # Fit turbine speed as a linear function of the OWM forecast speed.
    regression = LinearRegression()
    regression.fit(joined[["speed"]], joined[["Wind Mean (M/S)"]])
    fitted = regression.predict(joined[["speed"]])
    coef = str(round(regression.coef_[0][0], 3))
    bias = str(round(regression.intercept_[0], 3))
    print("Coef and bias:", coef, bias)
    print("R^2 score:", r2_score(joined[["Wind Mean (M/S)"]], fitted))
    # Plot the data cloud and the regression line.
    plt.scatter(joined[["speed"]],
                joined[["Wind Mean (M/S)"]],
                color='black',
                alpha=0.5,
                s=2,
                marker="x")
    plt.plot(joined[["speed"]], fitted, color='blue', linewidth=1)
    plt.xlabel("Wind Speed from OpenWeatherMap")
    plt.ylabel("Wind Speed from ERE Turbine (M/S)")
    plt.xlim(0, 25)
    plt.ylim(0, 25)
    plt.legend(["$y = " + coef + "x + " + bias + "$", "Data"])
    fig = plt.gcf()
    fig.set_size_inches(3.3, 3)
    fig.tight_layout()
    if save_to_pdf:
        fig.savefig("./plots/" + filename + ".pdf")
    else:
        plt.show()
    plt.clf()
def plotPowerCurves():
    """Plot four power curves for the Eday 900kW turbine: the Enercon
    datasheet curve, ERE's published curve, and two empirical curves
    (median measured power per rounded wind-speed bin, over the whole
    dataset and over December 2018). Saves the figure as a .pgf file.
    """
    turbine_frame = pp.getEdayData()
    december_start = datetime.strptime("2018-12-01", '%Y-%m-%d')
    december_stop = datetime.strptime("2019-01-01", '%Y-%m-%d')
    # Empirical curve over the whole dataset: round both columns, then
    # take the median power within each integer wind-speed bin.
    overall_curve = turbine_frame[[
        "Wind Mean (M/S)", "Power Mean (Kw)"
    ]].round().groupby("Wind Mean (M/S)").median()
    # December-only curve: round only the wind speed, keep power exact.
    december_curve = turbine_frame.loc[december_start:december_stop][[
        "Wind Mean (M/S)", "Power Mean (Kw)"
    ]].round({
        "Wind Mean (M/S)": 0
    }).groupby("Wind Mean (M/S)").median().values[:, 0]
    # Reference curves, kW at each integer wind speed (m/s).
    enercon_curve = [
        0.0, 0, 1, 6, 18, 42, 83, 147, 238, 340, 466, 600, 710, 790, 850,
        880, 900, 900, 900, 900, 900, 900, 900, 900, 900, 900
    ]
    eday_curve = [
        0, 0, 0.5, 4, 19, 60, 101, 160, 253, 404, 532, 687, 820, 870, 890,
        900, 900, 900, 900, 900, 900, 900, 900, 900, 900, 900
    ]
    plt.plot(enercon_curve, "s:", alpha=0.8, markersize=3)
    plt.plot(eday_curve, "x:", alpha=0.8, markersize=3)
    plt.plot(overall_curve, "o:", alpha=0.8, markersize=3)
    plt.plot(december_curve, "v:", alpha=0.8, markersize=3)
    plt.xlabel("Wind Speed (M/S)")
    plt.xlim(0, 40)
    plt.xticks([0, 5, 10, 15, 20, 25, 30, 35, 40])
    plt.ylabel("Power Generated (kW)")
    plt.yticks([0, 100, 200, 300, 400, 500, 600, 700, 800, 900])
    plt.legend(["Enercon", "ERE", "Winter 2018/19", "December 2018"], loc=8)
    fig = plt.gcf()
    fig.set_size_inches(4.9, 3)
    fig.tight_layout()
    fig.savefig('./plots/eday_power_curves.pgf')
    plt.clf()
def buildWindsGraph(start_limit=0, stop_limit=0, zones=0):
    """Plot OpenWeatherMap wind speed and the Eday turbine's measured
    wind speed on a shared time axis for visual comparison.

    start_limit/stop_limit: 'YYYY-MM-DD' strings, or 0 for no limit.
    zones: unused; kept for signature parity with sibling builders.
    """
    # Resample to a 10-minute grid and fill gaps linearly so both series
    # plot on comparable time steps.
    owm_frame = pp.getSingleDataframe(
        start_limit, stop_limit,
        fromPickle=True).resample("10min").mean().interpolate(method='linear')
    if start_limit != 0:
        start_limit = datetime.strptime(start_limit, '%Y-%m-%d')
    if stop_limit != 0:
        stop_limit = datetime.strptime(stop_limit, '%Y-%m-%d')
    turbine_frame = pp.getEdayData().loc[start_limit:stop_limit]
    fig = plt.figure()
    axis = fig.add_axes([0.1, 0.15, 0.85, 0.8])
    axis.plot(owm_frame.index, owm_frame["speed"], "k-", linewidth=1,
              alpha=0.8)
    axis.plot(turbine_frame.index,
              turbine_frame["Wind Mean (M/S)"],
              "b-",
              linewidth=1,
              alpha=0.8)
    axis.set_xlabel("Time")
    axis.set_ylabel("M/S")
    axis.grid(b=True, which="both", axis="y")
    axis.tick_params(axis="x", which="minor")
    axis.grid(b=True, which="major", axis="x", linestyle="-.")
    axis.grid(b=True, which="minor", axis="x", linestyle="--")
    axis.legend(["OpenWeatherMap", "Eday Turbine"], loc=1)
    plt.title("Wind speed comparison")
    fig.autofmt_xdate(which="both")
    fig.set_size_inches(15, 8)
    plt.xticks(rotation=-60)
    plt.show()
    plt.clf()
def buildModelGraph(start_limit=0,
                    stop_limit=0,
                    zones=0,
                    filename="model-comparison",
                    save_to_pdf=False):
    """Two-panel figure comparing model predictions against reality:
    bottom panel plots Orkney generation minus demand over time; top
    panel is a binary mesh of per-model correctness from
    desc.evaluateDataframe, one row per model.

    start_limit/stop_limit: 'YYYY-MM-DD' strings, or 0 for no limit.
    zones: unused here — NOTE(review): kept for signature parity; confirm.
    save_to_pdf: write ./plots/<filename>.pdf instead of showing.
    """
    df_eday = pp.getEdayData()
    #Full DataSet, used for training
    try:
        df_full = pp.getSingleDataframe("2018-12-01",
                                        "2019-03-01",
                                        fromPickle=True)
    except FileNotFoundError:
        # Cache miss: rebuild the frame from the raw sources.
        df_full = pp.getSingleDataframe("2018-12-01",
                                        "2019-03-01",
                                        fromPickle=False)
    df_full = df_full.join(df_eday, how="inner")
    df_full = pp.cleanData(df_full)
    df_full = pp.addReducedCol(df_full, clean=True)
    df_full = pp.removeGlitches(df_full)
    # Evaluation window: same load-or-rebuild pattern, but without the
    # Eday join (left commented out below).
    try:
        df = pp.getSingleDataframe(start_limit, stop_limit, fromPickle=True)
    except FileNotFoundError:
        df = pp.getSingleDataframe(start_limit, stop_limit, fromPickle=False)
    #df = df.join(df_eday, how="inner")
    df = pp.cleanData(df)
    df = pp.addReducedCol(df, clean=True)
    df = pp.removeGlitches(df)
    # Convert limits to epoch seconds so window length is computable.
    if start_limit != 0:
        start_limit = datetime.strptime(start_limit, '%Y-%m-%d').timestamp()
    if stop_limit != 0:
        stop_limit = datetime.strptime(stop_limit, '%Y-%m-%d').timestamp()
    # Adjust the amount of ticks to the data size
    # (tick_zoom = hours between minor x labels; wider windows get
    # sparser labels).
    if stop_limit - start_limit > 86400 * 8:
        tick_zoom = 24
    elif stop_limit - start_limit > 86400 * 4:
        tick_zoom = 12
    elif stop_limit - start_limit > 86400 * 2:
        tick_zoom = 6
    elif stop_limit - start_limit > 86400:
        tick_zoom = 3
    else:
        tick_zoom = 1
    model_names, accs = desc.evaluateDataframe(df_full, df)
    # Drop the last column — NOTE(review): presumably a trailing
    # aggregate/padding column from evaluateDataframe; confirm there.
    accs = accs[:, :-1]
    # Generate x ticks for the mesh plot
    # (major tick at midnight, minor tick every tick_zoom hours).
    meshxticks_major = []
    meshxticks_minor = []
    for i, d in enumerate(df.index):
        if d.hour == 0 and d.minute == 0:
            meshxticks_major.append(i)
        elif d.hour % tick_zoom == 0 and d.minute == 0:
            meshxticks_minor.append(i)
    plt.xticks(rotation=-60)
    fig = plt.figure()
    # Bottom plot
    ax1 = fig.add_axes([0.10, 0.1, 0.9, 0.44])
    # Positive = surplus generation, negative = deficit.
    delta = (df["Generation"] - df["Demand"])  #.rolling(3).mean()
    ax1.plot(df.index, delta, "k-", linewidth=1, alpha=0.8)
    plt.fill_between(df.index, delta, color="k", alpha=0.3)
    ax1.margins(x=0)
    ax1.set_ylabel("MegaWatt")
    ax1.set_ylim(-25, 25)
    ax1.set_yticks([-20, -10, 0, 10, 20])
    ax1.grid(b=True, which="both", axis="y")
    ax1.xaxis.set_major_locator(mdates.DayLocator())
    ax1.xaxis.set_minor_locator(mdates.HourLocator())
    ax1.xaxis.set_major_formatter(mdates.DateFormatter("%b %d"))
    ax1.xaxis.set_minor_formatter(mdates.DateFormatter("%H:00"))
    # Hide minor labels that collide with day labels or exceed the
    # chosen label density.
    for i, t in enumerate(ax1.xaxis.get_minor_ticks()):
        if i % 24 == 0:
            t.label.set_visible(False)
        if i % tick_zoom != 0:
            t.set_visible(False)
    ax1.tick_params(axis="x", which="minor")
    ax1.grid(b=True, which="major", axis="x", linestyle="-.")
    ax1.grid(b=True, which="minor", axis="x", linestyle="--")
    ax1.legend(["Generation relative to Demand"], loc=1)
    # Top plot
    cm = plt.get_cmap("binary")
    ax2 = fig.add_axes([0.10, 0.56, 0.9, 0.44])
    ax2.pcolormesh(accs, alpha=1, cmap=cm, snap=True)
    ax2.set_xticks(meshxticks_major)
    ax2.set_xticks(meshxticks_minor, minor=True)
    ax2.xaxis.set_ticklabels([])
    ax2.grid(b=True, which="major", axis="x", linestyle="-.")
    ax2.grid(b=True, which="minor", axis="x", linestyle="--")
    # Center each model's label on its mesh row.
    ax2.set_yticks(np.arange(len(model_names)) + 0.5)
    ax2.set_yticks(np.arange(len(model_names)), minor=True)
    ax2.set_yticklabels(model_names, rotation=0, fontsize="8", va="center")
    ax2.grid(b=True, which="minor", axis="y")
    # Manual legend entries mapping colormap extremes to class labels.
    custom_lines = [
        Line2D([0], [0], color=cm(0), lw=4),
        Line2D([0], [0], color=cm(1.), lw=4)
    ]
    ax2.legend(custom_lines, ["No curtailment", "Curtailment"], loc=1)
    #plt.title("Generation relative to demand for all of Orkney. \nAccuracies for models: " + ", ".join(model_names))
    fig.autofmt_xdate(which="both")
    fig.set_size_inches(8, 3)
    if save_to_pdf:
        fig.savefig("./plots/" + filename + ".pdf")
    else:
        plt.show()
    plt.clf()
def calculateLoss():
    """Estimate energy lost to curtailment on the ERE Eday turbine.

    Compares actual energy output against three expected-output
    estimates: (1) ERE's power curve interpolated at the measured wind
    speed, (2) an empirical December-2018 curve, and (3) ERE's curve
    evaluated at ceil(wind speed) ("eday method"). Then repeats the
    eday-method comparison restricted to rows where Zone 1 curtailment
    was active, and prints all totals/percentages.
    Each 10-minute sample contributes kW * (1/6) h = kWh.
    """
    df = pp.getEdayData()
    df_full = pp.getSingleDataframe(fromPickle=True)
    df = df_full.join(df, how="inner")
    # ERE's power curve (kW) per integer wind speed, extended flat at
    # 900 kW out to 44 m/s so interp1d covers all observed speeds.
    eday_powercurve_discrete = [
        0, 0, 0.5, 4, 19, 60, 101, 160, 253, 404, 532, 687, 820, 870, 890,
        900, 900, 900, 900, 900, 900, 900, 900, 900, 900, 900, 900, 900,
        900, 900, 900, 900, 900, 900, 900, 900, 900, 900, 900, 900, 900,
        900, 900, 900, 900
    ]
    eday_powercurve = interp1d(range(0, len(eday_powercurve_discrete)),
                               eday_powercurve_discrete)
    start = datetime.strptime("2018-12-01", '%Y-%m-%d')
    stop = datetime.strptime("2019-01-01", '%Y-%m-%d')
    # Empirical December 2018 curve: median measured power per integer
    # wind-speed bin (power kept unrounded).
    december_median = df.loc[start:stop][[
        "Wind Mean (M/S)", "Power Mean (Kw)"
    ]].round({
        "Wind Mean (M/S)": 0
    }).groupby("Wind Mean (M/S)").median().values[:, 0]
    # Out-of-range speeds contribute 0 kW rather than raising.
    december_powercurve = interp1d(range(0, len(december_median)),
                                   december_median,
                                   bounds_error=False,
                                   fill_value=0)
    # Actual energy produced over the whole joined dataset.
    power_sum = 0  #716131
    for r in df["Power Mean (Kw)"].values:
        power_sum += r * (1 / 6)
    print("Power Output: {:.2f} kWh".format(power_sum))
    # Expected energy via ERE curve at the measured (float) wind speed.
    expected_power_sum = 0
    for r in df["Wind Mean (M/S)"].values:
        expected_power_sum += eday_powercurve(r) * (1 / 6)
    print("Expected Power Output: {:.2f} kWh".format(expected_power_sum))
    loss = expected_power_sum - power_sum
    print("Loss: {:.2f} kWh ({:.2f}%)".format(loss,
                                              loss / expected_power_sum * 100))
    # Expected energy via the empirical December curve.
    expected_power_sum = 0
    for r in df["Wind Mean (M/S)"].values:
        expected_power_sum += december_powercurve(r) * (1 / 6)
    print("Expected Power Output (new curve): {:.2f} kWh".format(
        expected_power_sum))
    loss = expected_power_sum - power_sum
    print("Loss (new curve): {:.2f} kWh ({:.2f}%)".format(
        loss, loss / expected_power_sum * 100))
    # "Eday method": ERE curve evaluated at ceil(speed) — presumably
    # mirrors ERE's own accounting; NOTE(review): confirm with ERE docs.
    eday_expected_power_sum = 0
    for r in df["Wind Mean (M/S)"].values:
        eday_expected_power_sum += eday_powercurve(ceil(r)) * (1 / 6)
    print("Expected Power Output (eday method): {:.2f} kWh".format(
        eday_expected_power_sum))
    eday_loss = eday_expected_power_sum - power_sum
    print("Loss (eday method): {:.2f} kWh ({:.2f}%)".format(
        eday_loss, eday_loss / eday_expected_power_sum * 100))
    print()
    print("------ Excluding measurements with no curtailment in Zone 1")
    # Restrict to rows where Zone 1 curtailment was active and repeat
    # the eday-method loss computation on that subset.
    df = df[df["Zone 1"] == 1]
    cur_power_sum = 0  #716131
    for r in df["Power Mean (Kw)"].values:
        cur_power_sum += r * (1 / 6)
    print("Power Output: {:.2f} kWh".format(cur_power_sum))
    cur_expected_power_sum = 0
    for r in df["Wind Mean (M/S)"].values:
        cur_expected_power_sum += eday_powercurve(ceil(r)) * (1 / 6)
    print("Expected Power Output (eday method): {:.2f} kWh".format(
        cur_expected_power_sum))
    cur_loss = cur_expected_power_sum - cur_power_sum
    print("Loss (eday method): {:.2f} kWh ({:.2f}% locally, {:.2f}% of total)".
          format(cur_loss, cur_loss / cur_expected_power_sum * 100,
                 cur_loss / eday_expected_power_sum * 100))
    print()
    # Share of the total eday-method loss attributable to Zone 1
    # curtailment periods.
    print("Difference in loss: {:.2f} / {:.2f} = {:.2f}%".format(
        cur_loss, eday_loss, cur_loss / eday_loss * 100))
def evaluateMetForecast(start="2019-04-01",
                        stop="2019-05-01",
                        name="met-full-frame",
                        code=0,
                        load_partial=False,
                        load_full=False):
    """Evaluate curtailment models against Met Office forecasts, grouped
    by forecast horizon.

    Builds (or loads) a Met/ANM joined frame with per-row predictions
    from the $WT_6$ correlation models and up to four neural models,
    then returns per-hours_forecast accuracy describes plus model names.

    code: which neural models to (re)train — 0 = all, 1 = WT-Percep,
        2 = WT-FFNN, 3 = ERE WT-Percep, 4 = ERE WT-FFNN.
    load_partial / load_full: cache-control flags.
        NOTE(review): the semantics read inverted relative to the names —
        load_full=True RECOMPUTES (and load_partial=True recomputes the
        base frame too), while load_full=False LOADS the cached frame.
        Confirm with callers before renaming.
    Returns (accs, names); both are also pickled under DATA_PATH.
    """
    if load_full:
        if load_partial:
            print("Making Met-ANM dataset")
            met_df = pp.getMetData(start, stop).set_index("forecast_time")
            anm_df = pp.getSingleDataframe(start,
                                           "2019-05-31",
                                           fromPickle=True,
                                           clean=True)
            df = anm_df.join(met_df, how="inner")
            # $WT_6$ correlation-model predictions from the forecast wind
            # speed plus weekday/hour (both shifted to 1-based).
            df["prediction"] = [
                int(
                    desc.correlationModelKCurve(d["wind_speed"],
                                                i.weekday() + 1, i.hour + 1,
                                                6)) for i, d in df.iterrows()
            ]
            # Correctness stored as 0/100 so .describe()/.mean() read as %.
            df["prediction_correct"] = [
                int(d["prediction"] == d["Curtailment"]) * 100
                for i, d in df.iterrows()
            ]
            df["ere_prediction"] = [
                int(
                    desc.correlationModelKCurveEday(d["wind_speed"],
                                                    i.weekday() + 1,
                                                    i.hour + 1, 6))
                for i, d in df.iterrows()
            ]
            df["ere_prediction_correct"] = [
                int(d["ere_prediction"] == d["Curtailment"]) * 100
                for i, d in df.iterrows()
            ]
            # Forecast-minus-observed wind speed, for error analysis.
            df["speed_delta"] = [
                d["wind_speed"] - d["speed"] for i, d in df.iterrows()
            ]
        else:
            # Reuse the previously built base frame.
            df = pickle.load(open(config.DATA_PATH + "" + name, "rb"))
        # Training set: observed (not forecast) data joined with the ERE
        # turbine measurements.
        df_train = pp.getEdayData()
        df_full = pp.getSingleDataframe(fromPickle=True, clean=True)
        df_train = df_full.join(df_train, how="inner")
        if code == 1 or code == 0:
            percep = m.train_and_save_perceptron(
                df_train[["speed", "weekday", "hour"]].values,
                df_train[["Curtailment"]].values,
                kfold=False,
                filename="WT-Percep-" + name)
            print("Doing WT-Percep predictions...")
            # Predictions use the forecast "wind_speed" even though
            # training used observed "speed" — presumably intentional
            # (forecast evaluation); confirm.
            df["percep_prediction"] = [
                percep.predict([[d[["wind_speed", "weekday",
                                    "hour"]].values]])[0][0]
                for i, d in df.iterrows()
            ]
            df["percep_prediction_correct"] = [
                int(round(d["percep_prediction"]) == ceil(d["Curtailment"])) *
                100 for i, d in df.iterrows()
            ]
            print("Clearing Keras session")
            # Free model memory between trainings.
            del percep
            clear_session()
        if code == 2 or code == 0:
            wtnn = m.train_and_save_simple(
                df_train[["speed", "weekday", "hour"]].values,
                df_train[["Curtailment"]].values,
                kfold=False,
                filename="WT-FFNN-" + name)
            print("Doing WT-FFNN predictions...")
            df["wtnn_prediction"] = [
                wtnn.predict([[d[["wind_speed", "weekday",
                                  "hour"]].values]])[0][0]
                for i, d in df.iterrows()
            ]
            df["wtnn_prediction_correct"] = [
                int(round(d["wtnn_prediction"]) == ceil(d["Curtailment"])) *
                100 for i, d in df.iterrows()
            ]
            print("Clearing Keras session")
            del wtnn
            clear_session()
        if code == 3 or code == 0:
            # ERE variants train on the turbine's measured wind speed.
            ere_percep = m.train_and_save_perceptron(
                df_train[["Wind Mean (M/S)", "weekday", "hour"]].values,
                df_train[["Curtailment"]].values,
                kfold=False,
                filename="WT-Percep-ERE-" + name)
            print("Doing ERE WT-Percep predictions...")
            df["ere_percep_prediction"] = [
                ere_percep.predict(
                    [[d[["wind_speed", "weekday", "hour"]].values]])[0][0]
                for i, d in df.iterrows()
            ]
            df["ere_percep_prediction_correct"] = [
                int(
                    round(d["ere_percep_prediction"]) == ceil(
                        d["Curtailment"])) * 100 for i, d in df.iterrows()
            ]
            print("Clearing Keras session")
            del ere_percep
            clear_session()
        if code == 4 or code == 0:
            ere_wtnn = m.train_and_save_simple(
                df_train[["Wind Mean (M/S)", "weekday", "hour"]].values,
                df_train[["Curtailment"]].values,
                kfold=False,
                filename="WT-FFNN-ERE-" + name)
            print("Doing ERE WT-FFNN predictions...")
            df["ere_wtnn_prediction"] = [
                ere_wtnn.predict(
                    [[d[["wind_speed", "weekday", "hour"]].values]])[0][0]
                for i, d in df.iterrows()
            ]
            df["ere_wtnn_prediction_correct"] = [
                int(round(d["ere_wtnn_prediction"]) == ceil(d["Curtailment"]))
                * 100 for i, d in df.iterrows()
            ]
            print("Clearing Keras session")
            del ere_wtnn
            clear_session()
        pickle.dump(df, open(config.DATA_PATH + "" + name, "wb"))
    else:
        print("Loading full met frame from", config.DATA_PATH + "" + name)
        df = pickle.load(open(config.DATA_PATH + "" + name, "rb"))
    # Accuracy per forecast horizon for all six models.
    # NOTE(review): these columns only all exist when code == 0 (or the
    # frame was fully built previously) — a partial run will raise
    # KeyError here; confirm intended usage.
    hours = df.groupby("hours_forecast")
    accs = []
    accs.append(hours["prediction_correct"].describe())
    accs.append(hours["ere_prediction_correct"].describe())
    accs.append(hours["percep_prediction_correct"].describe())
    accs.append(hours["ere_percep_prediction_correct"].describe())
    accs.append(hours["wtnn_prediction_correct"].describe())
    accs.append(hours["ere_wtnn_prediction_correct"].describe())
    names = [
        "$WT_6$", "$WT_6$ - ERE power curve", "WT-Percep",
        "WT-Percep - ERE wind data", "WT-FFNN", "WT-FFNN - ERE wind data"
    ]
    pickle.dump(accs, open(config.DATA_PATH + "" + name + "-describes", "wb"))
    pickle.dump(names,
                open(config.DATA_PATH + "" + name + "-describes-name", "wb"))
    return accs, names