def main():
    """Scatter AHU 1 power against its air-on/outlet temperature difference.

    Loads the columns named by ``layouts.ahu_layout`` through
    ``utils.prep_dataframe`` and shows one scatter plot whose x axis is
    ``ahu_1_air_on - ahu_1_outlet`` and whose y axis is ``ahu_1_power``.
    """
    K = list(layouts.ahu_layout.keys())
    df = utils.prep_dataframe(keep=K)

    # Only AHU 1 is plotted, so only its three columns are extracted; the
    # remaining AHU columns are not needed by this function.
    air_on = df['ahu_1_air_on'].values
    outlet = df['ahu_1_outlet'].values
    power = df['ahu_1_power'].values

    plt.scatter(air_on - outlet, power)
    plt.show()
# Build the sender -> recipient edge list for the e-mails found in
# Kaminski's "sent" folders of the raw e-mail dump.
df = pd.read_csv('./data/emails.csv')

# Pre-process complete dataset
#df_parsed = pd.DataFrame(list(map(get_email_from_string, df['message'])))

# Subset emails in sent folders only
sent = df.loc[df['file'].str.contains('sent')]

# From sent subset extract and process emails of Kaminski
kaminski = sent.loc[sent['file'].str.contains('kaminski-v')]
kaminski_parsed = pd.DataFrame(
    list(map(get_email_from_string, kaminski['message'])))
# Messages without a 'To' value cannot contribute an edge.
kaminski_parsed.dropna(subset=['To'], inplace=True)
kaminski_parsed.to_csv('./data/kaminski_parsed.csv', index=False)

kaminski_comm = prep_dataframe(kaminski_parsed)
kaminski_comm['recipients'] = kaminski_comm['recipients'].apply(
    lambda x: x.split(','))

# One row per (sender, recipient) pair.
kaminski_full = get_pairwise_communication(kaminski_comm['sender'],
                                           kaminski_comm['recipients'])
kaminski_full['recipient'] = kaminski_full['recipient'].str.strip(' \n\t')

# Regular expression used to recognise Kaminski's own addresses.
# NOTE(review): the second alternative looks like an address that was
# replaced by an e-mail obfuscation placeholder; as a regex it is a
# character class -- confirm the intended address.
kaminski_pattern = 'vince.kaminski.enron.com|[email protected]'

kaminski_sender = kaminski_full[
    kaminski_full['sender'].str.contains(kaminski_pattern)]
to_kaminski = kaminski_full[
    kaminski_full['recipient'].str.contains(kaminski_pattern)]

# Count e-mails per (sender, recipient) pair. Naming the counts column
# through reset_index(name=...) avoids relying on the default column label,
# which is 0 on older pandas and 'count' on pandas >= 2.0.
to_kaminski_edges = to_kaminski.value_counts(['sender', 'recipient'])
to_kaminski_edges = to_kaminski_edges.reset_index(name='num_emails')
def main(method, transform, temporal, layout, hybrid, threshold, output):
    """Fit a GMRF (or a hybrid HRF) on one layout, then plot and save it.

    Parameters
    ----------
    method : sequence
        Only ``method[0]`` is used; it is forwarded to ``GMRF(method=...)``.
    transform : bool
        When truthy, the data are passed through ``tr.to_normal`` first.
    temporal : bool
        When truthy, shifted features are joined to the frame and rows with
        missing values are dropped.
    layout : str
        Name of the attribute of ``layouts`` whose keys select the columns.
    hybrid : bool
        Fit ``HRF(k=5, k_star=10)`` instead of a ``GMRF``.
    threshold : bool
        When truthy, sweep thresholds in [0, 1) over a copy of the fitted
        precision matrix and plot connectivity and BIC.
    output : str
        Prefix of the ``.npy`` files written under ``../results/`` (relative
        to this file); nothing is saved when falsy.
    """
    selected = getattr(layouts, layout)
    K = list(selected.keys())
    df = utils.prep_dataframe(keep=K)

    if temporal:
        lagged = utils.create_shifted_features(df)
        df = df.join(lagged, how="outer").dropna()

    if transform:
        print("* Tranform data")
        gaussianised = tr.to_normal(df.values)
        df = pd.DataFrame(gaussianised, index=df.index.values,
                          columns=df.columns.values)
    X = df.values

    if hybrid:
        model = HRF(k=5, k_star=10, variables_names=df.columns.values)
    else:
        model = GMRF(method=method[0])
    model.fit(X)

    if hybrid:
        print("* Selected k = {}, k star = {}".format(model.k, model.k_star))
    else:
        print("* Selected alpha = {}".format(model.alpha_))

    if threshold:
        Q = model.precision_.copy()
        cutoffs = np.arange(0., 1., 0.001)
        bics = np.empty(len(cutoffs))
        connectivity = np.empty(len(cutoffs))
        n_cells = Q.shape[0] * Q.shape[0]

        probe = GMRF()
        probe.mean_ = np.mean(X, axis=0)
        for step, cutoff in enumerate(cutoffs):
            # Q is thresholded cumulatively: entries zeroed at a smaller
            # cutoff stay zero for all later ones.
            # NOTE(review): the comparison is signed, so negative entries
            # are zeroed too -- confirm whether |Q| < cutoff was intended.
            Q[Q < cutoff] = 0
            probe.precision_ = Q
            bic_value, _ = probe.bic(X)
            bics[step] = bic_value
            connectivity[step] = 1 - np.count_nonzero(Q == 0) / n_cells

        fig, (ax, ax1) = plt.subplots(2, 1)
        ax.plot(connectivity)
        ax1.plot(bics)

    if output:
        results_name = os.path.join(os.path.dirname(__file__), "../results/")
        prefix = results_name + output
        if hybrid:
            # One (names, nodes, edges) triple per variable.
            BNs = np.empty(len(model.variables_names), dtype=object)
            for idx in range(len(BNs)):
                bn = model.bns[idx]
                BNs[idx] = (bn.variables_names, bn.nodes, bn.edges)
            np.save(prefix + "_bns", BNs)
        else:
            np.save(prefix + "_prec", model.precision_)
            np.save(prefix + "_mean", model.mean_)
            np.save(prefix + "_bic_scores", model.bic_scores)

    if not hybrid:
        plt.figure()
        plt.plot(model.bic_scores)

        fig, ax = plt.subplots(1, 1)
        pl.bin_precision_matrix(model.precision_, df.columns.values, ax)

    plt.show()
import os
import sys
import utils
import layouts
import numpy as np
import pandas as pd

# Data frame with the datacenter layout columns plus their shifted
# features; rows with missing values are dropped.
K = list(layouts.datacenter_layout.keys())
df = utils.prep_dataframe(keep=K)
df_shifted = utils.create_shifted_features(df)
df = df.join(df_shifted, how="outer")
df = df.dropna()

# Stored evaluation results for the GMRF and hybrid models.
kf_scores = np.load("../results/gmrfNone_kf_scores.npy")
r2 = np.load("../results/gmrfNone_r2.npy")
kf_scores_hybrid = np.load("../results/hybridNone_kf_scores.npy")
r2_hybrid = np.load("../results/hybridNone_r2.npy")


def tables(data, r2, name):
    # NOTE(review): only the beginning of this function is visible in the
    # reviewed chunk; the statements below are reproduced as seen.
    print(name.upper())
    # Raw strings keep the LaTeX backslashes intact; in a normal string
    # literal "\t" would be read as a TAB character.
    columns = [r"\textbf{Variables}", r"\textbf{MAD t = 1}",
               r"\textbf{MAD t = 4}", r"\textbf{MAD t = 8}",
               r"\textbf{$R^2$ = 0}"]
    table = pd.DataFrame(columns=columns)
    j = 0
    mean = np.mean(data, axis=0)
    std = np.std(data, axis=0)
    for i, n in enumerate(df.columns.values):
        if 'l1_' not in n:
            v = ((n.replace("_", " ")).upper())[:5]
def main():
    """Regress AHU outlet on air-on temperature and relate both to power.

    Steps, in order:

    1. Fit one linear regression of outlet on air-on temperature per AHU.
       AHU 3 (column index 2) is fitted twice, once on the samples whose
       outlet is below 21.5 and once on those above 23.3, giving five fits.
    2. Plot air-on against outlet per AHU, the power samples of each fit,
       and intercept against coefficient (marker size from mean power).
    3. Plot the summed air-on minus outlet difference against the room
       cooling power shifted by one sample.
    4. Cross-validate polynomial regressions (degree 0 to 3) of the per-AHU
       power on the per-AHU temperature difference, then fit and print a
       plain linear regression on the same data.

    NOTE(review): ``LinearRegression(normalize=True)`` and
    ``KFold(n=..., n_folds=...)`` look like the older scikit-learn
    signatures -- confirm against the pinned version.
    """
    K = list(layouts.ahu_layout.keys())
    df = utils.prep_dataframe(keep=K)

    X_air_on = df[['ahu_1_air_on', 'ahu_2_air_on',
                   'ahu_3_air_on', 'ahu_4_air_on']].values
    X_outlet = df[['ahu_1_outlet', 'ahu_2_outlet',
                   'ahu_3_outlet', 'ahu_4_outlet']].values
    X_power = df[['ahu_1_power', 'ahu_2_power',
                  'ahu_3_power', 'ahu_4_power']].values

    cooling_shifted = df['room_cooling_power_(kw)'].shift(1)
    cooling_shifted = cooling_shifted.dropna()

    # (AHU column, row selector) for each of the five fits. The two AHU 3
    # subsets generally differ in length, so they are kept in a plain list
    # rather than packed into one (ragged) NumPy array.
    subsets = []
    for i in range(4):
        if i != 2:
            subsets.append((i, slice(None)))
        else:
            subsets.append((i, np.where(X_outlet[:, i] < 21.5)[0]))
            subsets.append((i, np.where(X_outlet[:, i] > 23.3)[0]))

    linreg = LinearRegression(normalize=True)
    coefs = np.empty(len(subsets))
    intercepts = np.empty(len(subsets))
    mean_power = np.empty(len(subsets))
    powers = []
    for p, (i, rows) in enumerate(subsets):
        X = X_air_on[rows, i].reshape(-1, 1)
        Y = X_outlet[rows, i].reshape(-1, 1)
        linreg.fit(X, Y)
        print("{} Linear regression: intercept = {}, coef = {}"
              .format(i, linreg.intercept_, linreg.coef_))
        coefs[p] = linreg.coef_[0][0]
        intercepts[p] = linreg.intercept_[0]
        powers.append(X_power[rows, i])
        mean_power[p] = np.mean(X_power[rows, i])

    # Air-on versus outlet, one colour per AHU.
    c = ['b', 'g', 'r', 'm']
    plt.figure()
    handles = tuple(plt.scatter(X_air_on[:, i], X_outlet[:, i], c=c[i])
                    for i in range(4))
    plt.xlabel('Air on')
    plt.ylabel('Outlet')
    plt.legend(handles, ("Ahu 1", "Ahu 2", "Ahu 3", "Ahu 4"))

    # Power samples of each fit, drawn as one vertical strip per fit.
    plt.figure()
    for i, samples in enumerate(powers):
        plt.scatter(np.ones(samples.shape[0]) * i, samples, alpha=0.01)

    # Fitted line parameters; marker size grows with the mean power.
    plt.figure()
    plt.scatter(intercepts, coefs, s=mean_power * 100 + 10,
                c=['r', 'b', 'g', 'c', 'm'])

    # The last difference sample is dropped to line up with the shifted
    # cooling series.
    plt.figure()
    plt.scatter((np.sum(X_air_on, axis=1) - np.sum(X_outlet, axis=1))[:-1],
                cooling_shifted)
    plt.xlabel("Air on - Outlet")
    plt.ylabel("Cooling power")

    X = X_air_on - X_outlet
    print(X.shape)
    Y = X_power

    for degree in [0, 1, 2, 3]:
        kf = KFold(n=X.shape[0], n_folds=4)
        scores = []
        for train, test in kf:
            model = make_pipeline(PolynomialFeatures(degree),
                                  LinearRegression())
            model.fit(X[train], Y[train])
            s = model.score(X[test], Y[test])
            scores.append(s)
            print("Degree {}: score = {}".format(degree, s))
        print("Mean score = {}".format(np.mean(scores)))

    linreg.fit(X, Y)
    print("Coef = {}, intercept = {}".format(linreg.coef_, linreg.intercept_))

    plt.show()
def main():
    """Plot a learning curve for the hybrid model on the datacenter layout.

    The data (layout columns plus shifted features, rows with missing
    values dropped) are split 75/25 into train and test parts. ``scoring``
    is run in a process pool on growing prefixes of the training set
    (100, 200, ... rows), and the scores are plotted against the prefix
    size.

    The results are sorted and then read positionally: element 1 of each
    result is plotted as the training score and element 2 as the
    cross-validation score. Presumably element 0 is the training size, so
    that sorting restores size order -- confirm against ``scoring``.
    """
    K = list(layouts.datacenter_layout.keys())
    df = utils.prep_dataframe(keep=K)
    df_shifted = utils.create_shifted_features(df)
    df = df.join(df_shifted, how="outer")
    df = df.dropna()

    # Variables that are not shifted copies.
    names = [name for name in df.columns.values if 'l1_' not in name]

    X = df.values
    hrf = HRF(k=5, k_star=10, variables_names=df.columns.values)

    X_train, X_test = train_test_split(X, test_size=0.25)
    sizes = range(100, X_train.shape[0], 100)

    pool = mp.Pool(processes=8)
    try:
        results = [pool.apply_async(scoring, args=(df, hrf, names,
                                                   X_train[:i, :], X_test, i))
                   for i in sizes]
        output = [p.get() for p in results]
    finally:
        # Release the worker processes even if a task raised.
        pool.close()
        pool.join()

    output.sort()
    output = [np.array(t) for t in zip(*output)]
    cv_scores = output[2]
    train_scores = output[1]

    plt.plot(sizes, cv_scores, 'bo-')
    plt.plot(sizes, train_scores, 'ro-')
    plt.show()
# These are the "Tableau 20" colors as RGB.
tableau20 = [
    (31, 119, 180), (174, 199, 232), (255, 127, 14), (255, 187, 120),
    (44, 160, 44), (152, 223, 138), (214, 39, 40), (255, 152, 150),
    (148, 103, 189), (197, 176, 213), (140, 86, 75), (196, 156, 148),
    (227, 119, 194), (247, 182, 210), (127, 127, 127), (199, 199, 199),
    (188, 189, 34), (219, 219, 141), (23, 190, 207), (158, 218, 229),
]

# Scale the RGB values to the [0, 1] range, which is the format matplotlib
# accepts. Entries are replaced in place, so the list length never changes
# while it is being iterated.
for i, (r, g, b) in enumerate(tableau20):
    tableau20[i] = (r / 255., g / 255., b / 255.)

# Static layout columns, plus the same frame extended with shifted features
# (rows with missing values dropped).
K = list(layouts.datacenter_layout.keys())
df_static = utils.prep_dataframe(keep=K)
df_shifted = utils.create_shifted_features(df_static)
df = df_static.join(df_shifted, how="outer")
df = df.dropna()

# Stored results for the GMRF and hybrid models.
kf_scores = np.load("../results/gmrfNone_kf_scores.npy")
r2 = np.load("../results/gmrfNone_r2.npy")
kf_scores_hybrid = np.load("../results/hybridNone_kf_scores.npy")
r2_hybrid = np.load("../results/hybridNone_r2.npy")
bns = np.load("../results/hybrid_bns.npy")


# GMRF BIC plot
def gmrf_bic_plot():
    # NOTE(review): only the beginning of this function is visible in the
    # reviewed chunk; the statements below are reproduced as seen.
    print("BIC plot")
    fig, ax = newfig(1.)
def main(alpha, transform, temporal, layout, steps, output, hybrid):
    """Score a GMRF (or hybrid HRF) with 5-fold cross-validation.

    Parameters
    ----------
    alpha : float
        Forwarded to ``GMRF(alpha=...)``; unused when ``hybrid`` is truthy.
    transform : bool
        When truthy, the data are passed through ``tr.to_normal`` first.
    temporal : bool
        When truthy, shifted features are joined to the frame and rows with
        missing values are dropped.
    layout : str
        Name of the attribute of ``layouts`` whose keys select the columns.
    steps : int
        Forwarded to ``scoring``; also part of the output file names and
        selects the plot type (box plot when equal to 1).
    output : str
        Prefix of the ``.npy`` files written under ``../results/`` (relative
        to this file); nothing is saved when falsy.
    hybrid : bool
        Use ``HRF(k=5, k_star=10)`` instead of a ``GMRF``.

    Each fold is scored in its own worker process. Element 0 of every
    ``scoring`` result is averaged into ``r2`` and element 1 into
    ``scores``.
    """
    variables = getattr(layouts, layout)
    K = list(variables.keys())
    df = utils.prep_dataframe(keep=K)

    if temporal:
        df_shifted = utils.create_shifted_features(df)
        df = df.join(df_shifted, how="outer")
        df = df.dropna()

    # Variables that are not shifted copies.
    names = [name for name in df.columns.values if 'l1_' not in name]

    if transform:
        print("* Tranform data")
        X = tr.to_normal(df.values)
        df = pd.DataFrame(X, index=df.index.values, columns=df.columns.values)

    if hybrid:
        model = HRF(variables_names=df.columns.values, k=5, k_star=10)
    else:
        model = GMRF(variables_names=df.columns.values, alpha=alpha)

    kf = KFold(df.shape[0], n_folds=5, shuffle=False)
    pool = mp.Pool(processes=5)
    print("* Scoring")
    try:
        pending = [pool.apply_async(scoring, args=(df, model, names, train,
                                                   test, steps, fold_id))
                   for fold_id, (train, test) in enumerate(kf)]
        results = [p.get() for p in pending]
    finally:
        # Release the worker processes even if a fold raised.
        pool.close()
        pool.join()

    results = [np.array(t) for t in zip(*results)]
    r2 = results[0]
    kf_scores = results[1]

    # Average over the folds.
    r2 = np.sum(r2, axis=0) / len(kf)
    scores = np.sum(kf_scores, axis=0) / len(kf)

    if output:
        results_name = os.path.join(os.path.dirname(__file__), "../results/")
        np.save(results_name + output + str(steps) + "_kf_scores", kf_scores)
        np.save(results_name + output + str(steps) + "_scores", scores)
        np.save(results_name + output + str(steps) + "_r2", r2)

    labels = [label for label in df.columns.values if 'ahu' not in label]

    if steps == 1:
        plt.figure()
        plt.boxplot(scores)
        # One tick per label (box positions start at 1), so the tick count
        # always matches the number of labels.
        plt.xticks(np.arange(1, len(labels) + 1), labels, rotation=90)
    else:
        plt.figure()
        plt.plot(scores)

    plt.figure()
    plt.plot(r2)
    plt.show()
def main():
    """Plot AHU temperature-difference and power diagnostics.

    Figures produced, in order:

    1. AHU 3 power against its air-on minus outlet difference, with one
       fitted regression line for the samples whose difference is below 4
       and one for the samples at or above 4.
    2. Power of the four AHUs over the sample index.
    3. Outlet temperature of the four AHUs over the sample index.
    4. Air-on minus outlet difference of AHU 1 and of AHU 4, each against
       that of AHU 2.
    """
    K = list(layouts.ahu_layout.keys())
    df = utils.prep_dataframe(keep=K)

    X_air_on = df[['ahu_1_air_on', 'ahu_2_air_on',
                   'ahu_3_air_on', 'ahu_4_air_on']].values
    X_outlet = df[['ahu_1_outlet', 'ahu_2_outlet',
                   'ahu_3_outlet', 'ahu_4_outlet']].values
    X_power = df[['ahu_1_power', 'ahu_2_power',
                  'ahu_3_power', 'ahu_4_power']].values

    # AHU 3 lives in column index 2.
    delta_3 = X_air_on[:, 2] - X_outlet[:, 2]
    power_3 = X_power[:, 2]

    plt.figure()
    plt.scatter(power_3, delta_3)
    plt.ylabel('Air on - outlet')
    plt.xlabel('Power')
    plt.title('AHU 3')

    low = np.where(delta_3 < 4)[0]
    high = np.where(delta_3 >= 4)[0]

    # Fit and draw one line per regime on the AHU 3 figure.
    linreg = LinearRegression()
    for rows in (low, high):
        regime_power = power_3[rows].reshape(len(rows), 1)
        regime_delta = delta_3[rows].reshape(len(rows), 1)
        linreg.fit(regime_power, regime_delta)
        plt.plot(power_3[rows], linreg.predict(regime_power))
        print("{} Linear regression: intercept = {}, coef = {}".format(
            0, linreg.intercept_, linreg.coef_))

    ahu_names = ("AHU 1", "AHU 2", "AHU 3", "AHU 4")

    plt.figure()
    powersLines = plt.plot(X_power)
    plt.xlabel("Time")
    plt.ylabel("Power")
    plt.legend(powersLines, ahu_names)

    plt.figure()
    powersLines = plt.plot(X_outlet)
    plt.xlabel("Time")
    plt.ylabel("Outlet")
    plt.legend(powersLines, ahu_names)

    plt.figure()
    plt.scatter(X_air_on[:, 0] - X_outlet[:, 0],
                X_air_on[:, 1] - X_outlet[:, 1])
    plt.scatter(X_air_on[:, 3] - X_outlet[:, 3],
                X_air_on[:, 1] - X_outlet[:, 1])

    plt.show()
def main(layout, model, transform, output):
    """Learn an MDP controller on a fitted model and plot a simulated run.

    Parameters
    ----------
    layout : str
        Name of the attribute of ``layouts`` whose keys select the columns.
    model : sequence
        Only ``model[0]`` is used; it must be ``'gmrf'`` or ``'hybrid'``.
    transform : bool
        When truthy, the data are passed through ``tr.to_normal`` first.
    output
        Not used by this function.

    Raises
    ------
    ValueError
        If ``model[0]`` is neither ``'gmrf'`` nor ``'hybrid'``.

    The first 75% of the rows are used to fit the model; the remaining rows
    are handed to ``run_simulation``. For each controlled variable one
    figure shows the action values over the step index, drawn in blue for
    actions whose first element is 0 and in green for those whose first
    element is 1. A final figure shows the per-step maximum, mean and
    minimum of the simulated states.
    """
    variables = getattr(layouts, layout)
    K = list(variables.keys())
    df = utils.prep_dataframe(keep=K)
    df_shifted = utils.create_shifted_features(df)
    df = df.join(df_shifted, how="outer")
    df = df.dropna()

    if transform:
        print("* Tranform data")
        X = tr.to_normal(df.values)
        df = pd.DataFrame(X, index=df.index.values, columns=df.columns.values)
    X = df.values

    model_name = model[0]
    if model_name == 'gmrf':
        fitted = GMRF(variables_names=df.columns.values, alpha=0.1)
    elif model_name == 'hybrid':
        fitted = HRF(k=5, k_star=10, variables_names=df.columns.values)
    else:
        # Fail here with a clear message instead of later on `.fit`.
        raise ValueError(
            "Unknown model {!r}; expected 'gmrf' or 'hybrid'".format(
                model_name))

    # Chronological split: no shuffling.
    lim = int(X.shape[0] * 0.75)
    X_train = X[:lim]
    X_test = X[lim:]

    fitted.fit(X_train)
    print("* Model Fitted")

    controls_vars = ['ahu_3_outlet']
    controller = Controller(6, 15, 30)

    mdp = MDP(fitted, 1000, reward, 0.8, feature_creator, controller,
              controls_vars, n_jobs=3)
    mdp.learn()

    actions, states = run_simulation(X_test, controls_vars, mdp, fitted,
                                     controller)
    print(actions)

    # For every controlled variable, split the (step, value) pairs by the
    # first element of the action and transpose them into (steps, values).
    # zip(*[]) yields nothing, so an empty group becomes an empty list.
    actions_values_one = []
    actions_values_two = []
    for i in range(len(controls_vars)):
        kind_zero = [(j, a[i][1]) for j, a in enumerate(actions)
                     if a[i][0] == 0]
        kind_one = [(j, a[i][1]) for j, a in enumerate(actions)
                    if a[i][0] == 1]
        actions_values_one.append(list(zip(*kind_zero)))
        actions_values_two.append(list(zip(*kind_one)))

    for name, first, second in zip(controls_vars, actions_values_one,
                                   actions_values_two):
        plt.figure()
        if first:
            plt.plot(list(first[0]), list(first[1]), 'b')
        if second:
            plt.plot(list(second[0]), list(second[1]), 'g')
        plt.title(name)

    max_states = np.amax(states, axis=1)
    mean_states = np.mean(states, axis=1)
    min_states = np.amin(states, axis=1)

    plt.figure()
    plt.plot(max_states, 'r')
    plt.plot(mean_states, 'g')
    plt.plot(min_states, 'b')

    plt.show()