import numpy as np
import pandas as pd
from sklearn.model_selection import KFold


def compute_inner_error(overall_df_inner, learning_rate_cv, num_iterations_cv,
                        num_season_factors_cv, num_home_factors_cv, lam_cv,
                        A_source):
    print(num_iterations_cv, num_season_factors_cv, num_home_factors_cv, lam_cv)
    inner_kf = KFold(n_splits=2)
    pred_inner = {}
    for train_inner, test_inner in inner_kf.split(overall_df_inner):
        train_ix_inner = overall_df_inner.index[train_inner]
        test_ix_inner = overall_df_inner.index[test_inner]
        train_test_ix_inner = np.concatenate([test_ix_inner, train_ix_inner])
        df_t_inner = target_df.loc[train_test_ix_inner]
        dfc_t_inner = target_dfc.loc[train_test_ix_inner]
        tensor_inner = get_tensor(df_t_inner, start, stop)
        tensor_copy_inner = tensor_inner.copy()
        # Mask all appliance readings (everything except the aggregate at
        # index 0) for the first len(test_ix_inner) homes, so the model must
        # reconstruct them.
        tensor_copy_inner[:len(test_ix_inner), 1:, :] = np.nan
        L_inner = target_L[np.ix_(np.concatenate([test_inner, train_inner]),
                                  np.concatenate([test_inner, train_inner]))]
        if setting == "transfer":
            A_source = A_store[learning_rate_cv][num_season_factors_cv][
                num_home_factors_cv][lam_cv][num_iterations_cv]
        else:
            A_source = None
        H, A, T, Hs, As, Ts, HATs, costs = learn_HAT_adagrad_graph(
            case, tensor_copy_inner, L_inner,
            num_home_factors_cv, num_season_factors_cv,
            num_iter=num_iterations_cv, lr=learning_rate_cv, dis=False,
            lam=lam_cv, A_known=A_source, T_known=T_constant)
        HAT = multiply_case(H, A, T, case)
        for appliance in APPLIANCES_ORDER:
            if appliance not in pred_inner:
                pred_inner[appliance] = []
            pred_inner[appliance].append(
                pd.DataFrame(HAT[:len(test_ix_inner),
                                 appliance_index[appliance], :],
                             index=test_ix_inner))
    err = {}
    appliance_to_weight = []
    for appliance in APPLIANCES_ORDER[1:]:
        pred_inner[appliance] = pd.concat(pred_inner[appliance])
        try:
            if appliance == "hvac":
                # HVAC is evaluated only on May-October.
                err[appliance] = compute_rmse_fraction(
                    appliance,
                    pred_inner[appliance][list(range(5 - start, 11 - start))],
                    target, start, stop)[2]
            else:
                err[appliance] = compute_rmse_fraction(
                    appliance, pred_inner[appliance], target, start, stop)[2]
            appliance_to_weight.append(appliance)
        except Exception as e:
            # This appliance does not have enough samples and will not be
            # weighted.
            print(e)
            print(appliance)
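# A minimal sketch (not part of the original script) of driving
# compute_inner_error over a hyperparameter grid. It assumes the function is
# extended to return the contribution-weighted mean error, and that
# `overall_df` is the outer-fold training frame; the grid values below are
# illustrative assumptions, not the exact ones used in the experiments.
least_error, best_params = np.inf, None
for lr_cv in [0.1, 0.5, 1.0]:
    for n_iter_cv in range(100, 1400, 400):
        for nsf_cv in range(2, 5):
            for nhf_cv in range(3, 8, 2):
                for l_cv in [0.0, 0.001, 0.01]:
                    e = compute_inner_error(overall_df, lr_cv, n_iter_cv,
                                            nsf_cv, nhf_cv, l_cv,
                                            A_source=None)
                    if e < least_error:
                        least_error = e
                        best_params = (lr_cv, n_iter_cv, nsf_cv, nhf_cv, l_cv)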
directory = os.path.expanduser(
    '~/git/scalable-nilm/aaai18/predictions/TF-all/{}/case-{}/{}/{}'.format(
        setting, case, static_use, constant_use))
if not os.path.exists(directory):
    os.makedirs(directory)
filename = os.path.join(directory, name + '.pkl')
try:
    out[case][constant_use][static_use][setting][train_percentage][random_seed] = {}
    params[case][constant_use][static_use][setting][train_percentage][random_seed] = {}
    pr = pickle.load(open(filename, 'rb'))
    pred = pr['Predictions']
    parameter_data = pr['Learning Params']
    params[case][constant_use][static_use][setting][train_percentage][random_seed] = parameter_data
    for appliance in APPLIANCES_ORDER[1:]:
        prediction = pred[appliance].loc[idx]
        if appliance == 'hvac':
            # HVAC is evaluated only on May-October.
            prediction = prediction[list(range(5 - start, 11 - start))]
        out[case][constant_use][static_use][setting][train_percentage][random_seed][appliance] = \
            compute_rmse_fraction(appliance, prediction, target, start, stop)[2]
    print("Computed for: {}".format(name))
except Exception as e:
    print(e)
    print("Exception")

# Average the per-appliance errors across random seeds.
out[case][constant_use][static_use][setting][train_percentage] = pd.DataFrame(
    out[case][constant_use][static_use][setting][train_percentage]).mean(axis=1)
pickle.dump(out, open('../predictions/lr-tf-{}-{}-part.pkl'.format(source, target), 'wb'))
pickle.dump(params, open('../predictions/params-lr-tf-{}-{}-part.pkl'.format(source, target), 'wb'))
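# A minimal sketch (assumed, not part of the original script) of reading the
# dumped results back and tabulating error against training percentage for
# each setting; `case`, `constant_use`, and `static_use` come from the
# surrounding script.
res = pickle.load(open('../predictions/lr-tf-{}-{}-part.pkl'.format(source, target), 'rb'))
for setting_name, by_pct in res[case][constant_use][static_use].items():
    # Each value is a Series of per-appliance errors after the mean over seeds.
    curve = pd.DataFrame(by_pct)  # columns: train_percentage, rows: appliances
    print(setting_name)
    print(curve.T)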
            pred_inner[appliance].append(
                pd.DataFrame(
                    HAT[:len(test_ix_inner), appliance_index[appliance], :],
                    index=test_ix_inner))
    err = {}
    appliance_to_weight = []
    for appliance in APPLIANCES_ORDER[1:]:
        pred_inner[appliance] = pd.concat(pred_inner[appliance])
        try:
            if appliance == "hvac":
                # HVAC is evaluated only on May-October.
                err[appliance] = compute_rmse_fraction(
                    appliance,
                    pred_inner[appliance][list(range(5 - start, 11 - start))],
                    target, start, stop)[2]
            else:
                err[appliance] = compute_rmse_fraction(
                    appliance, pred_inner[appliance], target, start, stop)[2]
            appliance_to_weight.append(appliance)
        except Exception as e:
            # This appliance does not have enough samples and will not be
            # weighted.
            print(e)
            print(appliance)
            sys.stdout.flush()
    print("Error weighted on: {}".format(appliance_to_weight))
                features, random_seed, train_percentage)
        else:
            name = "{}-{}-{}-{}-{}-{}".format(
                source, target, appliance, features,
                random_seed, train_percentage)
        directory = os.path.expanduser(
            '~/git/scalable-nilm/aaai18/predictions/MF/{}'.format(setting))
        if not os.path.exists(directory):
            os.makedirs(directory)
        filename = os.path.join(directory, name + '.pkl')
        pred = pickle.load(open(filename, 'rb'))['Prediction']
        print(filename)
        out[setting][train_percentage][random_seed][appliance] = \
            compute_rmse_fraction(appliance, pred, target)[2]
    except Exception:
        # Missing or unreadable prediction files are skipped.
        pass
    # Average the per-appliance errors across random seeds.
    out[setting][train_percentage] = pd.DataFrame(
        out[setting][train_percentage]).mean(axis=1)

save_loc = os.path.expanduser(
    "~/git/scalable-nilm/aaai18/predictions/mf-{}-{}.pkl".format(source, target))
pickle.dump(out, open(save_loc, 'wb'))
pickle.dump(out, open('../../../predictions/mf-{}-{}.pkl'.format(source, target), 'wb'))
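# A minimal sketch (assumed, not in the original script) of turning the saved
# MF errors into a learning curve: mean error across appliances for each
# training percentage, plotted per setting.
import matplotlib.pyplot as plt

res = pickle.load(open(save_loc, 'rb'))
for setting_name, by_pct in res.items():
    curve = pd.DataFrame(by_pct)       # columns: train_percentage, rows: appliances
    curve.mean(axis=0).plot(label=setting_name)  # average over appliances
plt.xlabel("Training %")
plt.ylabel("RMSE fraction")
plt.legend()
plt.show()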
sys.stdout.flush()
err = {}
for num_iterations in range(100, 1400, 200):
    err[num_iterations] = {}
    for appliance in APPLIANCES_ORDER:
        pred[appliance] = []
    for appliance in APPLIANCES_ORDER:
        pred[appliance].append(
            pd.DataFrame(HATs[num_iterations][:, appliance_index[appliance], :],
                         index=source_df.index))
    for appliance in APPLIANCES_ORDER:
        pred[appliance] = pd.concat(pred[appliance])
    for appliance in APPLIANCES_ORDER[1:]:
        prediction = pred[appliance]
        if appliance == "hvac":
            # HVAC is evaluated only on May-October.
            prediction = prediction[list(range(5 - start, 11 - start))]
        err[num_iterations][appliance] = \
            compute_rmse_fraction(appliance, prediction, source, start, stop)[2]
    # Weight each appliance's error by its contribution to aggregate energy.
    err_weight = {}
    for appliance in APPLIANCES_ORDER[1:]:
        err_weight[appliance] = err[num_iterations][appliance] * contri[source][appliance]
    mean_err = pd.Series(err_weight).sum()
    err_store[learning_rate_cv][num_season_factors_cv][num_home_factors_cv][lam_cv][num_iterations] = mean_err

pickle.dump(H_store, open('../predictions/case-{}-graph_{}_{}_{}_{}_Hs.pkl'.format(
    case, source, constant_use, start, stop), 'wb'))
pickle.dump(A_store, open('../predictions/case-{}-graph_{}_{}_{}_{}_As.pkl'.format(
    case, source, constant_use, start, stop), 'wb'))
pickle.dump(err_store, open('../predictions/case-{}-graph_{}_{}_{}_{}_errs.pkl'.format(
    case, source, constant_use, start, stop), 'wb'))
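# A minimal sketch (an assumption about downstream use, not in the original
# file) of recovering the best hyperparameter combination from the nested
# err_store dictionary dumped above.
best_combo, best_err = None, float('inf')
for lr, d1 in err_store.items():
    for nsf, d2 in d1.items():
        for nhf, d3 in d2.items():
            for lam, d4 in d3.items():
                for n_iter, e in d4.items():
                    if e < best_err:
                        best_combo, best_err = (lr, nsf, nhf, lam, n_iter), e
print("Best (lr, #season, #home, lam, #iter): {} "
      "with weighted error {:.4f}".format(best_combo, best_err))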
                        pd.DataFrame(
                            HAT[:num_test, appliance_index[appliance], :],
                            index=test_ix))
                H_factors[learning_rate_cv][num_iterations_cv][
                    num_season_factors_cv][num_home_factors_cv][
                    lam_cv].append(
                        pd.DataFrame(H[:num_test, :], index=test_ix))

            # Get the overall prediction error across all homes.
            err = {}
            for appliance in APPLIANCES_ORDER:
                s = pd.concat(pred[appliance]).loc[target_df.index]
                if appliance == "hvac":
                    # HVAC is evaluated only on May-October.
                    err[appliance] = compute_rmse_fraction(
                        appliance, s[list(range(4, 10))], target)[2]
                else:
                    err[appliance] = compute_rmse_fraction(
                        appliance, s, target)[2]
            # Weight each appliance's error by its contribution to
            # aggregate energy.
            err_weight = {}
            for appliance in APPLIANCES_ORDER[1:]:
                err_weight[appliance] = err[appliance] * contri[target][appliance]
            mean_err = pd.Series(err_weight).sum()
            print(learning_rate_cv, num_iterations_cv, num_season_factors_cv,
                  num_home_factors_cv, lam_cv, mean_err)
            error.append(mean_err)
            params[count] = []
            params[count].extend(
                (learning_rate_cv, num_iterations_cv,
                 num_season_factors_cv, num_home_factors_cv, lam_cv))
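# A minimal sketch (assumed, not from the original file) of combining the
# parallel `error` list and `params` dict to recover the best setting; it
# assumes `count` is incremented in step with the appends to `error`.
best_idx, best_err = min(enumerate(error), key=lambda t: t[1])
print("Best inner-CV weighted error {:.4f} with params {}".format(
    best_err, params[best_idx]))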
        pred_inner = []
        pred_inner.append(
            create_prediction(test_ix_inner, X, Y, X_normalised,
                              appliance, col_max, col_min, appliance_cols))
        err = {}
        pred_inner = pd.concat(pred_inner)
        try:
            if appliance == "hvac":
                # HVAC is evaluated only on May-October.
                err[appliance] = compute_rmse_fraction(
                    appliance,
                    pred_inner[['hvac_{}'.format(month) for month in range(5, 11)]],
                    'SanDiego')[2]
            else:
                err[appliance] = compute_rmse_fraction(
                    appliance, pred_inner, 'SanDiego')[2]
        except Exception as e:
            # This appliance does not have enough samples and will not be
            # weighted.
            print(e)
            print(appliance)
        mean_err = pd.Series(err).sum()
        # Track the best hyperparameters seen so far.
        if mean_err < least_error:
            best_num_iterations = num_iterations_cv
            best_num_latent_factors = num_latent_factors_cv
            least_error = mean_err
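# A minimal sketch of how the selected hyperparameters would typically be used
# after the inner CV loop finishes; `factorise` is a hypothetical stand-in for
# this script's actual matrix-factorisation call, not a function in the repo.
print("Selected {} iterations, {} latent factors (inner-CV error {:.4f})".format(
    best_num_iterations, best_num_latent_factors, least_error))
X_best, Y_best = factorise(X_normalised,                      # hypothetical helper
                           n_latent_factors=best_num_latent_factors,
                           n_iter=best_num_iterations)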