def plot_validation_predict_and_obs(self, show_title=True, save_loc=None, plot_bounds=None):
    """Plot the best network's predictions against observations for the validation set.

    Fix: the parameters are now honoured. Previously they were clobbered at the
    top of the method by hard-coded debug overrides (show_title=False,
    plot_bounds=(5180, 5230), save_loc='pic_ANN_set0_validation2'), so callers
    had no control over the plot.

    :param show_title: if True, compute RMSE/NSE/RSEnaive/R and show them in the
        figure title.
    :param save_loc: file-name prefix; when given the figure is written to
        '<save_loc>.png' instead of being shown interactively.
    :param plot_bounds: optional (lower, upper) sample-index pair; when given,
        only that slice of the series is plotted.
    """
    label_font = {'size': 20}
    tick_label_size = 15
    title_label_size = 30

    predictions = self.ml_obj.best_network[0].sim(self.ml_obj.validation_input)
    observations = self.ml_obj.validation_target

    # Undo min-max scaling so the plot is in real units (NTU).
    # NOTE(review): the scaler is re-fit on the package's training target rather
    # than reused from the ML object — legacy pickles do not carry
    # self.min_max_scaler_target; guard on it once legacy data is retired.
    min_max_scaler_target = preprocessing.MinMaxScaler()
    min_max_scaler_target.fit_transform(self.data_package.training_target)
    predictions = min_max_scaler_target.inverse_transform(predictions)
    observations = min_max_scaler_target.inverse_transform(observations)

    print(get_all_error_metrics(observations, predictions, None))

    fig_validation = plt.figure(figsize=(10, 8))
    ax = fig_validation.add_subplot(111)

    # One timestamp per validation sample, spaced 15 minutes apart starting at
    # midnight of the validation start date.
    dt = datetime.datetime.combine(self.data_package.validation_start_date,
                                   datetime.datetime.min.time())
    x = [dt + datetime.timedelta(minutes=(15 * i))
         for i in range(len(self.ml_obj.validation_input))]

    # Observations clipped below 0; predictions additionally clipped at 10 NTU
    # to keep occasional network blow-ups from dominating the axis.
    if plot_bounds is not None:
        lower, upper = plot_bounds
        ax.plot(x[lower:upper], observations.clip(min=0)[lower:upper], 'o-', label='observations')
        ax.plot(x[lower:upper], predictions.clip(min=0, max=10)[lower:upper], 'r^-', label='predictions')
    else:
        ax.plot(x, observations.clip(min=0), 'o-', label='observations')
        ax.plot(x, predictions.clip(min=0, max=10), 'r^-', label='predictions')

    if show_title:
        errors_dict = calc_all_error('RMSE, NSE, RSEnaive, R', observations, predictions, round_to=5,
                                     rolling_average_size=96, upper_lim=10, lower_lim=0.05)
        fig_validation.suptitle('Validation Data: RMSE: {} NSE: {} RSEnaive: {} R: {}'.format(
            errors_dict['RMSE'], errors_dict['NSE'], errors_dict['RSEnaive'], errors_dict['R']),
            fontsize=title_label_size)

    ax.set_ylabel('Turbidity in NTU', fontdict=label_font)
    ax.tick_params(axis='both', which='major', labelsize=tick_label_size)
    ax.legend(loc='best')
    fig_validation.tight_layout()
    if show_title:
        # Leave headroom for the suptitle added above.
        fig_validation.subplots_adjust(top=0.90)
    if save_loc is not None:
        plt.savefig(save_loc + '.png')
    else:
        plt.show()
def run_main():
    """Main experiment driver: load data-set settings, build meter groups,
    package the data per forecast horizon, and (when enabled) train ANNs and
    pickle the resulting DataCollector plus error logs.

    NOTE(review): indentation was reconstructed from a collapsed source; the
    nesting of the two `if False:` experiment switches below should be
    confirmed against version control.
    """
    # load in data settings about a data set from a ini file, ini file must be in same directory as this file
    data_settings = DataSetSettings(os.path.join(os.path.dirname(os.getcwd()), ini_file))
    data_set_files = data_settings.get_data_set_files()
    meter_groups = []
    for meter_file in data_set_files:
        # create meter groups from csv data
        meter_groups.append(MeterGroup(meter_file[0], meter_file[1], meter_file[2]))
        # set meter group settings
        meter_groups[-1].set_meter_group_settings(data_settings.meter_groups())
    # create data packer and add meter groups
    data_packer1 = DataPacker(meter_groups)
    # range(1, 2) means only forecast horizon 1 is currently run.
    for time_horizon in range(1, 2):
        # package data for machine learning
        data_package1 = data_packer1.package_data(data_settings.packer_input(), data_settings.packer_target(),
                                                  forecast_horizon_override=time_horizon)
        print('Data Packaged')
        # Experiment switch: false-nearest-neighbours embedding search (disabled).
        if False:
            data = data_package1.training_input.T
            # data = data_package1.validation_input.T
            meta = data_package1.training_input_meta[0]
            embedding_search_fnn(data, meta, go_to=15, threshold=50, ratio=0.001)
        # Experiment switch: ANN training runs (disabled). NOTE(review): with
        # this False the function only packages data and prints 'Done!'.
        if False:
            for ann_count in range(10):
                with open(os.path.join(os.path.dirname(os.getcwd()), log_file), 'a') as f:
                    f.write('Forecast Horizon: {} ANN Count: {}\n'.format(time_horizon, ann_count))
                # Retry loop: keeps re-initialising and re-running the ANN until a
                # run succeeds (code != 1), then breaks after pickling results.
                while True:
                    # create machine learning obj to run ANN
                    ann1 = MachineLearning('ANN', data_package1, data_settings.ann())
                    print('Machine Learning Obj Initialized')
                    # run: ml_type, package_name, network_layout, layer_func, max_epochs, trains_per_validation_run,
                    # max_worse_validations stop_function, training_goal, verbose_settings
                    #code, msg = ann1.run(network_layout='{}, 1'.format(time_horizon))
                    code, msg = ann1.run()
                    if code == 1:
                        # Failed to learn
                        print(msg)
                        with open(os.path.join(os.path.dirname(os.getcwd()), log_file), 'a') as f:
                            f.write(msg + '\n')
                    else:
                        print(msg)
                        with open(os.path.join(os.path.dirname(os.getcwd()), log_file), 'a') as f:
                            f.write(msg + '\n')
                        print('Machine Learning Obj Finished Run')
                        data_collector1 = DataCollector(ann1, data_package1, data_settings)
                        # NOTE(review): Windows-style relative path; breaks on other OSes.
                        pickle_file = 'data\\Hillersdon\\hill-exp{}_p{}-h50-t{}-r{}.pickle'.format(
                            experiment_id, process_id, time_horizon, ann_count)
                        with open(pickle_file, 'wb') as pf:
                            pickle.dump(data_collector1, pf, pickle.HIGHEST_PROTOCOL)
                        print('Data Collector Pickled')
                        with open(os.path.join(os.path.dirname(os.getcwd()), log_file), 'a') as f:
                            f.write('Run Count: {}, Pickled: {}\n'.format(str(len(data_collector1.ml_obj.inbuilt_errors)),
                                                                          pickle_file))
                            f.write('Training Errors: {}\n'.format(get_last_errors(ann1.training_errors)))
                            f.write('Validation Errors: {}\n'.format(get_last_errors(ann1.validation_errors)))
                        # Recompute full error metrics on both splits for the results file.
                        predictions = data_collector1.ml_obj.best_network[0].sim(data_collector1.ml_obj.training_input)
                        observations = data_collector1.ml_obj.training_target
                        train_errors_str = get_all_error_metrics(observations, predictions, data_collector1.data_package)
                        predictions = data_collector1.ml_obj.best_network[0].sim(data_collector1.ml_obj.validation_input)
                        observations = data_collector1.ml_obj.validation_target
                        val_errors_str = get_all_error_metrics(observations, predictions, data_collector1.data_package)
                        with open(os.path.join(os.path.dirname(os.getcwd()), results_file), 'a') as f:
                            f.write('Run Count: {}, Pickled: {}\n'.format(str(len(data_collector1.ml_obj.inbuilt_errors)),
                                                                          pickle_file))
                            f.write('Training Errors, {}\n'.format(train_errors_str))
                            f.write('Validation Errors, {}\n'.format(val_errors_str))
                        #data_collector1.plot_all()
                        break
    print('Done!')
def plot_training_predict_and_obs(self, show_title=True, save_loc=None, plot_bounds=None):
    """Plot the best network's predictions against observations for the training set.

    Fix: the parameters are now honoured. Previously they were clobbered at the
    top of the method by hard-coded debug overrides (show_title=False,
    plot_bounds=(15690, 15760), save_loc='pic_ANN_set0_training2'), so callers
    had no control over the plot. A large `if False:` benchmark block (mean /
    rolling-mean / naive baselines replacing `predictions`) was dead code and
    has been removed.

    :param show_title: if True, compute RMSE/NSE/RSEnaive/R and show them in the
        figure title.
    :param save_loc: file-name prefix; when given the figure is written to
        '<save_loc>.png' instead of being shown interactively.
    :param plot_bounds: optional (lower, upper) sample-index pair; when given,
        only that slice of the series is plotted.
    """
    label_font = {'size': 20}
    tick_label_size = 15
    title_label_size = 30

    predictions = self.ml_obj.best_network[0].sim(self.ml_obj.training_input)
    observations = self.ml_obj.training_target

    # Undo min-max scaling so the plot is in real units (NTU).
    # NOTE(review): the scaler is re-fit on the package's training target rather
    # than reused from the ML object — legacy pickles do not carry
    # self.min_max_scaler_target; guard on it once legacy data is retired.
    min_max_scaler_target = preprocessing.MinMaxScaler()
    min_max_scaler_target.fit_transform(self.data_package.training_target)
    predictions = min_max_scaler_target.inverse_transform(predictions)
    observations = min_max_scaler_target.inverse_transform(observations)

    print(get_all_error_metrics(observations, predictions, None))

    fig_training = plt.figure(figsize=(10, 8))
    ax = fig_training.add_subplot(111)

    # One timestamp per training sample, spaced 15 minutes apart starting at
    # midnight of the training start date.
    dt = datetime.datetime.combine(self.data_package.training_start_date,
                                   datetime.datetime.min.time())
    x = [dt + datetime.timedelta(minutes=(15 * i))
         for i in range(len(self.ml_obj.training_input))]

    # Observations clipped below 0; predictions additionally clipped at 10 NTU
    # to keep occasional network blow-ups from dominating the axis.
    if plot_bounds is not None:
        lower, upper = plot_bounds
        ax.plot(x[lower:upper], observations.clip(min=0)[lower:upper], 'o-', label='observations')
        ax.plot(x[lower:upper], predictions.clip(min=0, max=10)[lower:upper], 'r^-', label='predictions')
    else:
        ax.plot(x, observations.clip(min=0), 'o-', label='observations')
        ax.plot(x, predictions.clip(min=0, max=10), 'r^-', label='predictions')

    # TODO: reassess
    if show_title:
        # TODO: make dynamic
        errors_dict = calc_all_error('RMSE, NSE, RSEnaive, R', observations, predictions, round_to=5,
                                     rolling_average_size=96, upper_lim=10, lower_lim=0.05)
        fig_training.suptitle('Training Data: RMSE: {} NSE: {} RSEnaive: {} R: {}'.format(
            errors_dict['RMSE'], errors_dict['NSE'], errors_dict['RSEnaive'], errors_dict['R']),
            fontsize=title_label_size)

    ax.set_ylabel('Turbidity in NTU', fontdict=label_font)
    ax.tick_params(axis='both', which='major', labelsize=tick_label_size)
    ax.legend(loc='best')
    fig_training.tight_layout()
    if show_title:
        # Leave headroom for the suptitle added above.
        fig_training.subplots_adjust(top=0.90)
    if save_loc is not None:
        plt.savefig(save_loc + '.png')
    else:
        plt.show()