def train_two_dim_mc_using_first_2_years(full_this_wind_turbine: WT):
    """
    Fit one two-dimensional Markov chain per season on the training period.

    A deep copy of the turbine is truncated to 2007-01-01 00:00 ..
    2008-11-30 23:50 and then, for every member name of ``SeasonTemplate1``,
    truncated again to that season. Wind speed and absolute wind direction
    are jointly encoded with ``__encode_markov_chain``; rows where any of the
    three signals is flagged as an outlier are replaced by NaN beforehand.

    Files written under ``<results_path>two_dim_mc/<turbine>/``:
    ``mc_matrix.pkl`` and ``unique_state.pkl`` (through the
    ``load_exist_pkl_file_otherwise_run_and_save`` decorator) and
    ``mob.pkl``.

    :param full_this_wind_turbine: turbine object holding the full record;
        it is deep-copied and never modified here.
    :return: ``(mc_matrix, mob)``, two dicts keyed by season name.
    """
    training_turbine = copy.deepcopy(full_this_wind_turbine)
    training_turbine.do_truncate(datetime.datetime(2007, 1, 1, 0, 0),
                                 datetime.datetime(2008, 11, 30, 23, 50))

    signal_names = ('wind speed',
                    'absolute wind direction',
                    'active power output')
    mc_matrix = {}
    mob = {}
    for this_season in SeasonTemplate1.__members__:
        season_turbine = copy.deepcopy(training_turbine)
        season_turbine.do_truncate_by_season(this_season)

        # Columns: wind speed, wind direction, active power output.
        ws_wd_time_series = np.array(
            [season_turbine.measurements[name].values
             for name in signal_names]).T
        all_not_outlier = np.all(
            tuple(season_turbine.outlier_category_detailed[name].values == 0
                  for name in signal_names),
            axis=0)
        # Blank out every row that contains an outlier in any signal.
        ws_wd_time_series[~all_not_outlier, :] = np.nan

        # Joint (wind speed, wind direction) state used by the Markov chain.
        two_dim_encoded = __encode_markov_chain(ws_wd_time_series[:, 0],
                                                ws_wd_time_series[:, 1])

        path_ = ''.join([season_turbine.results_path,
                         'two_dim_mc/',
                         str(season_turbine),
                         '/'])
        try_to_find_folder_path_otherwise_make_one(path_)

        # Transition matrix.
        # NOTE(review): the decorated name is stored without being called;
        # this presumably relies on the decorator handing back the
        # loaded/computed object -- confirm against the decorator.
        @load_exist_pkl_file_otherwise_run_and_save(path_ + 'mc_matrix.pkl')
        def cal_two_dim_mc_matrix():
            chain = OneDimMarkovChain(current_state=two_dim_encoded[:-1],
                                      next_state=two_dim_encoded[1:])
            return chain.state_markov_chain_in_matrix

        mc_matrix[this_season] = cal_two_dim_mc_matrix

        # Unique (non-NaN) states.
        @load_exist_pkl_file_otherwise_run_and_save(path_ + 'unique_state.pkl')
        def cal_unique_state():
            current_states = two_dim_encoded[:-1]
            return np.unique(current_states[~np.isnan(current_states)])

        # NOTE(review): the key was assigned just above, so this setdefault
        # leaves mc_matrix unchanged; kept as in the original.
        mc_matrix.setdefault(this_season, cal_unique_state)

        # Relation between the encoded 2-D state and the power output
        # observed at the same time step.
        mob_ = MethodOfBins(two_dim_encoded,
                            ws_wd_time_series[:, 2],
                            first_bin_left_boundary=-0.5,
                            bin_step=1)
        save_pkl_file(path_ + 'mob.pkl', mob_)
        mob[this_season] = mob_
    return mc_matrix, mob
def wrapper(*args, **kwargs):
    """
    Return the object stored at ``file_path`` or compute, store and return it.

    ``file_path`` and ``func`` come from the enclosing decorator scope.
    When ``file_path`` is a ``Path`` its parent folder is ensured first.
    The pickle is loaded exactly once: a non-``None`` result is returned
    as-is, otherwise ``func(*args, **kwargs)`` is evaluated, saved with
    ``save_pkl_file`` and returned.

    Note that a legitimately cached ``None`` cannot be told apart from a
    missing file and therefore triggers a recomputation.
    """
    if isinstance(file_path, Path):
        try_to_find_folder_path_otherwise_make_one(file_path.parent)

    # Load once: avoids deserialising the file twice and guarantees that the
    # object that was tested is the object that is returned.
    cached = load_pkl_file(file_path)
    if cached is not None:
        return cached

    obj = func(*args, **kwargs)
    save_pkl_file(file_path, obj)
    return obj
def save_pkl_file(file_path: Path, obj):
    """
    Pickle ``obj`` to ``file_path`` (protocol 4), creating the parent folder.

    :param file_path: destination file. A ``Path`` is expected, but a plain
        ``str`` is accepted as well because other callers in this code base
        build paths by string concatenation.
    :param obj: any picklable object.
    :raises FileNotFoundError: if the file still cannot be opened after the
        fallback attempt with doubled forward slashes.
    """
    # Coerce first so that ``.parent`` works for str inputs too.
    file_path = Path(file_path)
    try_to_find_folder_path_otherwise_make_one(file_path.parent)

    file_path = str(file_path)
    try:
        with open(file_path, 'wb') as f:
            pickle.dump(obj, f, protocol=4)
    except FileNotFoundError:
        # Best-effort retry kept from the original implementation.
        file_path = re.sub('/', '//', file_path)
        with open(file_path, 'wb') as f:
            pickle.dump(obj, f, protocol=4)
def test_on_year3_and_by_seasons(full_this_wind_turbine: WT,
                                 doing_day_analysis: bool = False):
    """
    Test for the PMAPS paper: evaluate three models season by season.

    A deep copy of the turbine is truncated to 2007-01-01 00:00 ..
    2008-11-30 23:50 (the period the models were built from). For every
    member name of ``SeasonTemplate1`` the function

    1. loads the test data with ``load_test_data_for_iet_paper`` and shifts
       the actual power series one step back (the task is a forecast);
    2. loads the stored model boundary and derives the region masks;
    3. obtains mean / 5th / 95th percentile power estimates from the
       ``two_dim_mc``, ``cvine_gmcm`` and ``empirical`` models (cached as
       pickle files through ``load_exist_pkl_file_otherwise_run_and_save``);
    4. computes deterministic, probabilistic and energy-based errors and
       saves them as ``errors_table.mat`` (whole season) or
       ``errors_table_day.mat`` (single-day analysis);
    5. when a single day is analysed, plots each model's uncertainty band
       and writes one CSV file per model.

    :param full_this_wind_turbine: turbine object; deep-copied, not mutated.
    :param doing_day_analysis: if True restrict the evaluation to the
        hard-coded one-day window of each season, otherwise use all samples.
    :raises ValueError: for a season without a configured day window or an
        unknown model name in the error / plotting helpers.
    :raises Exception: for an unknown model name in the estimation helper.
    """
    full_this_wind_turbine_pro = copy.deepcopy(full_this_wind_turbine)
    # Only the model generated from the first two years of data is used.
    full_this_wind_turbine_pro.do_truncate(
        datetime.datetime(2007, 1, 1, 0, 0),
        datetime.datetime(2008, 11, 30, 23, 50))

    # Sample-index window of the day analysed in each season.
    day_windows = {
        'winter': (7862, 8007),
        'spring': (7043, 7187),
        'summer': (3993, 4137),
        'autumn': (3849, 3993),
    }
    # Columns of the saved error table, in order.
    error_keys = ('mae', 'rmse', 'epsilon_mae', 'epsilon_rmse', 'delta_u',
                  'over_estimate', 'over_estimate_in_pct',
                  'under_estimate', 'under_estimate_in_pct',
                  'model_output_total',
                  'model_output_total_dividing_target_total',
                  'model_output_total_plus',
                  'model_output_total_plus_dividing_target_total')
    what_mean = 'pout_by_what_model_mean'
    what_5 = 'pout_by_what_model_5'
    what_95 = 'pout_by_what_model_95'

    for this_season_idx, this_season in enumerate(SeasonTemplate1.__members__):
        specific_this_wind_turbine = copy.deepcopy(full_this_wind_turbine_pro)
        specific_this_wind_turbine.do_truncate_by_season(this_season)

        # %% Test data (shared with the IET paper)
        test_data = load_test_data_for_iet_paper(this_season_idx)
        # Shift by one step because this is a forecast; the last sample has
        # no successor and becomes NaN.
        for key in ('test_pout_actual_5',
                    'test_pout_actual_95',
                    'test_pout_actual'):
            test_data[key] = np.roll(test_data[key], -1)
            test_data[key][-1] = np.nan

        # %% Boundary data, because the models only cover region_a
        boundary_path_ = ''.join(
            (specific_this_wind_turbine.results_path,
             '3d_cvine_gmcm_model_use_ws_ahead_1/'
             + str(specific_this_wind_turbine) + '/'))
        model_boundary = load_npy_file(boundary_path_ + 'model_boundary.npy')
        (region_1_mask,
         region_a_mask_in_input_data,
         region_rated_mask,
         region_b_mask_in_input_data,
         region_5_mask,
         hard_rated_mask,
         hard_cut_off_mask) = PowerCurve.cal_region_boundary_mask(
            model_boundary,
            test_data.get('test_ws_actual'),
            test_data.get('test_outlier_category') == 0)

        # %% Time mask: one day or the whole season
        time_mask_pure = np.full(region_1_mask.shape, False)
        try:
            day_start_idx, day_end_idx = day_windows[this_season]
        except KeyError:
            raise ValueError(
                "No day window configured for season "
                "{!r}".format(this_season)) from None
        time_mask_pure[day_start_idx:day_end_idx] = True
        if not doing_day_analysis:
            time_mask_pure[:] = True

        region_a_mask_and_time_mask = np.bitwise_and(
            region_a_mask_in_input_data, time_mask_pure)
        time_mask_and_cat_0_mask = np.bitwise_and(
            time_mask_pure, test_data.get('test_outlier_category') == 0)

        def cal_using_what_model(what_model_name):
            """Estimate mean / 5% / 95% power with the named model.

            Samples outside ``region_a_mask_and_time_mask`` stay NaN.
            """
            sample_num = test_data.get('test_ws_actual').size
            pout_by_what_model_mean = np.full(sample_num, np.nan)
            pout_by_what_model_5 = np.full(sample_num, np.nan)
            pout_by_what_model_95 = np.full(sample_num, np.nan)

            # (wind speed, wind direction) pairs of the selected samples.
            model_input = np.stack(
                (test_data.get('test_ws_actual'),
                 test_data.get('test_wd_actual')),
                axis=1)[region_a_mask_and_time_mask, :]

            if what_model_name == 'two_dim_mc':
                pout_by_what_model = \
                    estimate_active_power_output_by_two_dim_mc_model(
                        specific_this_wind_turbine, model_input)
            elif what_model_name == 'cvine_gmcm':
                pout_by_what_model = specific_this_wind_turbine.\
                    estimate_active_power_output_by_3d_cvine_gmcm_model(
                        model_input, use_ws_ahead=1)
            elif what_model_name == 'empirical':
                pout_by_what_model = \
                    estimate_active_power_output_by_empirical_model(
                        specific_this_wind_turbine, model_input)
            else:
                raise Exception("Check model name")

            pout_by_what_model_mean[region_a_mask_and_time_mask] = np.array(
                [x.mean_ for x in pout_by_what_model])

            # Intended to drop the PDF data (the CDF alone suffices for the
            # inverse CDF, halving memory).
            # NOTE(review): this iterates the float array of means, so the
            # isinstance check presumably never matches; the intended target
            # looks like ``pout_by_what_model`` -- confirm before changing.
            for i in range(len(pout_by_what_model_mean)):
                if isinstance(pout_by_what_model_mean[i],
                              UnivariatePDFOrCDFLike):
                    pout_by_what_model_mean[i].pdf_like_ndarray = None

            # 5th and 95th percentiles
            pout_by_what_model_5_95 = np.array([
                x.find_nearest_inverse_cdf(np.array([0.05, 0.95]))
                for x in pout_by_what_model
            ])
            pout_by_what_model_5[region_a_mask_and_time_mask] = \
                pout_by_what_model_5_95[:, 0]
            pout_by_what_model_95[region_a_mask_and_time_mask] = \
                pout_by_what_model_5_95[:, 1]
            return {
                'pout_by_what_model_mean': pout_by_what_model_mean,
                'pout_by_what_model_5': pout_by_what_model_5,
                'pout_by_what_model_95': pout_by_what_model_95
            }

        # NOTE(review): below, the decorated names are used as the result
        # dicts without being called; this presumably relies on the decorator
        # handing back the loaded/computed object -- confirm.

        # %% c-vine GMCM
        cvine_gmcm_path = ''.join(
            (specific_this_wind_turbine.results_path,
             '3d_cvine_gmcm_model_use_ws_ahead_1/',
             str(specific_this_wind_turbine),
             '/PMAPS_paper/'))
        try_to_find_folder_path_otherwise_make_one(cvine_gmcm_path)

        @load_exist_pkl_file_otherwise_run_and_save(
            cvine_gmcm_path
            + 'cvine_gmcm_results_{}.pkl'.format(doing_day_analysis))
        def cal_using_cvine_gmcm_wrapper():
            return cal_using_what_model('cvine_gmcm')

        cvine_gmcm_results = cal_using_cvine_gmcm_wrapper

        # %% two_dim_mc
        two_dim_mc_path = ''.join(
            (specific_this_wind_turbine.results_path,
             'two_dim_mc/',
             str(specific_this_wind_turbine),
             '/'))
        try_to_find_folder_path_otherwise_make_one(two_dim_mc_path)

        @load_exist_pkl_file_otherwise_run_and_save(
            two_dim_mc_path
            + 'two_dim_mc_results_{}.pkl'.format(doing_day_analysis))
        def cal_using_two_dim_mc_wrapper():
            return cal_using_what_model('two_dim_mc')

        two_dim_mc_results = cal_using_two_dim_mc_wrapper

        # %% empirical
        empirical_path = ''.join(
            (specific_this_wind_turbine.results_path,
             '3d_cvine_gmcm_model_use_ws_ahead_1/',
             str(specific_this_wind_turbine),
             '/PMAPS_paper/'))
        try_to_find_folder_path_otherwise_make_one(empirical_path)

        @load_exist_pkl_file_otherwise_run_and_save(
            empirical_path
            + 'empirical_results_{}.pkl'.format(doing_day_analysis))
        def cal_using_empirical_wrapper():
            return cal_using_what_model('empirical')

        empirical_results = cal_using_empirical_wrapper

        results_by_model = {
            'two_dim_mc': two_dim_mc_results,
            'cvine_gmcm': cvine_gmcm_results,
            'empirical': empirical_results,
        }

        # ------------------------------------------------------------------
        # Error calculation
        # ------------------------------------------------------------------
        def cal_what_model_errors(what_model_name: str):
            """Return the error dict of one model for the current season."""
            what_model_errors = dict.fromkeys(error_keys, np.nan)
            time_mask_and_cat_0_mask_model = time_mask_and_cat_0_mask
            try:
                what_results = results_by_model[what_model_name]
            except KeyError:
                raise ValueError(
                    "Unknown model name "
                    "{!r}".format(what_model_name)) from None

            # %% The models only cover region_a, so the other regions are
            # filled in here: zero output below cut-in / above cut-off and
            # 3000 in the hard-rated region (presumably the rated power).
            no_output_mask = np.bitwise_or(region_1_mask, hard_cut_off_mask)
            for key in (what_5, what_mean, what_95):
                what_results[key][no_output_mask] = 0.
            for key in (what_5, what_mean, what_95):
                what_results[key][hard_rated_mask] = 3000.

            # %% Actual error calculation
            what_model_errors['DeterministicError'] = DeterministicError(
                target=test_data.get(
                    'test_pout_actual')[time_mask_and_cat_0_mask] / 3000,
                model_output=what_results[what_mean][
                    time_mask_and_cat_0_mask_model].flatten() / 3000)
            deterministic = what_model_errors['DeterministicError']
            what_model_errors.update({
                'mae': deterministic.cal_mean_absolute_error(),
                'rmse': deterministic.cal_root_mean_square_error()
            })
            if what_model_name != 'mfr':
                what_model_errors.update(
                    ProbabilisticErrorIETPaperMethod(
                        target=np.stack(
                            (test_data.get('test_pout_actual_5')[
                                 time_mask_and_cat_0_mask] / 3000,
                             test_data.get('test_pout_actual_95')[
                                 time_mask_and_cat_0_mask] / 3000),
                            axis=1),
                        model_output=np.stack(
                            (what_results[what_5][
                                 time_mask_and_cat_0_mask_model
                             ].flatten() / 3000,
                             what_results[what_95][
                                 time_mask_and_cat_0_mask_model
                             ].flatten() / 3000),
                            axis=1)).do_calculation())
            what_model_errors.update(
                EnergyBasedError(
                    target=test_data.get(
                        'test_pout_actual')[time_mask_and_cat_0_mask] / 1000,
                    model_output=what_results[what_mean][
                        time_mask_and_cat_0_mask_model].flatten() / 1000,
                    time_step=1 / 6).do_calculation())
            return what_model_errors

        two_dim_mc_error = cal_what_model_errors('two_dim_mc')
        cvine_gmcm_error = cal_what_model_errors('cvine_gmcm')
        empirical_error = cal_what_model_errors('empirical')

        # Form a table: one row per model, one column per error key.
        errors_table = np.array([
            [this_error[key] for key in error_keys]
            for this_error in (empirical_error,
                               two_dim_mc_error,
                               cvine_gmcm_error)
        ])
        if time_mask_pure.all():
            savemat(cvine_gmcm_path + 'errors_table.mat',
                    {'errors_table': errors_table})
        else:
            savemat(cvine_gmcm_path + 'errors_table_day.mat',
                    {'errors_table': errors_table})

        # ------------------------------------------------------------------
        # Plotting
        # ------------------------------------------------------------------
        # Time axis in days (presumably 144 ten-minute samples per day).
        xx = np.linspace(day_start_idx, day_end_idx,
                         day_end_idx - day_start_idx) / 144

        # The whole-day comparison of model means ("big figure") is disabled.

        # %% Small figures
        def plot_one_model_uncertainty_and_save_csv(model_name,
                                                    y_linestyle: str,
                                                    *,
                                                    save_csv_name: str = None):
            """Plot one model's 5-95% band and optionally dump it as CSV."""
            try:
                model_results = results_by_model[model_name]
            except KeyError:
                raise ValueError(
                    "Unknown model name {!r}".format(model_name)) from None

            y1 = model_results['pout_by_what_model_5'][time_mask_pure] / 3000
            y2 = model_results['pout_by_what_model_95'][time_mask_pure] / 3000
            y_mean = model_results['pout_by_what_model_mean'][
                         time_mask_pure] / 3000

            # Measured band (green) first, model band (blue) on top.
            ax = series_uncertainty_plot(
                xx,
                y1=test_data['test_pout_actual_5'][time_mask_pure] / 3000,
                y2=test_data['test_pout_actual_95'][time_mask_pure] / 3000,
                facecolor='g',
                edgecolor='g',
                hatch='/' * 6,
                linestyle='-',
                linewidth=1,
                alpha=0.2)
            ax = series_uncertainty_plot(xx,
                                         y1=y1,
                                         y2=y2,
                                         ax=ax,
                                         facecolor='b',
                                         edgecolor='b',
                                         hatch='\\' * 6,
                                         linestyle='-',
                                         linewidth=1,
                                         alpha=0.2)
            ax = series(
                x=xx,
                y=test_data.get('test_pout_actual')[time_mask_pure] / 3000,
                ax=ax,
                linestyle='-',
                color=[0, 1, 0])
            ax = series(xx,
                        y_mean,
                        ax=ax,
                        linestyle=y_linestyle,
                        color='b',
                        save_file_=cvine_gmcm_path + model_name,
                        x_label='Time stamp (x-th day in the season)',
                        y_label='Power output (p.u.)',
                        x_lim=(xx[0] - 0.01, xx[-1] + 0.01),
                        y_lim=(-0.01, 1.01))

            if save_csv_name is not None:
                header = [
                    'actual_Pout_mean',
                    'actual_Pout_5_percentile',
                    'actual_Pout_95_percentile'
                ]
                header.extend(model_name + x for x in (
                    '_model_mean',
                    '_model_5_percentile',
                    '_model_95_percentile'))
                rows = np.stack(
                    (test_data['test_pout_actual'][time_mask_pure] / 3000,
                     test_data['test_pout_actual_5'][time_mask_pure] / 3000,
                     test_data['test_pout_actual_95'][time_mask_pure] / 3000,
                     y_mean,
                     y1,
                     y2),
                    axis=1)
                # ``with`` closes the handle; newline='' lets the csv module
                # control line endings (no blank rows on Windows).
                with open(cvine_gmcm_path + save_csv_name + '.csv', 'w',
                          encoding='utf-8', newline='') as f:
                    csv_writer = csv.writer(f)
                    csv_writer.writerow(header)
                    csv_writer.writerows(rows.tolist())

        if not time_mask_pure.all():
            plot_one_model_uncertainty_and_save_csv(
                'two_dim_mc', ':', save_csv_name='two_dim_mc')
            plot_one_model_uncertainty_and_save_csv(
                'cvine_gmcm', '--', save_csv_name='cvine_gmcm')
            plot_one_model_uncertainty_and_save_csv(
                'empirical', '-.', save_csv_name='empirical')
def energies_paper_train_torch_model_for_ampds2_dataset(
        *,
        appliance_original_name: str = None,
        appliance_type_name: str = None,
        sample_period: int,
        model_save_path: Path,
        transform_args_file_path: Path) -> dict:
    """
    Train a ``StackedBiLSTM`` on the AMPds2 training set and save it.

    The data set comes from
    ``energies_paper_prepare_dataset_for_torch_model_for_ampds2_dataset``.
    Training runs for a fixed number of epochs with Adam and ``mse_loss``;
    a checkpoint is written every 1000 epochs (epoch 0 excluded) next to
    ``model_save_path``. The final model is saved to ``model_save_path``;
    the elapsed time and per-batch losses go to a pickle file whose name is
    the stem of ``model_save_path`` with ``_model`` replaced by
    ``_training_and_loss.pkl``.

    :param appliance_original_name: forwarded to the data-set preparation.
    :param appliance_type_name: forwarded to the data-set preparation.
    :param sample_period: forwarded to the data-set preparation.
    :param model_save_path: file the trained model is saved to.
    :param transform_args_file_path: forwarded to the data-set preparation.
    :return: dict with ``'model'`` (re-loaded from ``model_save_path``) and
        ``'training_time_and_loss'`` (re-loaded from the pickle file).
    """
    loss_file_name = re.sub(r'_model', '_training_and_loss.pkl',
                            model_save_path.stem)
    training_time_path = model_save_path.parent / loss_file_name

    # TODO: training always runs; the guard
    # ``if not try_to_find_file(model_save_path)`` is currently disabled.

    # ---------------------------- hyper-parameters ----------------------
    epoch_num = 25000
    training_torch_set_dl_bs = 90
    hidden_size = 1024
    learning_rate = 1e-4
    weight_decay = 0.000001
    dropout = 0.1
    lstm_layer_num = 3
    # --------------------------------------------------------------------

    training_torch_set, _test_torch_set = \
        energies_paper_prepare_dataset_for_torch_model_for_ampds2_dataset(
            appliance_original_name=appliance_original_name,
            appliance_type_name=appliance_type_name,
            sample_period=sample_period,
            transform_args_file_path=transform_args_file_path)

    # Round-trip the data set and the loader through pickle files in the
    # working directory (kept exactly as in the original implementation).
    dataset_dump = Path(r'.\training_torch_set')
    save_pkl_file(dataset_dump, training_torch_set)
    training_torch_set = load_pkl_file(Path(r'.\training_torch_set'))

    training_torch_set_dl = DataLoader(training_torch_set,
                                       batch_size=training_torch_set_dl_bs,
                                       shuffle=False)
    loader_dump = Path(r'.\training_torch_set_dl')
    save_pkl_file(loader_dump, training_torch_set_dl)
    training_torch_set_dl = load_pkl_file(Path(r'.\training_torch_set_dl'))

    # %% Model definition: sizes are read from the first sample.
    input_feature_len = training_torch_set[0][0].size()[-1]
    input_sequence_len = training_torch_set[0][0].size()[-2]
    output_feature_len = training_torch_set[0][1].size()[-1]
    output_sequence_len = training_torch_set[0][1].size()[-2]

    simple_lstm_model = StackedBiLSTM(lstm_layer_num=lstm_layer_num,
                                      input_feature_len=input_feature_len,
                                      hidden_size=hidden_size,
                                      output_feature_len=output_feature_len,
                                      dropout=dropout)

    # %% Optimiser (weight_decay is the L2 regularisation strength)
    opt = torch.optim.Adam(simple_lstm_model.parameters(),
                           lr=learning_rate,
                           weight_decay=weight_decay)
    # %% Loss function
    loss_func = mse_loss

    # %% Training
    start_time = time.time()
    epoch_loss = []
    for epoch in range(epoch_num):
        epoch_start_time = time.time()
        simple_lstm_model.set_train()
        batch_loss = []
        for batch_no, (xb, yb) in enumerate(training_torch_set_dl, start=1):
            pred = simple_lstm_model(xb)
            loss = loss_func(pred, yb)
            opt.zero_grad()
            loss.backward()
            opt.step()
            batch_loss.append(loss.item())
            print(f"第{epoch + 1: d}个epoch, 第{batch_no: d}个batch, loss={loss}")
        print(f"第{epoch + 1: d}个epoch结束, 平均loss={np.mean(batch_loss)}")
        print(f"第{epoch + 1: d}个epoch结束, 耗时{time.time() - epoch_start_time}")
        epoch_loss.append(batch_loss)

        # Periodic checkpoint of the whole model.
        if epoch != 0 and epoch % 1000 == 0:
            try_to_find_folder_path_otherwise_make_one(model_save_path.parent)
            checkpoint_name = (model_save_path.stem
                               + f"_epoch_{epoch}"
                               + model_save_path.suffix)
            torch.save(simple_lstm_model,
                       model_save_path.parent / checkpoint_name)

    # Save the whole model.
    torch.save(simple_lstm_model, model_save_path)
    # Save the training time and the losses.
    elapsed = time.time() - start_time
    save_pkl_file(training_time_path, {'time': elapsed, 'loss': epoch_loss})

    return {
        'model': torch.load(model_save_path),
        'training_time_and_loss': load_pkl_file(training_time_path)
    }