Example #1
 def test_forecast(self):
     # train the model
     print(Colours.yellow("--- CNN forecast---"))
     source_data = unserialize_pickle(self.data_source_dump)
     stat_dict = unserialize_json(self.stat_file)
     data_flow = unserialize_numpy(self.flow_file)
     t_s_dict = unserialize_json(self.t_s_dict_file)
     model_data = DataModel(source_data, data_flow, t_s_dict, stat_dict)
     model_dict = model_data.data_source.data_config.model_dict
     # model_file = os.path.join(model_dict["dir"]["Out"], "model.yaml")
     # weight_file = os.path.join(model_dict["dir"]["Out"], "weights.h5")
     # bo_json_file = os.path.join(model_dict["dir"]["Out"], "bo_logs.json")
     # bo_json = unserialize_bo_json(bo_json_file)
     # n_input = int(bo_json["params"]["n_input"])
     n_input = 14
     try_cnn = TryCnn()
     data, targets = routing_cnn.to_supervised(
         model_data.load_data(model_dict), n_input)
     obs_value, pred_value = cnn_test(try_cnn,
                                      X=data,
                                      Y=targets,
                                      stat_dict=stat_dict)
     # obs_value, pred_value = cnn_test(data, targets, stat_dict, model_file=model_file, weight_file=weight_file)
     print("the observe value:", obs_value)
     print("the predict value:", pred_value)
     serialize_numpy(obs_value, self.obs_file)
     serialize_numpy(pred_value, self.pred_file)
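The routing_cnn.to_supervised call above turns the loaded series into (window, target) pairs using a 14-step look-back. Its implementation is not part of this example; a minimal sliding-window sketch of the same idea, where the array layout and the choice of target column are assumptions rather than the project's actual code:

import numpy as np

def to_supervised_sketch(series, n_input):
    """Hypothetical sliding-window split: series has shape (n_days, n_stations)."""
    data, targets = [], []
    for start in range(len(series) - n_input):
        # each sample covers n_input consecutive days over all stations
        data.append(series[start:start + n_input, :])
        # the target is the value on the day that follows the window (column 0 assumed)
        targets.append(series[start + n_input, 0])
    return np.array(data), np.array(targets)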
Example #2
    def test_forecast_data_model(self):
        print(Colours.yellow("--- reading CNN---"))
        config_file = definitions.CONFIG_FILE

        # read the model configuration file
        config_data = DataConfig(config_file)
        # prepare the data for the test period
        source_data = DataSource(config_data,
                                 config_data.model_dict["data"]["tRangeTest"])
        # build the input data model object
        model_data = DataModel(source_data)
        # serialize and save the objects
        dir_temp = source_data.all_configs["temp_dir"]
        source_file = self.data_source_dump
        stat_file = self.stat_file
        flow_file = os.path.join(dir_temp, 'flow_test')
        t_s_dict_file = self.t_s_dict_file

        # Save data_model piece by piece: serializing the whole data_model at once is slow, so each part is serialized separately (dicts as JSON files, array data as HDF5/binary)
        serialize_pickle(source_data, source_file)
        serialize_json(model_data.stat_dict, stat_file)
        serialize_numpy(model_data.data_flow, flow_file)
        serialize_json(model_data.t_s_dict, t_s_dict_file)
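The serialize_* / unserialize_* helpers used throughout these examples come from the project's utility module and are not shown here. A minimal sketch of what such wrappers typically do, assuming plain json, numpy and pickle underneath (names and behaviour are assumptions, not the project's actual code):

import json
import pickle
import numpy as np

def serialize_json(obj, path):
    # dictionaries are written as JSON text files
    with open(path, "w") as f:
        json.dump(obj, f)

def serialize_numpy(arr, path):
    # array data is written as a binary NumPy file
    np.save(path, arr)

def serialize_pickle(obj, path):
    # arbitrary Python objects are pickled
    with open(path, "wb") as f:
        pickle.dump(obj, f)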
Example #3
                                         "expGamma": (-4, -1)
                                     },
                                     random_state=1234,
                                     verbose=2)
    optimizer.maximize(n_iter=10)

    print("Final result:", optimizer.max)
    optimizer_max = optimizer.max
    return optimizer_max
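A note on the return value: in the bayes_opt library, optimizer.max is a plain dict that holds the best objective value under "target" and the corresponding parameter set under "params", so the result of this function can be unpacked directly. A short usage sketch (assuming the truncated function above is the optimize_svc called below):

best = optimize_svc(data, targets)
print("best score:", best["target"])       # best cross-validation value found
print("best parameters:", best["params"])  # e.g. {"expGamma": -2.3, ...}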


# COMMAND ----------

# Optimizing Support Vector Machine
# Run bayesian optimization function with first batch of iterations
print(Colours.yellow("--- Optimizing SVM ---"))
optimize_svc(data, targets)

# COMMAND ----------


###################### Bayesian Optimization ###########
###################### Random Forest Classification ###########
def rfc_cv(n_estimators, min_samples_split, max_features, data, targets):
    """Random Forest cross validation.
    This function will instantiate a random forest classifier with parameters
    n_estimators, min_samples_split, and max_features. Combined with data and
    targets this will in turn be used to perform cross validation. The result
    of cross validation is returned.
    Our goal is to find combinations of n_estimators, min_samples_split, and
    max_features that minimize the log loss.
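The example is cut off before the body of rfc_cv. In the bayes_opt demos this pattern wraps scikit-learn cross-validation; a minimal sketch under that assumption (the fold count and random_state are guesses, not values from this project):

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

def rfc_cv_sketch(n_estimators, min_samples_split, max_features, data, targets):
    # negative log loss is returned so that maximizing it minimizes the log loss
    estimator = RandomForestClassifier(
        n_estimators=int(n_estimators),
        min_samples_split=int(min_samples_split),
        max_features=max_features,
        random_state=2,
    )
    cval = cross_val_score(estimator, data, targets, scoring="neg_log_loss", cv=4)
    return cval.mean()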
Example #4
        f=augmentation_wrapper,
        pbounds={
            "AddSentDiverse": (0, 1.0),
            "AddKSentDiverse": (0, 1.0),
            "AddAnswerPosition": (0, 1.0),
            "InvalidateAnswer": (0, 1.0),
            "PerturbAnswer": (0, 1.0),
            "AddSentDiverse_PerturbAnswer": (0, 1.0),
            "AddKSentDiverse_PerturbAnswer": (0, 1.0),
            "AddAnswerPosition_PerturbAnswer": (0, 1.0),
            "PerturbQuestion": (0, 1.0),
            "AddSentDiverse_PerturbQuestion": (0, 1.0),
            "AddAnswerPosition_PerturbQuestion": (0, 1.0)
        },
        random_state=1234,
        verbose=2)
    optimizer.maximize(
        init_points=20,
        n_iter=100
        # What follows are GP regressor parameters
        # alpha=1e-3,
        # n_restarts_optimizer=5
    )
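    # Note (assumption, version-dependent): in current bayes_opt releases the GP
    # regressor parameters above are set on the optimizer itself rather than
    # passed to maximize(), e.g.
    #     optimizer.set_gp_params(alpha=1e-3, n_restarts_optimizer=5)
    #     optimizer.maximize(init_points=20, n_iter=100)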
    print("Final result:", optimizer.max)


if __name__ == "__main__":

    print(Colours.yellow("--- Optimizing Roberta ---"))
    optimize_roberta()
        """Wrapper of SVC cross validation.

        Notice how we transform between regular and log scale. While this
        is not technically necessary, it greatly improves the performance
        of the optimizer.
        """
        return gms_function_value(capacity_1, capacity_2, capacity_3,
                                  capacity_4)

    optimizer = BayesianOptimization(f=gms_value,
                                     pbounds={
                                         "capacity_1": (0.000001, 0.000001),
                                         "capacity_2": (0.000001, 0.000007),
                                         "capacity_3": (0.000001, 0.000007),
                                         "capacity_4": (0.000001, 0.000007)
                                     },
                                     verbose=0)
    optimizer.maximize(n_iter=50)

    print("Final result:", optimizer.max)


if __name__ == "__main__":
    print(Colours.yellow("--- Optimizing gms ---"))
    optimize_gms()
    print('111')
    df_target = pd.DataFrame()
    df_target['target'] = target_y
    df_target['action'] = target_x
    df_target.to_csv('process_value.csv', index=False)
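target_x and target_y are not defined in this excerpt; presumably they are accumulated inside the objective function. An alternative that avoids module-level globals is to rebuild the evaluation trace from optimizer.res, which bayes_opt keeps as a list of {"target": ..., "params": {...}} records; a sketch under that assumption:

import pandas as pd

def optimization_trace(optimizer):
    # one row per evaluated point: the objective value plus every parameter
    rows = [{"target": res["target"], **res["params"]} for res in optimizer.res]
    return pd.DataFrame(rows)

# e.g. optimization_trace(optimizer).to_csv('process_value.csv', index=False)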
Example #6
import definitions
from bayes_opt.util import Colours
from data import *
from hydroDL import *

print('Starting ...')

configFile = definitions.CONFIG_FILE
# read the model configuration file
configData = DataConfig(configFile)
# prepare the training data
sourceData = DataSource(configData,
                        configData.model_dict["data"]["tRangeTrain"])
# build the input data model object
dataModel = DataModel(sourceData)

# train the model
print(Colours.yellow("--- Optimizing CNN ---"))
optimize_cnn(dataModel)
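optimize_cnn itself is not included in these examples. Judging from the other optimize_* functions shown here and the commented-out bo_logs.json lookup of n_input in Example #1, it presumably wraps a CNN training objective in BayesianOptimization; a heavily simplified sketch of that shape (every name, bound, and the train_and_evaluate helper are assumptions):

from bayes_opt import BayesianOptimization

def optimize_cnn_sketch(data_model):
    def cnn_objective(n_input):
        # hypothetical: train and evaluate the routing CNN for an integer window
        # length and return a score to maximize (e.g. negative validation loss)
        return -train_and_evaluate(data_model, n_input=int(n_input))  # assumed helper

    optimizer = BayesianOptimization(f=cnn_objective,
                                     pbounds={"n_input": (5, 30)},
                                     random_state=1234,
                                     verbose=2)
    optimizer.maximize(init_points=5, n_iter=20)
    return optimizer.max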
Example #7
 def test_shap(self):
     print(Colours.yellow("--- CNN shap---"))
     source_data = unserialize_pickle(self.data_source_dump)
     stat_dict = unserialize_json(self.stat_file)
     data_flow = unserialize_numpy(self.flow_file)
     t_s_dict = unserialize_json(self.t_s_dict_file)
     model_data = DataModel(source_data, data_flow, t_s_dict, stat_dict)
     model_dict = model_data.data_source.data_config.model_dict
     n_input = 14
     try_cnn = TryCnn()
     data, targets = routing_cnn.to_supervised(
         model_data.load_data(model_dict), n_input)
     try_cnn.load_state_dict(
         torch.load("F:\科研类\codes\hydro-routing-cnn\checkpoint.pt"))
     # x1 = data.reshape(715, 1, -1)
     # x = x1.reshape(-1, 1, 1, x1.shape[2])
     x = data.reshape(-1, 1, data.shape[1], data.shape[2])
     x = torch.from_numpy(x).float()
     try_cnn.eval()
     # x_pred = try_cnn(x[301:306])
     # print(x[301:306])
     # print(x_pred)
     print("======计算SHAP========")
     # build a DeepExplainer from the model and a random background sample
     background = x[np.random.choice(x.shape[0], 100, replace=False)]
     e = shap.DeepExplainer(try_cnn, background)
     # e = shap.DeepExplainer(try_cnn, x)
     shap_values = e.shap_values(x)
     shap_values_stations_days = np.abs(shap_values).sum(axis=0).reshape(
         14, data.shape[2])
     shap_days = shap_values_stations_days.sum(axis=1)
     shap_stations = shap_values_stations_days.sum(axis=0)
     # compute the baseline
     # y_base = e.expected_value
     # print("value of y_base:", y_base)
     # print("sum of y_base + shap values:", y_base + shap_values.sum())
     shap_values_array = shap_values.reshape(-1,
                                             data.shape[1] * data.shape[2])
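     # Align each sample's (n_input x stations) SHAP window on an absolute daily
     # axis: sample i is zero-padded so its contributions land on the days it
     # actually covers, which lets the per-day averaging further below divide by
     # the number of windows that overlap each day.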
     shap_arrays_values = []
     for i in range(shap_values_array.shape[0]):
         new_array = np.zeros(
             (shap_values_array.shape[0] - 1) * data.shape[2])
         if i == 0:
             ndarray = np.append(shap_values_array[i], new_array)
         elif i == shap_values_array.shape[0] - 1:
             ndarray = np.insert(shap_values_array[i], 0, new_array)
         else:
             ndarray = np.pad(
                 shap_values_array[i],
                 (i * data.shape[2],
                  (shap_values_array.shape[0] - 1 - i) * data.shape[2]),
                 'constant')
         shap_arrays_values.append(ndarray)
     shap_arrays_values = np.array(shap_arrays_values)
     shap_arrays_values_abs = np.abs(shap_arrays_values).sum(
         axis=0).reshape(-1, data.shape[2])
     print(shap_arrays_values_abs)
     shap_values_days_stations = []
     for j in range(shap_arrays_values_abs.shape[0]):
         if j < 14:
             shap_values_day_state = shap_arrays_values_abs[j] / (j + 1)
         elif j >= shap_arrays_values_abs.shape[0] - 14:
             shap_values_day_state = shap_arrays_values_abs[j] / (
                 shap_arrays_values_abs.shape[0] - j)
         else:
             shap_values_day_state = shap_arrays_values_abs[j] / 14
         shap_values_days_stations.append(shap_values_day_state)
     shap_values_days_stations = np.array(shap_values_days_stations)
     print(shap_values_days_stations)
     serialize_numpy(shap_values_days_stations,
                     self.shap_values_days_states_file)
     serialize_numpy(shap_days, self.shap_days_file)
     serialize_numpy(shap_stations, self.shap_stations_file)
     serialize_numpy(shap_values_stations_days,
                     self.shap_values_stations_days_file)
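These aggregated SHAP arrays are usually visualized afterwards; a minimal sketch of that step, assuming matplotlib is available and the serialized outputs are NumPy .npy files (the file names below are placeholders):

import matplotlib.pyplot as plt
import numpy as np

shap_days = np.load("shap_days.npy")          # summed |SHAP| per look-back day
shap_stations = np.load("shap_stations.npy")  # summed |SHAP| per upstream station

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
ax1.bar(range(1, len(shap_days) + 1), shap_days)
ax1.set_xlabel("look-back day")
ax1.set_ylabel("summed |SHAP|")
ax2.bar(range(1, len(shap_stations) + 1), shap_stations)
ax2.set_xlabel("station index")
plt.tight_layout()
plt.show()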