class LogisticRegressionExperiment(object):
    """Five-fold logistic-regression experiment on the "LogisticRegression" pick data.

    Constructing an instance loads the data set, builds the model from
    ``lr_setup`` and prepares the result path.  ``do_experiments`` then runs
    the folds, evaluates the pooled predictions and closes the model.
    """

    # Directory that receives the evaluation output.
    _RESULT_DIR = "result_9_16_0"
    # Number of folds indexed out of the ``split_logistic_data`` result.
    _N_FOLDS = 5
    # Column count every test fold is reshaped to.
    # NOTE(review): presumably the per-sample feature count -- confirm
    # against the data set before tying it to ``self._num_features``.
    _TEST_FEATURE_WIDTH = 92

    def __init__(self):
        """Load the data, build the model and prepare the output path."""
        self._data_set = get_pick_data("LogisticRegression")
        self._num_features = self._data_set.dynamic_features.shape[1]
        self._time_steps = 1
        self._n_output = 1
        self._model_format()
        self._check_path()

    def _model_format(self):
        """Create ``self._model`` from the hyper-parameters in ``lr_setup.all``."""
        # ``lr_setup.all`` unpacks to eight values; the hidden size is not
        # used by this model but must still be consumed by the unpacking.
        (learning_rate, max_loss, max_pace, ridge,
         batch_size, _hidden_size, epoch, dropout) = lr_setup.all
        self._model = LogisticRegression(
            num_features=self._num_features,
            time_steps=self._time_steps,
            n_output=self._n_output,
            batch_size=batch_size,
            epochs=epoch,
            output_n_epoch=ExperimentSetup.output_n_epochs,
            learning_rate=learning_rate,
            max_loss=max_loss,
            dropout=dropout,
            max_pace=max_pace,
            ridge=ridge)

    def _check_path(self):
        """Ensure the result directory exists and set ``self._filename``.

        The file name is ``<result dir>/<model name> <timestamp>`` where the
        timestamp is the current local time.
        """
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(self._RESULT_DIR, exist_ok=True)
        stamp = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
        self._filename = "{}/{} {}".format(
            self._RESULT_DIR, self._model.name, stamp)

    def do_experiments(self):
        """Run all folds, evaluate the pooled predictions and close the model.

        For each fold the training split is passed through
        ``imbalance_preprocess``, the model is fit and then scored on the
        test split.  Predictions and labels of all folds are stacked and
        handed to ``evaluate`` together with ``self._filename``.

        The model is closed even when a step raises; the exception is then
        propagated to the caller.
        """
        try:
            dynamic_features = self._data_set.dynamic_features
            labels = self._data_set.labels
            n_output = self._n_output

            # Accumulators for the pooled predictions / labels of every fold.
            tol_pred = np.zeros(shape=(0, n_output))
            tol_label = np.zeros(shape=(0, n_output), dtype=np.int32)

            (train_dynamic_features, test_dynamic_features,
             train_labels, test_labels) = split_logistic_data(
                 dynamic_features, labels)

            # NOTE(review): the same model instance is fit on every fold;
            # whether ``fit`` re-initialises its parameters is not visible
            # here -- confirm to rule out leakage between folds.
            for fold in range(self._N_FOLDS):
                train_dynamic_res, train_labels_res = imbalance_preprocess(
                    train_dynamic_features[fold],
                    train_labels[fold],
                    'LogisticRegression')
                train_set = DataSet(train_dynamic_res, train_labels_res)
                test_set = DataSet(
                    test_dynamic_features[fold].reshape(
                        -1, self._TEST_FEATURE_WIDTH),
                    test_labels[fold].reshape(-1, 1))

                self._model.fit(train_set, test_set)
                y_score = self._model.predict(test_set)

                tol_pred = np.vstack((tol_pred, y_score))
                tol_label = np.vstack((tol_label, test_labels[fold]))
                # Report folds 1-based ("1 of 5" ... "5 of 5").
                print("Cross validation: {} of {}".format(
                          fold + 1, self._N_FOLDS),
                      time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))

            # ``evaluate`` gets a plain 0..N-1 index sized from the label
            # array, not the original per-fold sample indices.
            tol_test_index = np.arange(labels.shape[0] * labels.shape[1])
            evaluate(tol_test_index, tol_label, tol_pred, self._filename)
        finally:
            # Release the model's resources on success and on failure.
            self._model.close()
class LogisticRegressionExperiment(object):
    """Stratified k-fold logistic-regression experiment for one event type.

    Constructing an instance reads the data for ``event_type``, builds the
    model from the matching setup object and prepares the result path.
    ``do_experiments`` then runs the folds, evaluates the pooled predictions
    and closes the model.
    """

    # Prefix of the result directory; the event type is appended to it.
    _RESULT_DIR_PREFIX = "average_result_cx_TEST"

    def __init__(self, event_type):
        """Load the data for *event_type*, build the model, prepare the path.

        Args:
            event_type: Event identifier.  ``"qx"`` and ``"cx"`` select
                their own hyper-parameter sets; any other value falls back
                to ``lr_xycj_setup``.
        """
        self._event_type = event_type
        self._data_set = read_data(event_type)
        # Model dimensions are read from axes 2 and 1 of the feature array
        # and axis 1 of the label array.
        self._num_features = self._data_set.dynamic_feature.shape[2]
        self._time_steps = self._data_set.dynamic_feature.shape[1]
        self._n_output = self._data_set.labels.shape[1]
        print(event_type)
        self._model_format()
        self._check_path()

    def _model_format(self):
        """Create ``self._model`` from the setup matching the event type."""
        if self._event_type == "qx":
            setup = lr_qx_setup
        elif self._event_type == "cx":
            setup = lr_cx_setup
        else:
            setup = lr_xycj_setup
        learning_rate, max_loss, max_pace, lasso, ridge = setup.all

        self._model = LogisticRegression(
            num_features=self._num_features,
            time_steps=self._time_steps,
            n_output=self._n_output,
            batch_size=ExperimentSetup.batch_size,
            epochs=ExperimentSetup.epochs,
            output_n_epoch=ExperimentSetup.output_n_epochs,
            learning_rate=learning_rate,
            max_loss=max_loss,
            max_pace=max_pace,
            lasso=lasso,
            ridge=ridge)

    def _check_path(self):
        """Ensure the result directory exists and set ``self._filename``.

        The file name is ``<prefix><event type>/<model name> <timestamp>``
        where the timestamp is the current local time.
        """
        result_dir = self._RESULT_DIR_PREFIX + self._event_type
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(result_dir, exist_ok=True)
        stamp = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
        self._filename = "{}/{} {}".format(
            result_dir, self._model.name, stamp)

    def do_experiments(self):
        """Run all folds, evaluate the pooled predictions and close the model.

        The data is split with an unshuffled ``StratifiedKFold`` of
        ``ExperimentSetup.kfold`` folds.  For each fold the training split
        is passed through ``imbalance_preprocess``, the model is fit and
        then scored on the test split.  Test indices, predictions and labels
        of all folds are concatenated and handed to ``evaluate`` together
        with ``self._filename``.

        The model is closed even when a step raises; the exception is then
        propagated to the caller.
        """
        try:
            dynamic_feature = self._data_set.dynamic_feature
            labels = self._data_set.labels
            n_folds = ExperimentSetup.kfold
            kf = sklearn.model_selection.StratifiedKFold(
                n_splits=n_folds, shuffle=False)

            n_output = labels.shape[1]  # classes

            # Accumulators for the pooled results of every fold.
            tol_test_index = np.zeros(shape=0, dtype=np.int32)
            tol_pred = np.zeros(shape=(0, n_output))
            tol_label = np.zeros(shape=(0, n_output), dtype=np.int32)

            # k-fold cross-validation (the original comment said five-fold;
            # the actual count comes from ExperimentSetup.kfold).
            # NOTE(review): the same model instance is fit on every fold;
            # whether ``fit`` re-initialises its parameters is not visible
            # here -- confirm to rule out leakage between folds.
            splits = kf.split(X=dynamic_feature, y=labels.reshape(-1))
            for fold, (train_idx, test_idx) in enumerate(splits, start=1):
                # Rebalance the training split only.  Per the original
                # comment this is SMOTE oversampling for the imbalanced
                # data set.
                train_dynamic_res, train_y_res = imbalance_preprocess(
                    dynamic_feature[train_idx], labels[train_idx])

                test_y = labels[test_idx]
                train_set = DataSet(train_dynamic_res, train_y_res)
                test_set = DataSet(dynamic_feature[test_idx], test_y)

                self._model.fit(train_set, test_set, self._event_type)
                y_score = self._model.predict(test_set)

                tol_test_index = np.concatenate((tol_test_index, test_idx))
                tol_pred = np.vstack((tol_pred, y_score))
                tol_label = np.vstack((tol_label, test_y))
                print(
                    "Cross validation: {} of {}".format(fold, n_folds),
                    time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))

            evaluate(tol_test_index, tol_label, tol_pred, self._filename)
        finally:
            # Release the model's resources on success and on failure.
            self._model.close()