Beispiel #1
0
class LogisticRegressionExperiment(object):
    """Run a fold-wise logistic-regression experiment and evaluate it.

    The data set is loaded with ``get_pick_data("LogisticRegression")``, the
    model is built from the ``lr_setup`` hyper-parameters, and
    ``do_experiments`` fits and predicts once per fold before handing the
    pooled labels and scores to ``evaluate``.
    """

    # Number of folds iterated over in do_experiments.
    _N_FOLDS = 5
    # Directory that receives the evaluation output.
    _RESULT_DIR = "result_9_16_0"
    # Column count the test features are reshaped to.
    # NOTE(review): presumably equal to self._num_features -- confirm and
    # replace this constant with the attribute if so.
    _TEST_FEATURE_WIDTH = 92

    def __init__(self):
        self._data_set = get_pick_data("LogisticRegression")
        self._num_features = self._data_set.dynamic_features.shape[1]
        self._time_steps = 1
        self._n_output = 1
        self._model_format()
        self._check_path()

    def _model_format(self):
        """Create ``self._model`` from the values unpacked from ``lr_setup.all``."""
        # lr_setup.all carries a hidden_size entry that is unpacked but not
        # passed to the model.
        (learning_rate, max_loss, max_pace, ridge,
         batch_size, _hidden_size, epoch, dropout) = lr_setup.all
        self._model = LogisticRegression(
            num_features=self._num_features,
            time_steps=self._time_steps,
            n_output=self._n_output,
            batch_size=batch_size,
            epochs=epoch,
            output_n_epoch=ExperimentSetup.output_n_epochs,
            learning_rate=learning_rate,
            max_loss=max_loss,
            dropout=dropout,
            max_pace=max_pace,
            ridge=ridge)

    def _check_path(self):
        """Ensure the result directory exists and set ``self._filename``.

        The file name is ``<result dir>/<model name> <timestamp>`` where the
        timestamp is the local time formatted as ``%Y-%m-%d-%H-%M-%S``.
        """
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(self._RESULT_DIR, exist_ok=True)
        timestamp = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
        self._filename = "{}/{} {}".format(
            self._RESULT_DIR, self._model.name, timestamp)

    def do_experiments(self):
        """Fit and predict on every fold, then evaluate the pooled results.

        The model is closed when the method finishes, including when a fold
        raises, so the model is not left open after a failure.
        """
        dynamic_features = self._data_set.dynamic_features
        labels = self._data_set.labels
        tol_pred = np.zeros(shape=(0, self._n_output))
        tol_label = np.zeros(shape=(0, self._n_output), dtype=np.int32)
        try:
            (train_dynamic_features, test_dynamic_features,
             train_labels, test_labels) = split_logistic_data(
                 dynamic_features, labels)
            for fold in range(self._N_FOLDS):
                train_dynamic_res, train_labels_res = imbalance_preprocess(
                    train_dynamic_features[fold], train_labels[fold],
                    'LogisticRegression')
                train_set = DataSet(train_dynamic_res, train_labels_res)

                fold_features = test_dynamic_features[fold].reshape(
                    -1, self._TEST_FEATURE_WIDTH)
                fold_labels = test_labels[fold].reshape(-1, 1)
                test_set = DataSet(fold_features, fold_labels)

                self._model.fit(train_set, test_set)
                y_score = self._model.predict(test_set)

                tol_pred = np.vstack((tol_pred, y_score))
                # Stack the same column-shaped labels the model was scored on.
                tol_label = np.vstack((tol_label, fold_labels))
                # Folds are reported 1-based: "1 of 5" ... "5 of 5".
                print("Cross validation: {} of {}".format(
                          fold + 1, self._N_FOLDS),
                      time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))

            # NOTE(review): the index covers labels.shape[0] * labels.shape[1]
            # entries; presumably that equals the number of pooled test rows.
            tol_test_index = np.arange(labels.shape[0] * labels.shape[1])
            evaluate(tol_test_index, tol_label, tol_pred, self._filename)
        finally:
            self._model.close()
Beispiel #2
0
class LogisticRegressionExperiment(object):
    """Cross-validated logistic-regression experiment for one event type.

    The data set is loaded with ``read_data(event_type)``; the model
    hyper-parameters come from the setup object matching the event type
    ("qx", "cx", or anything else for the xycj setup).
    """

    def __init__(self, event_type):
        """Load the data for ``event_type``, build the model and output path."""
        self._event_type = event_type
        self._data_set = read_data(event_type)
        # dynamic_feature is indexed as (samples, time steps, features).
        self._num_features = self._data_set.dynamic_feature.shape[2]
        self._time_steps = self._data_set.dynamic_feature.shape[1]
        self._n_output = self._data_set.labels.shape[1]
        print(event_type)
        self._model_format()
        self._check_path()

    def _model_format(self):
        """Create ``self._model`` from the setup selected by the event type."""
        if self._event_type == "qx":
            setup = lr_qx_setup
        elif self._event_type == "cx":
            setup = lr_cx_setup
        else:
            setup = lr_xycj_setup
        learning_rate, max_loss, max_pace, lasso, ridge = setup.all
        self._model = LogisticRegression(
            num_features=self._num_features,
            time_steps=self._time_steps,
            n_output=self._n_output,
            batch_size=ExperimentSetup.batch_size,
            epochs=ExperimentSetup.epochs,
            output_n_epoch=ExperimentSetup.output_n_epochs,
            learning_rate=learning_rate,
            max_loss=max_loss,
            max_pace=max_pace,
            lasso=lasso,
            ridge=ridge)

    def _check_path(self):
        """Ensure the per-event result directory exists and set ``self._filename``.

        The file name is ``<result dir>/<model name> <timestamp>`` where the
        timestamp is the local time formatted as ``%Y-%m-%d-%H-%M-%S``.
        """
        result_dir = "average_result_cx_TEST" + self._event_type
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(result_dir, exist_ok=True)
        timestamp = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
        self._filename = result_dir + "/" + self._model.name + " " + timestamp

    def do_experiments(self):
        """Fit and predict on each stratified fold, then evaluate the pooled results.

        The model is closed when the method finishes, including when a fold
        raises, so the model is not left open after a failure.
        """
        dynamic_feature = self._data_set.dynamic_feature
        labels = self._data_set.labels
        kf = sklearn.model_selection.StratifiedKFold(
            n_splits=ExperimentSetup.kfold, shuffle=False)

        n_output = labels.shape[1]  # classes

        tol_test_index = np.zeros(shape=0, dtype=np.int32)
        tol_pred = np.zeros(shape=(0, n_output))
        tol_label = np.zeros(shape=(0, n_output), dtype=np.int32)
        try:
            # Cross-validation loop (the original comment says five-fold; the
            # actual fold count is ExperimentSetup.kfold).
            folds = kf.split(X=dynamic_feature, y=labels.reshape(-1))
            for fold_no, (train_idx, test_idx) in enumerate(folds, start=1):
                train_dynamic = dynamic_feature[train_idx]
                train_y = labels[train_idx]
                # Per the original comment: SMOTE oversampling to handle the
                # imbalanced data set.
                train_dynamic_res, train_y_res = imbalance_preprocess(
                    train_dynamic, train_y)

                test_dynamic = dynamic_feature[test_idx]
                test_y = labels[test_idx]

                train_set = DataSet(train_dynamic_res, train_y_res)
                test_set = DataSet(test_dynamic, test_y)

                self._model.fit(train_set, test_set, self._event_type)

                y_score = self._model.predict(test_set)

                tol_test_index = np.concatenate((tol_test_index, test_idx))
                tol_pred = np.vstack((tol_pred, y_score))
                tol_label = np.vstack((tol_label, test_y))
                print(
                    "Cross validation: {} of {}".format(
                        fold_no, ExperimentSetup.kfold),
                    time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
            evaluate(tol_test_index, tol_label, tol_pred, self._filename)
        finally:
            self._model.close()