예제 #1
0
    def __separate_data(self, data_filename):
        """Load a data set and split it into training and validation parts.

        A ``DataAnalyzer`` is built from ``data_filename``; its mean and
        standard-deviation lists are stored on ``self.mean_list`` and
        ``self.stdev_list``. The features are scaled with
        ``self.__apply_feature_scaling``, a leading column of ones is
        prepended, and the targets are appended as trailing column(s). The
        rows are shuffled, the first 80% (rounded down) become the training
        set, and the remainder becomes the validation set.

        Sets ``self.training_X``, ``self.training_Y``, ``self.validation_X``
        and ``self.validation_Y``; both ``*_Y`` attributes are reshaped to
        column vectors of shape ``(rows, 1)``.

        NOTE(review): only the last column is taken back out as the target,
        so this presumably expects ``DataAnalyzer.Y`` to hold a single
        column -- confirm against ``DataAnalyzer``.
        """
        analyzer = DataAnalyzer(data_filename)
        self.mean_list = analyzer.get_mean_list()
        self.stdev_list = analyzer.get_stdev_list()

        # Scale the features, then prepend the column of 1s.
        features = self.__apply_feature_scaling(analyzer.X)
        bias_column = np.ones(features.shape[0])
        features = np.c_[bias_column, features]

        # Keep features and targets in one array so a single shuffle keeps
        # each row's X and Y together.
        dataset = np.c_[features, analyzer.Y]
        np.random.shuffle(dataset)  # in place, uses NumPy's global RNG

        # The top 80% of the shuffled rows are used for training.
        training_fraction = 0.8
        cutoff = int(dataset.shape[0] * training_fraction)
        training_rows, validation_rows = dataset[:cutoff, :], dataset[cutoff:, :]

        # The last column is the target; everything before it is X.
        self.training_X = training_rows[:, :-1]
        self.training_Y = training_rows[:, -1].reshape(-1, 1)

        self.validation_X = validation_rows[:, :-1]
        self.validation_Y = validation_rows[:, -1].reshape(-1, 1)
예제 #2
0
 def __init_training_data(self, training_filename):
     """Load the training set from ``training_filename`` onto ``self``.

     A ``DataAnalyzer`` is built from the file; its mean and
     standard-deviation lists are stored on ``self.mean_list`` and
     ``self.stdev_list``. ``self.training_X`` ends up as the feature
     matrix after ``self.__apply_feature_scaling`` with a leading column
     of ones, and ``self.training_Y`` is the analyzer's ``Y`` unchanged.
     """
     analyzer = DataAnalyzer(training_filename)
     self.mean_list = analyzer.get_mean_list()
     self.stdev_list = analyzer.get_stdev_list()

     # Publish the raw features first, then scale them.
     self.training_X = analyzer.X
     scaled_features = self.__apply_feature_scaling(self.training_X)

     # Prepend the column of 1s to the scaled features.
     bias_column = np.ones(scaled_features.shape[0])
     self.training_X = np.c_[bias_column, scaled_features]

     self.training_Y = analyzer.Y