Example #1
0
    def train_test_split_matrix(self, split=0.2, use_saved=True):
        """
        Populate ``self.train`` and ``self.test``.

        If use_saved is true, the pickled train and test matrices are loaded
        from the locations given by the config and ``split`` is ignored.
        Otherwise the columns of ``self.matrix`` are shuffled and divided
        between the two sets: ``int(n_columns * split)`` columns go to the
        test matrix and the remaining columns go to the train matrix. Both
        matrices keep the full (shuffled) shape; columns that belong to the
        other set are overwritten with 0. The fresh split is pickled to the
        configured train/test locations before being stored on the instance.

        :param split: the test proportion of the data; has to be a float between 0 and 1
        :param use_saved: if true, we use the local pickled copies of the data
        :raises ValueError: if a fresh split is requested and ``split`` is
            not within [0, 1]
        """
        if use_saved:
            self.train = Util.load_pickle_object(
                self.config.get_train_data_loc())
            self.test = Util.load_pickle_object(
                self.config.get_test_data_loc())
            return

        if not 0 <= split <= 1:
            raise ValueError(
                "split has to be a float between 0 and 1, got {!r}".format(split))

        # Shuffle the column order so the split does not depend on the
        # original ordering of the columns.
        shuffled_columns = self.matrix.columns.tolist()
        shuffle(shuffled_columns)
        # Selecting with a list of labels builds a new frame, so self.matrix
        # itself is never modified below.
        shuffled_matrix = self.matrix[shuffled_columns]

        # Number of (shuffled) columns reserved for the test set.
        test_size = int(shuffled_matrix.shape[1] * split)

        # Train keeps everything except the first `test_size` columns.
        train_matrix = shuffled_matrix.copy(deep=True)
        train_matrix.iloc[:, :test_size] = 0

        # Test keeps only the first `test_size` columns.
        test_matrix = shuffled_matrix.copy(deep=True)
        test_matrix.iloc[:, test_size:] = 0

        Util.pickle_object(self.config.get_train_data_loc(), train_matrix)
        Util.pickle_object(self.config.get_test_data_loc(), test_matrix)

        self.train = train_matrix
        self.test = test_matrix
Example #2
0
 def load_test_matrix(self):
     """
     Load the test matrix from the location reported by the config.

     The loaded object is stored on ``self.test`` and also returned.

     :return: the object read via ``Util.load_pickle_object``
     """
     test_location = self.config.get_test_data_loc()
     self.test = Util.load_pickle_object(test_location)
     return self.test
Example #3
0
 def load_train_matrix(self):
     """
     Load the train matrix from the location reported by the config.

     The loaded object is stored on ``self.train`` and also returned.

     :return: the object read via ``Util.load_pickle_object``
     """
     train_location = self.config.get_train_data_loc()
     self.train = Util.load_pickle_object(train_location)
     return self.train
Example #4
0
 def load_matrix(self):
     """
     Load the full matrix from the location reported by the config.

     The loaded object is stored on ``self.matrix`` and also returned.

     :return: the object read via ``Util.load_pickle_object``
     """
     matrix_location = self.config.get_matrix_loc()
     self.matrix = Util.load_pickle_object(matrix_location)
     return self.matrix
Example #5
0
 def load_data_dict(self):
     """
     Load the data dictionary from the location reported by the config.

     The loaded object is stored on ``self.raw_data`` and also returned.

     :return: the object read via ``Util.load_pickle_object``
     """
     data_dict_location = self.config.get_data_dict_loc()
     self.raw_data = Util.load_pickle_object(data_dict_location)
     return self.raw_data