예제 #1
0
    def setup_training_data(self, train_file, target):
        """Load the training data and remember which column to predict.

        Args:
            train_file: File name handed to ``data_io.get_data`` together
                with the path returned by ``setup.get_datasets_path()``.
            target: Value stored unchanged as ``self.target_column``.
        """
        datasets_dir = setup.get_datasets_path()

        # Training data is loaded first, so a failed load leaves the
        # target column untouched.
        self.train_data = data_io.get_data(datasets_dir, train_file)
        self.target_column = target
예제 #2
0
 def setUp(self):
     """Build a numeric-only Titanic fixture containing missing values.

     Loads ``titanic_train.csv``, adds an all-NaN ``Nonsense`` column,
     coerces every column with ``pd.to_numeric(errors='coerce')``, and
     writes one all-NaN row at the label equal to the row count
     (presumably a fresh row, assuming the default 0..n-1 index).
     Also records the target column name and the resulting shape.
     """
     frame = data_io.get_data(setup.get_datasets_path(), "titanic_train.csv")
     frame["Nonsense"] = np.nan

     # Values that cannot be parsed as numbers become NaN.
     frame = frame.apply(pd.to_numeric, errors='coerce')

     row_count, column_count = frame.shape
     frame.loc[row_count] = [np.nan] * column_count

     self.data = frame
     self.target = 'Survived'
     self.start = frame.shape
예제 #3
0
    def standard_pred_type(self, filename, target, goal, message):
        """Assert that a dataset column is detected as the expected type.

        Args:
            filename: Dataset file name passed to ``data_io.get_data``.
            target: Key used to select the column from the loaded data.
            goal: Expected result of ``dataset_insight.get_prediction_type``.
            message: Failure message forwarded to ``assertEqual``.
        """
        # Arrange
        frame = data_io.get_data(setup.get_datasets_path(), filename)
        target_column = frame[target]

        # Act
        detected = dataset_insight.get_prediction_type(target_column)

        # Assert
        self.assertEqual(detected, goal, msg=message)
예제 #4
0
    def test_scaling(self):
        """Check that scaled wine-quality data has mean 0 and std 1.

        Loads ``winequality-red.csv``, runs it through
        ``data_scaling.scale_numeric_data`` and asserts that the mean and
        standard deviation taken over all values of the result are 0 and
        1 respectively, to 5 decimal places.
        """
        # Arrange
        data = data_io.get_data(setup.get_datasets_path(),
                                "winequality-red.csv")

        # Act
        scaled = data_scaling.scale_numeric_data(data)
        # ``DataFrame.as_matrix`` was deprecated in pandas 0.23 and removed
        # in 1.0; ``.values`` returns the same ndarray and is available in
        # both old and current pandas releases.
        values = scaled.values

        # Assert
        self.assertAlmostEqual(values.mean(), 0, 5)
        self.assertAlmostEqual(values.std(), 1, 5)
예제 #5
0
    def test_data_split(self):
        """Check the train/test split sizes for the wine-quality data.

        Asserts that features and labels have matching lengths in each
        partition and that the train-to-test size ratio is 4 (to 2
        decimal places).
        """
        # Arrange
        frame = data_io.get_data(setup.get_datasets_path(),
                                 "winequality-red.csv")

        # Act
        split = data_splitting.get_train_test(frame, "quality")
        x_train, x_test, y_train, y_test = split

        # Assert
        train_rows = len(x_train[:, 0])
        self.assertEqual(train_rows, len(y_train))

        test_rows = len(x_test[:, 0])
        self.assertEqual(test_rows, len(y_test))

        train_to_test_ratio = len(y_train) / len(y_test)
        self.assertAlmostEqual(train_to_test_ratio, 4, 2)
예제 #6
0
 def setUp(self):
     """Load the red-wine dataset and record its target and shape."""
     frame = data_io.get_data(setup.get_datasets_path(), "winequality-red.csv")

     self.data = frame
     self.target = 'quality'
     # Shape as loaded, kept in ``self.start``.
     self.start = frame.shape
예제 #7
0
 def setUp(self):
     """Load ``titanic_train.csv`` into ``self.data`` for each test."""
     datasets_dir = setup.get_datasets_path()
     self.data = data_io.get_data(datasets_dir, "titanic_train.csv")
예제 #8
0
 def setUp(self):
     """Load the red-wine dataset and build a ``FillerStrategy`` on it."""
     frame = data_io.get_data(setup.get_datasets_path(), "winequality-red.csv")

     self.data = frame
     self.strategy = filler_strategy.FillerStrategy(frame)
예제 #9
0
 def setUp(self):
     """Build a numeric Titanic fixture with an empty and a full column.

     Adds a ``Nonsense`` column set to NaN and a ``Filled`` column set
     to 1, then coerces every column with
     ``pd.to_numeric(errors='coerce')``.
     """
     frame = data_io.get_data(setup.get_datasets_path(), "titanic_train.csv")
     frame["Nonsense"] = np.nan
     frame["Filled"] = 1

     # Values that cannot be parsed as numbers become NaN.
     self.data = frame.apply(pd.to_numeric, errors='coerce')
예제 #10
0
 def setup_test_data(self, test_file):
     """Load the testing data into ``self.test_data``.

     Args:
         test_file: File name handed to ``data_io.get_data`` together
             with the path returned by ``setup.get_datasets_path()``.
     """
     datasets_dir = setup.get_datasets_path()
     self.test_data = data_io.get_data(datasets_dir, test_file)