def setup_training_data(self, train_file, target):
    """Load the training data and remember what should be predicted.

    Args:
        train_file: File name handed to ``data_io.get_data`` together with
            the path returned by ``setup.get_datasets_path()``.
        target: Value stored unchanged as ``self.target_column``.
    """
    datasets_dir = setup.get_datasets_path()

    # Loading training data
    self.train_data = data_io.get_data(datasets_dir, train_file)

    # Setting what to predict
    self.target_column = target
def setUp(self):
    """Build a numeric Titanic fixture with an all-NaN column and row.

    After loading ``titanic_train.csv`` a ``"Nonsense"`` column of NaN is
    added, every column is coerced with ``pd.to_numeric`` (unparseable
    values become NaN), and one extra row consisting only of NaN is
    appended. ``self.start`` records the resulting shape.
    """
    frame = data_io.get_data(setup.get_datasets_path(), "titanic_train.csv")
    frame["Nonsense"] = np.nan
    frame = frame.apply(pd.to_numeric, errors='coerce')

    # Append a row labelled with the current row count, filled with NaN.
    row_count, column_count = frame.shape
    frame.loc[row_count] = [np.nan] * column_count

    self.data = frame
    self.target = 'Survived'
    self.start = frame.shape
def standard_pred_type(self, filename, target, goal, message):
    """Assert that the prediction type detected for ``target`` equals ``goal``.

    Args:
        filename: File name passed to ``data_io.get_data``.
        target: Key used to select ``data[target]`` from the loaded data.
        goal: Expected result of ``dataset_insight.get_prediction_type``.
        message: Failure message forwarded to ``assertEqual``.
    """
    # Arrange
    dataset = data_io.get_data(setup.get_datasets_path(), filename)
    selected = dataset[target]

    # Act
    detected_type = dataset_insight.get_prediction_type(selected)

    # Assert
    self.assertEqual(detected_type, goal, msg=message)
def test_scaling(self):
    """Check that scaled wine data has mean ~0 and standard deviation ~1.

    The statistics are taken over the whole scaled array (all columns
    together) and compared to 5 decimal places.
    """
    # Arrange
    data = data_io.get_data(setup.get_datasets_path(), "winequality-red.csv")

    # Act
    scaled = data_scaling.scale_numeric_data(data)
    # DataFrame.as_matrix() was deprecated in pandas 0.23 and removed in 1.0;
    # ``.values`` gives the same ndarray on every pandas version.
    # NOTE(review): assumes scale_numeric_data returns a DataFrame, as the
    # original as_matrix call implied - confirm.
    target = scaled.values

    # Assert
    self.assertAlmostEqual(target.mean(), 0, places=5)
    self.assertAlmostEqual(target.std(), 1, places=5)
def test_data_split(self):
    """Check the train/test split sizes for the wine-quality data.

    Features and labels must have matching lengths within each partition,
    and the train/test label ratio must equal 4 to 2 decimal places.
    """
    # Arrange
    wine = data_io.get_data(setup.get_datasets_path(), "winequality-red.csv")

    # Act
    split = data_splitting.get_train_test(wine, "quality")
    x_train, x_test, y_train, y_test = split

    # Assert
    train_rows = len(x_train[:, 0])
    self.assertEqual(train_rows, len(y_train))

    test_rows = len(x_test[:, 0])
    self.assertEqual(test_rows, len(y_test))

    ratio = len(y_train) / len(y_test)
    self.assertAlmostEqual(ratio, 4, places=2)
def setUp(self):
    """Load ``winequality-red.csv`` and record its target name and shape."""
    wine = data_io.get_data(setup.get_datasets_path(), "winequality-red.csv")

    self.data = wine
    self.target = 'quality'
    # Shape of the data as loaded, before any test touches it.
    self.start = wine.shape
def setUp(self):
    """Load ``titanic_train.csv`` into ``self.data`` for each test."""
    datasets_dir = setup.get_datasets_path()
    self.data = data_io.get_data(datasets_dir, "titanic_train.csv")
def setUp(self):
    """Load ``winequality-red.csv`` and wrap it in a ``FillerStrategy``."""
    wine = data_io.get_data(setup.get_datasets_path(), "winequality-red.csv")

    self.data = wine
    self.strategy = filler_strategy.FillerStrategy(wine)
def setUp(self):
    """Build a numeric Titanic fixture with an empty and a constant column.

    Adds a ``"Nonsense"`` column of NaN and a ``"Filled"`` column of 1,
    then coerces every column with ``pd.to_numeric`` so values that cannot
    be parsed become NaN.
    """
    frame = data_io.get_data(setup.get_datasets_path(), "titanic_train.csv")
    frame["Nonsense"] = np.nan
    frame["Filled"] = 1

    self.data = frame.apply(pd.to_numeric, errors='coerce')
def setup_test_data(self, test_file):
    """Load the testing data into ``self.test_data``.

    Args:
        test_file: File name handed to ``data_io.get_data`` together with
            the path returned by ``setup.get_datasets_path()``.
    """
    datasets_dir = setup.get_datasets_path()

    # Loading testing data
    self.test_data = data_io.get_data(datasets_dir, test_file)