class TestDataset(unittest.TestCase):
    """Exercise column validation and feature/target selection on DataModel."""

    def setUp(self):
        """Build a DataModel over a small three-column frame (col1, col2, col4)."""
        self._data = {'col1': [1, 2], 'col2': [3, 4], 'col4': [5, 6]}
        self._dataframe = pd.DataFrame(data=self._data)
        self._dataset = DataModel(self._dataframe)

    def test_validate_columns_invalid(self):
        """A column name absent from the fixture ('col3') must raise RuntimeError."""
        self.assertRaises(
            RuntimeError, self._dataset.validate_columns, ['col3']
        )

    def test_validate_columns(self):
        """Validating a column present in the fixture must not raise."""
        self._dataset.validate_columns(['col1'])

    def test_feature_columns(self):
        """The feature columns read back must carry the names that were set."""
        expected_names = ['col1', 'col2']
        self._dataset.set_feature_columns(expected_names)

        features = self._dataset.get_feature_columns()
        # Compare plain Python lists so the check is on names and their order.
        self.assertEqual(list(features.columns.values), expected_names)

    def test_target_column(self):
        """The target column read back must hold the fixture values of 'col1'."""
        target_name = 'col1'
        self._dataset.set_target_column(target_name)

        actual_values = self._dataset.get_target_column().tolist()
        self.assertEqual(actual_values, self._data[target_name])
class TestDataSetSplitter(unittest.TestCase):
    """Check that DataSetSplitter divides a ten-row DataModel by ratio."""

    def setUp(self):
        """Create a ten-row DataModel with one target and two numeric features."""
        # Every column holds the same values 0..9; each gets its own list object.
        self._data = {
            column: list(range(10))
            for column in ('target', 'feature_1', 'feature_2')
        }
        self._dataframe = pd.DataFrame(data=self._data)
        self._data_model = DataModel(self._dataframe)

        numeric_features = [
            tf.feature_column.numeric_column(column)
            for column in ('feature_1', 'feature_2')
        ]
        self._data_model.set_tf_feature_columns(numeric_features)
        self._data_model.set_target_column('target')

    def test_split_data(self):
        """Splitting ten rows with ratios [20, 80] must give 2 and 8 rows."""
        parts = DataSetSplitter(self._data_model).split_by_ratio(ratios=[20, 80])
        # Parts come back in the same order as the requested ratios.
        evaluation_part, training_part = parts

        training_features = training_part.get_feature_columns()
        training_target = training_part.get_target_column()
        evaluation_features = evaluation_part.get_feature_columns()
        evaluation_target = evaluation_part.get_target_column()

        self.assertEqual(len(training_target), 8)
        self.assertEqual(len(training_features), 8)
        self.assertEqual(len(evaluation_target), 2)
        self.assertEqual(len(evaluation_features), 2)
def setUp(self):
    """Prepare an AI instance with training and evaluation DataModels attached."""
    frame = pd.DataFrame(
        {'feat_A': [1, 2, 3], 'feat_B': [8, 6, 4], 'target': [9, 8, 7]}
    )

    def build_model():
        # Both models wrap the very same DataFrame object (no copy is made).
        model = DataModel(frame)
        model.set_target_column('target')
        model.set_feature_columns(['feat_A', 'feat_B'])
        return model

    training_model = build_model()
    evaluation_model = build_model()

    self.arti = AI('test', 'test/test')
    self.arti.training_data = training_model
    self.arti.evaluation_data = evaluation_model