Exemple #1
0
class TestDataset(unittest.TestCase):
    """Exercise DataModel column validation and feature/target accessors."""

    def setUp(self):
        """Wrap a small three-column DataFrame in a fresh DataModel."""
        self._data = {
            'col1': [1, 2],
            'col2': [3, 4],
            'col4': [5, 6],
        }
        self._dataframe = pd.DataFrame(data=self._data)
        self._dataset = DataModel(self._dataframe)

    def test_validate_columns_invalid(self):
        """Validating a column that is not in the frame raises RuntimeError."""
        absent_columns = ['col3']
        with self.assertRaises(RuntimeError):
            self._dataset.validate_columns(absent_columns)

    def test_validate_columns(self):
        """Validating a column that is in the frame completes without raising."""
        present_columns = ['col1']
        self._dataset.validate_columns(present_columns)

    def test_feature_columns(self):
        """Feature columns read back carry exactly the names that were set."""
        expected_names = ['col1', 'col2']
        self._dataset.set_feature_columns(expected_names)

        selected = self._dataset.get_feature_columns()
        actual_names = list(selected.columns.values)

        self.assertEqual(actual_names, expected_names)

    def test_target_column(self):
        """The target column read back holds the source values of that column."""
        target_name = 'col1'
        self._dataset.set_target_column(target_name)

        target_values = self._dataset.get_target_column().tolist()

        self.assertEqual(target_values, self._data[target_name])
Exemple #2
0
class TestDataSetSplitter(unittest.TestCase):
    """Check that DataSetSplitter divides a DataModel by the requested ratios."""

    def setUp(self):
        """Build a ten-row DataModel with two numeric features and a target."""
        column_names = ('target', 'feature_1', 'feature_2')
        # Every column holds the same values 0..9, each in its own list.
        self._data = {name: list(range(10)) for name in column_names}

        self._dataframe = pd.DataFrame(data=self._data)
        self._data_model = DataModel(self._dataframe)

        numeric_features = [
            tf.feature_column.numeric_column(name)
            for name in ('feature_1', 'feature_2')
        ]
        self._data_model.set_tf_feature_columns(numeric_features)

        self._data_model.set_target_column('target')

    def test_split_data(self):
        """A 20/80 split of ten rows yields two evaluation and eight train rows."""
        splitter = DataSetSplitter(self._data_model)
        parts = splitter.split_by_ratio(ratios=[20, 80])
        # The parts come back in the same order as the ratios: 20% then 80%.
        evaluation_data, train_data = parts

        train_features = train_data.get_feature_columns()
        train_target = train_data.get_target_column()

        eval_features = evaluation_data.get_feature_columns()
        eval_target = evaluation_data.get_target_column()

        expected_lengths = (
            (train_target, 8),
            (train_features, 8),
            (eval_target, 2),
            (eval_features, 2),
        )
        for subset, expected_rows in expected_lengths:
            self.assertEqual(len(subset), expected_rows)
Exemple #3
0
    def setUp(self):
        """Attach independent training and evaluation DataModels to a new AI.

        Both models are built from the same column values, with 'target' as
        the target column and 'feat_A' / 'feat_B' as the feature columns.
        """
        data_array = {'feat_A': [1, 2, 3], 'feat_B': [8, 6, 4], 'target': [9, 8, 7]}

        # Build one DataFrame per model. The previous chained assignment
        # (df2 = df = pd.DataFrame(...)) bound both names to the same object,
        # so any in-place change made through one model would have leaked
        # into the other.
        train_model = DataModel(pd.DataFrame(data_array))
        train_model.set_target_column('target')
        train_model.set_feature_columns(['feat_A', 'feat_B'])

        eval_model = DataModel(pd.DataFrame(data_array))
        eval_model.set_target_column('target')
        eval_model.set_feature_columns(['feat_A', 'feat_B'])

        self.arti = AI('test', 'test/test')
        self.arti.training_data = train_model
        self.arti.evaluation_data = eval_model