class TestDataSetSplitter(unittest.TestCase):
    """Exercise ``DataSetSplitter.split_by_ratio`` on a ten-row fixture."""

    def setUp(self):
        """Build a ten-row ``DataModel`` with two features and one target."""
        column_names = ('target', 'feature_1', 'feature_2')
        # Every column holds the integers 0..9, each as its own list object.
        self._data = {name: list(range(10)) for name in column_names}
        self._dataframe = pd.DataFrame(data=self._data)
        self._data_model = DataModel(self._dataframe)

        numeric_features = [
            tf.feature_column.numeric_column(feature_name)
            for feature_name in ('feature_1', 'feature_2')
        ]
        self._data_model.set_tf_feature_columns(numeric_features)
        self._data_model.set_target_column('target')

    def test_split_data(self):
        """Check that ratios ``[20, 80]`` give parts of length 2 and 8."""
        ratio_splitter = DataSetSplitter(self._data_model)
        eval_part, train_part = ratio_splitter.split_by_ratio(ratios=[20, 80])

        train_x = train_part.get_feature_columns()
        train_y = train_part.get_target_column()
        eval_x = eval_part.get_feature_columns()
        eval_y = eval_part.get_target_column()

        # Checked in the same order as before: train target, train features,
        # evaluation target, evaluation features.
        expectations = (
            (train_y, 8),
            (train_x, 8),
            (eval_y, 2),
            (eval_x, 2),
        )
        for subset, expected_length in expectations:
            self.assertEqual(len(subset), expected_length)
def render_tf_feature_columns(self, data_model: DataModel):
    """Rebuild the TF feature columns stored on ``data_model``.

    The model's feature-column list is first reset to an empty list. Then,
    for every entry returned by ``self.feature_columns()``, a strategy is
    requested from ``FeatureColumnStrategyFactory.get_strategy`` using the
    entry's ``'name'`` and ``'type'`` values, the model itself and
    ``self.feature_config()``. Each item produced by the strategy's
    ``build()`` is added to the model via ``add_tf_feature_columns``.

    Args:
        data_model: The model whose TF feature columns are replaced.
    """
    # Start from a clean slate so previously registered columns are dropped.
    data_model.set_tf_feature_columns([])

    for column_info in self.feature_columns():
        column_name = column_info['name']
        column_type = column_info['type']
        # The feature config is fetched anew for every column.
        strategy = FeatureColumnStrategyFactory.get_strategy(
            column_name,
            column_type,
            data_model,
            self.feature_config(),
        )
        # Register this strategy's columns before the next strategy is built.
        for built_column in strategy.build():
            data_model.add_tf_feature_columns(built_column)