def test_csv_pipeline(self):
    """Run a CSV-backed pipeline with one lowercase step and count the output.

    Every batch yielded by ``process_data()`` is flattened into a single
    list of ``item["data"]`` values, which is expected to hold four entries.
    """
    # NOTE(review): the "columns" mapping presumably routes the CSV's
    # "text" column into each item's "data" field -- confirm against
    # DataPreprocess.
    loader_settings = {
        "name": "data_loader",
        "type": "csv",
        "file_path": "test_data/test.csv",
        "columns": {
            "id": "id",
            "data": "text",
            "additional_columns": ["username"],
        },
    }
    lowercase_step = {
        "name": "normalize_text",
        "type": "lowercase",
        "log_level": "INFO",
    }
    pipeline = DataPreprocess(
        {"data_loader": loader_settings, "steps": [lowercase_step]}
    )

    collected = [
        record["data"]
        for chunk in pipeline.process_data()
        for record in chunk
    ]

    self.assertEqual(4, len(collected))
def test_list_pipeline(self):
    """Run a list-backed pipeline with one lowercase step over ``TEST_LIST``.

    The flattened ``item["data"]`` values from every batch must equal the
    lowercased entries of ``TEST_LIST``, in the same order.
    """
    lowercase_step = {
        "name": "normalize_text",
        "type": "lowercase",
        "log_level": "INFO",
    }
    pipeline = DataPreprocess(
        {"data_loader": {"type": "list"}, "steps": [lowercase_step]}
    )

    # Consume the pipeline before building the expectation, as the original
    # test did, so the evaluation order is unchanged.
    produced = [
        record["data"]
        for chunk in pipeline.process_data(TEST_LIST)
        for record in chunk
    ]
    expected = [entry.lower() for entry in TEST_LIST]

    self.assertEqual(expected, produced)