コード例 #1
0
    def test_csv_pipeline(self):
        """Run a CSV-backed pipeline and check how many records it yields.

        Only the number of collected ``"data"`` values is asserted; the
        content produced by the configured ``lowercase`` step is not
        inspected here.
        """
        csv_loader = {
            "name": "data_loader",
            "type": "csv",
            "file_path": "test_data/test.csv",
            "columns": {
                "id": "id",
                "data": "text",
                "additional_columns": ["username"],
            },
        }
        lowercase_step = {
            "name": "normalize_text",
            "type": "lowercase",
            "log_level": "INFO",
        }
        pipeline = DataPreprocess(
            {"data_loader": csv_loader, "steps": [lowercase_step]}
        )

        # Flatten every batch into one list of the items' "data" values.
        collected = [
            record["data"]
            for chunk in pipeline.process_data()
            for record in chunk
        ]

        # NOTE(review): presumably test_data/test.csv holds four rows --
        # confirm against the fixture file.
        self.assertEqual(4, len(collected))
コード例 #2
0
    def test_list_pipeline(self):
        """Feed TEST_LIST through a list-backed pipeline and compare the output.

        The collected ``"data"`` values must equal TEST_LIST with every
        entry lowercased, in the same order.
        """
        lowercase_step = {
            "name": "normalize_text",
            "type": "lowercase",
            "log_level": "INFO",
        }
        pipeline = DataPreprocess(
            {
                "data_loader": {"type": "list"},
                "steps": [lowercase_step],
            }
        )

        # Gather the "data" field of every item, batch by batch.
        collected = []
        for chunk in pipeline.process_data(TEST_LIST):
            collected.extend(record["data"] for record in chunk)

        # Expected result: the input strings, lowercased, order preserved.
        expected = [entry.lower() for entry in TEST_LIST]
        self.assertEqual(expected, collected)