예제 #1
0
 def test_url(self):
     """Check the pipeline output when the remove-urls step is configured.

     Builds a template pipeline with the single-item loader template and the
     ``normalize_text_remove_urls`` step, then expects the trailing URL to be
     gone from the processed item's ``"data"`` field.
     """
     pipeline_cfg = templates.pipeline()
     pipeline_cfg['data_loader'] = templates.data_loader_single_item_loader()
     pipeline_cfg['steps'].append(templates.normalize_text_remove_urls())

     preprocessor = DataPreprocess(pipeline_cfg)
     result = preprocessor.process_item("remove url http://www.google.com")

     # The space that preceded the URL is expected to remain.
     self.assertEqual("remove url ", result["data"])
예제 #2
0
 def test_whitespace(self):
     """Check the pipeline output when the remove-whitespace step is configured.

     The input has leading, trailing and doubled inner spaces; the expected
     ``"data"`` value has single inner spaces and no surrounding whitespace.
     """
     pipeline_cfg = templates.pipeline()
     pipeline_cfg['data_loader'] = templates.data_loader_single_item_loader()
     pipeline_cfg['steps'].append(templates.normalize_text_remove_whitespace())

     raw_text = " remove  whitespace "
     result = DataPreprocess(pipeline_cfg).process_item(raw_text)

     self.assertEqual("remove whitespace", result["data"])
예제 #3
0
 def test_punctunation(self):
     """Check the pipeline output when the remove-punctuation step is configured.

     Runs ``TEST_DATA`` through a template pipeline containing only the
     ``normalize_text_remove_punctuation`` step and compares the ``"data"``
     field against the expected punctuation-free sentence.

     NOTE(review): the method name contains a typo ("punctunation"); it is
     kept as-is so the test ID does not change.
     """
     pipeline_cfg = templates.pipeline()
     pipeline_cfg['data_loader'] = templates.data_loader_single_item_loader()
     pipeline_cfg['steps'].append(templates.normalize_text_remove_punctuation())

     preprocessor = DataPreprocess(pipeline_cfg)
     result = preprocessor.process_item(TEST_DATA)

     # Expected text has spaces where punctuation used to be (note the
     # trailing space and the split "isn t").
     self.assertEqual("This isn t a TEST sentences ", result["data"])
예제 #4
0
 def test_lower(self):
     """Check the pipeline output when the lowercase step is configured.

     The processed ``"data"`` field must equal ``TEST_DATA.lower()``.
     """
     pipeline_cfg = templates.pipeline()
     pipeline_cfg['data_loader'] = templates.data_loader_single_item_loader()
     pipeline_cfg['steps'].append(templates.normalize_text_lowercase())

     result = DataPreprocess(pipeline_cfg).process_item(TEST_DATA)

     self.assertEqual(TEST_DATA.lower(), result["data"])
예제 #5
0
 def test_stopwords(self):
     """Check the pipeline output for lowercase followed by stopword removal.

     Steps are appended in that order (lowercase first, then
     ``normalize_text_remove_stopwords``) and ``TEST_DATA`` is processed.
     """
     pipeline_cfg = templates.pipeline()
     pipeline_cfg['data_loader'] = templates.data_loader_single_item_loader()

     # Order matters: the lowercase step is registered before stopword removal.
     pipeline_cfg['steps'].append(templates.normalize_text_lowercase())
     pipeline_cfg['steps'].append(templates.normalize_text_remove_stopwords())

     preprocessor = DataPreprocess(pipeline_cfg)
     result = preprocessor.process_item(TEST_DATA)

     self.assertEqual("isn't test sentences!", result["data"])
예제 #6
0
 def test_lemmatizer(self):
     """Check the pipeline output for lowercase followed by the lemmatizer step.

     The question "How many cities are there?" is expected to come back
     lowercased with "cities" reduced to "city".
     """
     pipeline_cfg = templates.pipeline()
     pipeline_cfg['data_loader'] = templates.data_loader_single_item_loader()

     # Lowercase is registered first, then the lemmatizer.
     pipeline_cfg['steps'].append(templates.normalize_text_lowercase())
     pipeline_cfg['steps'].append(templates.normalize_text_lemmatizer())

     question = "How many cities are there?"
     result = DataPreprocess(pipeline_cfg).process_item(question)

     self.assertEqual("how many city are there?", result["data"])
예제 #7
0
 def test_contractions(self):
     """Check the pipeline output for lowercase followed by contraction expansion.

     ``TEST_DATA`` is processed with the lowercase step and then the
     ``normalize_text_expand_contractions`` step; the expected ``"data"``
     value spells out "is not".
     """
     pipeline_cfg = templates.pipeline()
     pipeline_cfg['data_loader'] = templates.data_loader_single_item_loader()

     # Lowercase is registered first, then contraction expansion.
     pipeline_cfg['steps'].append(templates.normalize_text_lowercase())
     pipeline_cfg['steps'].append(templates.normalize_text_expand_contractions())

     preprocessor = DataPreprocess(pipeline_cfg)
     result = preprocessor.process_item(TEST_DATA)

     self.assertEqual("this is not a test sentences!", result["data"])
 def test_single_item_pipeline(self):
     """Check a hand-written (non-template) config with one lowercase step.

     The config dict is spelled out literally: a ``single_item`` data loader
     and one ``normalize_text`` step of type ``lowercase``. The processed
     ``"data"`` field must equal ``TEST_DATA.lower()``.
     """
     lowercase_step = {
         "name": "normalize_text",
         "type": "lowercase",
         "log_level": "INFO",
     }
     pipeline_cfg = {
         "data_loader": {"type": "single_item"},
         "steps": [lowercase_step],
     }

     result = DataPreprocess(pipeline_cfg).process_item(TEST_DATA)

     self.assertEqual(TEST_DATA.lower(), result["data"])