예제 #1
0
    def test_ifIsNotSupportedExportFile_raiseValueError(self):
        # Passing ``a_unsupported_data_path`` as the export file argument is
        # expected to make the parse CLI raise a ValueError.
        create_csv_file(self.fake_data_path_csv, predict_container=True)
        cli_arguments = [
            self.a_fasttext_model_type,
            self.fake_data_path_csv,
            self.a_unsupported_data_path,
            "--device",
            self.cpu_device,
        ]

        with self.assertRaises(ValueError):
            parse.main(cli_arguments)
예제 #2
0
    def test_ifIsCSVFile_noColumnName_raiseValueError(self):
        # A CSV dataset is given without the "--csv_column_name" option; the
        # parse CLI is expected to raise a ValueError in that case.
        create_csv_file(self.fake_data_path_csv, predict_container=True)
        cli_arguments = [
            self.a_fasttext_model_type,
            self.fake_data_path_csv,
            self.csv_export_filename,
            "--device",
            self.cpu_device,
        ]

        self.assertRaises(ValueError, parse.main, cli_arguments)
예제 #3
0
    def test_integration_cpu(self):
        # Run the parse CLI on a pickled dataset with the CPU device and
        # check that a file exists at the generated export path afterwards.
        dataset_path = self.fake_data_path_pickle
        export_filename = self.pickle_p_export_filename
        create_pickle_file(dataset_path, predict_container=True)

        cli_arguments = [
            self.a_fasttext_model_type,
            dataset_path,
            export_filename,
            "--device",
            self.cpu_device,
        ]
        parse.main(cli_arguments)

        exported_file = generate_export_path(dataset_path, export_filename)
        self.assertTrue(os.path.isfile(exported_file))
예제 #4
0
 def test_integration_no_logging(self):
     # With "--log False" on the command line, no log record should be
     # captured while caplog is listening at INFO level.
     cli_arguments = [
         self.a_fasttext_model_type,
         self.fake_data_path_pickle,
         self.pickle_p_export_filename,
         "--device",
         self.cpu_device,
         "--log",
         "False",
     ]

     with self._caplog.at_level(logging.INFO):
         create_pickle_file(self.fake_data_path_pickle,
                            predict_container=True)
         parse.main(cli_arguments)

     captured_records = self._caplog.records
     self.assertEqual(0, len(captured_records))
예제 #5
0
    def test_integration_csv(self):
        # Run the parse CLI on a CSV dataset, naming the "Address" column,
        # and check that a file exists at the generated export path.
        dataset_path = self.fake_data_path_csv
        export_filename = self.csv_export_filename
        create_csv_file(dataset_path, predict_container=True)

        cli_arguments = [
            self.a_fasttext_att_model_type,
            dataset_path,
            export_filename,
            "--device",
            self.cpu_device,
            "--csv_column_name",
            "Address",
        ]
        parse.main(cli_arguments)

        exported_file = generate_export_path(dataset_path, export_filename)
        self.assertTrue(os.path.isfile(exported_file))
예제 #6
0
    def test_ifPathToFastTextRetrainModel_thenUseFastTextRetrainModel(self):
        # When "--path_to_retrained_model" points at ``path_to_retrain_fasttext``,
        # the first log record should announce the FastTextAddressParser.
        cli_arguments = [
            self.a_fasttext_model_type,
            self.fake_data_path_pickle,
            self.pickle_p_export_filename,
            "--device",
            self.cpu_device,
            "--path_to_retrained_model",
            self.path_to_retrain_fasttext,
        ]

        with self._caplog.at_level(logging.INFO):
            create_pickle_file(self.fake_data_path_pickle,
                               predict_container=True)
            parse.main(cli_arguments)

        expected_message = (
            f"Parsing dataset file {self.fake_data_path_pickle} using the parser "
            f"FastTextAddressParser")
        first_record = self._caplog.records[0]
        self.assertEqual(expected_message, first_record.message)
예제 #7
0
    def test_ifPathToFakeRetrainModel_thenUseFakeRetrainModel(self):
        # The default fasttext checkpoint location under the user's home
        # directory stands in for a "retrained model" path; the first log
        # record should still announce the FastTextAddressParser.
        home_directory = os.path.expanduser("~")
        fake_retrained_model = os.path.join(home_directory, ".cache",
                                            "deepparse", "fasttext.ckpt")
        cli_arguments = [
            self.a_fasttext_model_type,
            self.fake_data_path_pickle,
            self.pickle_p_export_filename,
            "--device",
            self.cpu_device,
            "--path_to_retrained_model",
            fake_retrained_model,
        ]

        with self._caplog.at_level(logging.INFO):
            create_pickle_file(self.fake_data_path_pickle,
                               predict_container=True)
            parse.main(cli_arguments)

        expected_message = (
            f"Parsing dataset file {self.fake_data_path_pickle} using the parser "
            f"FastTextAddressParser")
        first_record = self._caplog.records[0]
        self.assertEqual(expected_message, first_record.message)
예제 #8
0
    def test_integration_logging(self):
        # Run the parse CLI with logging left at its default and check the
        # first two records captured at INFO level: the "parsing started"
        # message, then the summary message containing the export path.
        with self._caplog.at_level(logging.INFO):
            create_pickle_file(self.fake_data_path_pickle,
                               predict_container=True)
            parse.main([
                self.a_fasttext_model_type,
                self.fake_data_path_pickle,
                self.pickle_p_export_filename,
                "--device",
                self.cpu_device,
            ])
        expected_first_message = (
            f"Parsing dataset file {self.fake_data_path_pickle} using the parser "
            f"FastTextAddressParser")
        actual_first_message = self._caplog.records[0].message
        self.assertEqual(expected_first_message, actual_first_message)

        # Build the expected export path from the same export filename that
        # was handed to the CLI above, rather than a hard-coded literal, so
        # the expectation cannot drift from the fixture value.
        export_path = generate_export_path(self.fake_data_path_pickle,
                                           self.pickle_p_export_filename)
        expected_second_message = (
            f"4 addresses have been parsed.\n"
            f"The parsed addresses are outputted here: {export_path}")
        actual_second_message = self._caplog.records[1].message
        self.assertEqual(expected_second_message, actual_second_message)