Example #1
0
    def test_integration_user_define_tag_separator_fn(self):
        """Run the shared integration checks with a custom tag reformat function.

        The CSV file is written with a local helper that flattens the tag
        list into text, and it is read back through ``CSVDatasetContainer``
        configured with ``comma_separated_list_reformat``. The scenario is
        exercised for files of 4 and then 5 data points.
        """
        # Dropping "[", "]" and "'" from the list's string form turns, for
        # a list of plain strings, ['a', 'b'] into "a, b".
        list_markup_removal_table = str.maketrans("", "", "[]'")

        def tags_to_comma_separated_text(tags):
            return str(tags).translate(list_markup_removal_table)

        for number_of_data_points in (4, 5):
            # The file at the same path is rewritten on each pass.
            create_csv_file(
                self.a_data_container_path,
                number_of_data_points=number_of_data_points,
                reformat_list_fn=tags_to_comma_separated_text,
            )

            csv_dataset_container = CSVDatasetContainer(
                self.a_data_container_path,
                column_names=default_csv_column_name,
                tag_seperator_reformat_fn=comma_separated_list_reformat,
            )

            self._test_integration(number_of_data_points,
                                   csv_dataset_container)
Example #2
0
    def test_givenACSVDatasetContainer_whenGetOneItem_thenReturnTheCorrectItem(
            self):
        """Index the container one item at a time and check each pair.

        For indices 0, 1 and 2 the item must unpack into an address equal
        to ``base_string.format(idx)`` and a tag list equal to
        ``a_tags_sequence``.
        """
        create_csv_file(self.a_data_container_path)

        csv_dataset_container = CSVDatasetContainer(
            self.a_data_container_path, column_names=default_csv_column_name)

        # first, second and third data points
        for idx in range(3):
            actual_address, actual_tags_idx = csv_dataset_container[idx]

            self.assertEqual(base_string.format(idx), actual_address)
            self.assertListEqual(a_tags_sequence, actual_tags_idx)
Example #3
0
    def test_integration_user_define_column_names(self):
        """Run the shared integration checks with user-chosen column names.

        The same two column names are used both to write the CSV file and
        to read it back, for files of 4 and then 5 data points.
        """
        user_define_column_names = ["a_name", "Another_name"]

        for number_of_data_points in (4, 5):
            # The file at the same path is rewritten on each pass.
            create_csv_file(
                self.a_data_container_path,
                number_of_data_points=number_of_data_points,
                column_names=user_define_column_names,
            )

            csv_dataset_container = CSVDatasetContainer(
                self.a_data_container_path,
                column_names=user_define_column_names,
            )

            self._test_integration(number_of_data_points,
                                   csv_dataset_container)
Example #4
0
    def test_integration_user_define_separator(self):
        """Run the shared integration checks with ``;`` as the separator.

        The separator is passed both when writing the CSV file and when
        building the container, for files of 4 and then 5 data points.
        """
        separator = ";"

        for number_of_data_points in (4, 5):
            # The file at the same path is rewritten on each pass.
            create_csv_file(
                self.a_data_container_path,
                number_of_data_points=number_of_data_points,
                separator=separator,
            )

            csv_dataset_container = CSVDatasetContainer(
                self.a_data_container_path,
                column_names=default_csv_column_name,
                separator=separator,
            )

            self._test_integration(number_of_data_points,
                                   csv_dataset_container)
Example #5
0
    def test_ifIsNotSupportedExportFile_raiseValueError(self):
        """Expect ``parse.main`` to raise ``ValueError`` for this argv.

        ``self.a_unsupported_data_path`` is passed as the third positional
        argument (presumably the export file, going by the test name).
        """
        create_csv_file(self.fake_data_path_csv, predict_container=True)

        cli_arguments = [
            self.a_fasttext_model_type,
            self.fake_data_path_csv,
            self.a_unsupported_data_path,
            "--device",
            self.cpu_device,
        ]

        with self.assertRaises(ValueError):
            parse.main(cli_arguments)
Example #6
0
    def test_ifIsCSVFile_noColumnName_raiseValueError(self):
        """Expect ``parse.main`` to raise ``ValueError`` for this argv.

        A CSV file is given as input and no ``--csv_column_name`` option
        is included in the arguments.
        """
        create_csv_file(self.fake_data_path_csv, predict_container=True)

        cli_arguments = [
            self.a_fasttext_model_type,
            self.fake_data_path_csv,
            self.csv_export_filename,
            "--device",
            self.cpu_device,
        ]

        with self.assertRaises(ValueError):
            parse.main(cli_arguments)
Example #7
0
    def test_integration_csv(self):
        """Run ``parse.main`` on a CSV file and check the export file exists.

        The ``Address`` column is selected through ``--csv_column_name``.
        The expected export location is derived with
        ``generate_export_path`` from the input path and the export
        filename.
        """
        create_csv_file(self.fake_data_path_csv, predict_container=True)

        cli_arguments = [
            self.a_fasttext_att_model_type,
            self.fake_data_path_csv,
            self.csv_export_filename,
            "--device",
            self.cpu_device,
            "--csv_column_name",
            "Address",
        ]
        parse.main(cli_arguments)

        export_path = generate_export_path(self.fake_data_path_csv,
                                           self.csv_export_filename)
        export_file_exists = os.path.isfile(export_path)
        self.assertTrue(export_file_exists)
Example #8
0
    def test_integration(self):
        """Run the shared integration checks with the default CSV layout.

        A file is written and loaded through ``CSVDatasetContainer`` with
        ``default_csv_column_name``, for 4 and then 5 data points.
        """
        for number_of_data_points in (4, 5):
            # The file at the same path is rewritten on each pass.
            create_csv_file(
                self.a_data_container_path,
                number_of_data_points=number_of_data_points,
            )

            csv_dataset_container = CSVDatasetContainer(
                self.a_data_container_path,
                column_names=default_csv_column_name,
            )
            self._test_integration(number_of_data_points,
                                   csv_dataset_container)
Example #9
0
    def test_integration_predict_container(self):
        """Run the shared integration checks on a non-training container.

        The container is built with ``column_names=["Address"]`` and
        ``is_training_container=False``, for files of 4 and then 5 data
        points.

        NOTE(review): the file is written with ``predict_container=False``
        while the container is opened with ``is_training_container=False``;
        confirm that this combination is the intended one.
        """
        for number_of_data_points in (4, 5):
            # The file at the same path is rewritten on each pass.
            create_csv_file(
                self.a_data_container_path,
                number_of_data_points=number_of_data_points,
                predict_container=False,
            )

            csv_dataset_container = CSVDatasetContainer(
                self.a_data_container_path,
                column_names=["Address"],
                is_training_container=False,
            )
            self._test_integration(number_of_data_points,
                                   csv_dataset_container)
Example #10
0
    def test_givenAPickleDatasetContainer_whenGetSlice_thenReturnTheCorrectItems(
            self):
        """Slice the container and check every returned (address, tags) item.

        The slices ``[0:2]`` and ``[2:4]`` must each be a ``list`` holding
        one item per requested index. Each item's first element must equal
        ``base_string.format(idx)`` and its second element must equal
        ``a_tags_sequence``.

        NOTE(review): the method name mentions a Pickle container, but the
        body exercises ``CSVDatasetContainer``; the name is kept so that
        existing test selections keep working.
        NOTE(review): the ``[2:4]`` slice needs the default fixture written
        by ``create_csv_file`` to hold at least 4 data points -- confirm.
        """
        create_csv_file(self.a_data_container_path)

        csv_dataset_container = CSVDatasetContainer(
            self.a_data_container_path, column_names=default_csv_column_name)

        for start_idx, end_idx in ((0, 2), (2, 4)):
            expected_addresses = [
                base_string.format(idx) for idx in range(start_idx, end_idx)
            ]

            sliced_addresses = csv_dataset_container[start_idx:end_idx]
            self.assertIsInstance(sliced_addresses, list)

            # zip() stops at the shortest input, so without this check a
            # slice that returns too few items (even none) would pass the
            # loop below without any comparison being made.
            self.assertEqual(len(expected_addresses), len(sliced_addresses))

            for actual_address_tuple, expected_address in zip(
                    sliced_addresses, expected_addresses):
                actual_address = actual_address_tuple[0]
                actual_tags_idx = actual_address_tuple[1]
                self.assertEqual(expected_address, actual_address)
                self.assertListEqual(a_tags_sequence, actual_tags_idx)