def test_integration_user_define_tag_separator_fn(self):
    """Integration check with a user-supplied tag reformatting function.

    The CSV file is written with a custom ``reformat_list_fn`` and then read
    back through a ``CSVDatasetContainer`` that receives
    ``comma_separated_list_reformat`` as ``tag_seperator_reformat_fn``. The
    scenario runs once with 4 data points and once with 5.
    """

    def strip_list_markup(tags):
        # Remove the brackets and single quotes from the list's string form.
        text = str(tags)
        for unwanted in ("[", "]", "'"):
            text = text.replace(unwanted, "")
        return text

    for data_point_count in (4, 5):
        create_csv_file(
            self.a_data_container_path,
            number_of_data_points=data_point_count,
            reformat_list_fn=strip_list_markup,
        )
        container = CSVDatasetContainer(
            self.a_data_container_path,
            column_names=default_csv_column_name,
            tag_seperator_reformat_fn=comma_separated_list_reformat,
        )
        self._test_integration(data_point_count, container)
def test_givenACSVDatasetContainer_whenGetOneItem_thenReturnTheCorrectItem(
        self):
    """Index the container at 0, 1 and 2 and check each returned pair.

    Each item is unpacked into an address and a tag list; the address is
    compared with ``base_string.format(idx)`` and the tags with
    ``a_tags_sequence``.
    """
    create_csv_file(self.a_data_container_path)
    container = CSVDatasetContainer(self.a_data_container_path,
                                    column_names=default_csv_column_name)

    # First, second and third data points.
    for position in range(3):
        wanted_address = base_string.format(position)
        wanted_tags = a_tags_sequence

        found_address, found_tags = container[position]

        self.assertEqual(wanted_address, found_address)
        self.assertListEqual(wanted_tags, found_tags)
def test_integration_user_define_column_names(self):
    """Integration check with user-chosen column names.

    The same two column names are used to write the CSV file and to read it
    back through ``CSVDatasetContainer``, once with 4 data points and once
    with 5.
    """
    custom_columns = ["a_name", "Another_name"]

    for data_point_count in (4, 5):
        create_csv_file(
            self.a_data_container_path,
            number_of_data_points=data_point_count,
            column_names=custom_columns,
        )
        container = CSVDatasetContainer(self.a_data_container_path,
                                        column_names=custom_columns)
        self._test_integration(data_point_count, container)
def test_integration_user_define_separator(self):
    """Integration check with ``;`` as the CSV separator.

    The separator is passed both to the file writer and to
    ``CSVDatasetContainer``, once with 4 data points and once with 5.
    """
    delimiter = ";"

    for data_point_count in (4, 5):
        create_csv_file(
            self.a_data_container_path,
            number_of_data_points=data_point_count,
            separator=delimiter,
        )
        container = CSVDatasetContainer(
            self.a_data_container_path,
            column_names=default_csv_column_name,
            separator=delimiter,
        )
        self._test_integration(data_point_count, container)
def test_ifIsNotSupportedExportFile_raiseValueError(self):
    """``parse.main`` must raise ``ValueError`` for the unsupported export path.

    The CSV input file is created first; the call then receives
    ``self.a_unsupported_data_path`` as its third positional argument.
    """
    create_csv_file(self.fake_data_path_csv, predict_container=True)

    with self.assertRaises(ValueError):
        cli_arguments = [
            self.a_fasttext_model_type,
            self.fake_data_path_csv,
            self.a_unsupported_data_path,
            "--device",
            self.cpu_device,
        ]
        parse.main(cli_arguments)
def test_ifIsCSVFile_noColumnName_raiseValueError(self):
    """``parse.main`` must raise ``ValueError`` for a CSV input without a column name.

    The argument list holds only the model type, the CSV input path, the
    export filename and the device option; no column-name option is given.
    """
    create_csv_file(self.fake_data_path_csv, predict_container=True)

    with self.assertRaises(ValueError):
        cli_arguments = [
            self.a_fasttext_model_type,
            self.fake_data_path_csv,
            self.csv_export_filename,
            "--device",
            self.cpu_device,
        ]
        parse.main(cli_arguments)
def test_integration_csv(self):
    """Run ``parse.main`` on a CSV input and check that the export file exists.

    The call passes ``--csv_column_name Address``; the expected output
    location is obtained from ``generate_export_path``.
    """
    create_csv_file(self.fake_data_path_csv, predict_container=True)

    cli_arguments = [
        self.a_fasttext_att_model_type,
        self.fake_data_path_csv,
        self.csv_export_filename,
        "--device",
        self.cpu_device,
        "--csv_column_name",
        "Address",
    ]
    parse.main(cli_arguments)

    expected_file = generate_export_path(self.fake_data_path_csv,
                                         self.csv_export_filename)
    self.assertTrue(os.path.isfile(expected_file))
def test_integration(self):
    """Integration check with the default CSV column names.

    A CSV file is written and loaded into a ``CSVDatasetContainer``, once
    with 4 data points and once with 5, and each container is handed to
    ``self._test_integration``.
    """
    for data_point_count in (4, 5):
        create_csv_file(
            self.a_data_container_path,
            number_of_data_points=data_point_count,
        )
        container = CSVDatasetContainer(self.a_data_container_path,
                                        column_names=default_csv_column_name)
        self._test_integration(data_point_count, container)
def test_integration_predict_container(self):
    """Integration check for a container opened with ``is_training_container=False``.

    Only the ``Address`` column is requested. The scenario runs once with
    4 data points and once with 5.
    """
    for data_point_count in (4, 5):
        # NOTE(review): the file is written with predict_container=False
        # although the container below is not a training container --
        # confirm this is intentional.
        create_csv_file(
            self.a_data_container_path,
            number_of_data_points=data_point_count,
            predict_container=False,
        )
        container = CSVDatasetContainer(
            self.a_data_container_path,
            column_names=["Address"],
            is_training_container=False,
        )
        self._test_integration(data_point_count, container)
def test_givenAPickleDatasetContainer_whenGetSlice_thenReturnTheCorrectItems(
        self):
    """Slice the container as ``[0:2]`` and ``[2:4]`` and check every item.

    Despite the method name, the container under test is a
    ``CSVDatasetContainer``. Each slice must be a ``list``; its entries are
    compared with ``base_string.format(idx)`` and ``a_tags_sequence``.
    """
    create_csv_file(self.a_data_container_path)
    container = CSVDatasetContainer(self.a_data_container_path,
                                    column_names=default_csv_column_name)

    for first, stop in ((0, 2), (2, 4)):
        wanted_addresses = [
            base_string.format(position) for position in range(first, stop)
        ]
        wanted_tags = [a_tags_sequence] * (stop - first)

        rows = container[first:stop]
        self.assertIsInstance(rows, list)

        # zip stops at the shortest input, so the slice length itself is
        # not compared here.
        for row, wanted_address, wanted_tag_list in zip(
                rows, wanted_addresses, wanted_tags):
            found_address, found_tags = row[0], row[1]
            self.assertEqual(wanted_address, found_address)
            self.assertListEqual(wanted_tag_list, found_tags)