def test_add_data_set(self):
    """Exercise DataSet.add_data_set on empty and populated instances."""
    # Start from a data set holding no points and no metadata.
    data_set = DataSet([], [])
    # Invalid additions must leave the empty data set untouched.
    self._test_add_data_set_invalid(data_set)
    # Append two valid data sets in sequence.
    data_set.add_data_set(
        DataSet(source.MATRIX_LIST_OF_NUMPYS, source.MATRIX_METADATA))
    data_set.add_data_set(
        DataSet(source.MATRIX_WITH_DUPLICATES_LIST_OF_NUMPYS,
                source.MATRIX_WITH_DUPLICATES_METADATA_UNEQUAL))
    self.assertIsInstance(data_set.data_points, list)
    self.assertIsInstance(data_set.metadata, list)
    # Concatenation order must match the order of the add_data_set calls.
    expected_points = (source.MATRIX_LIST_OF_NUMPYS
                       + source.MATRIX_WITH_DUPLICATES_LIST_OF_NUMPYS)
    expected_metadata = (source.MATRIX_METADATA
                         + source.MATRIX_WITH_DUPLICATES_METADATA_UNEQUAL)
    numpy.testing.assert_allclose(data_set.data_points, expected_points)
    self.assertListEqual(data_set.metadata, expected_metadata)
    # Invalid additions must also leave a populated data set untouched.
    self._test_add_data_set_invalid(data_set)
    # Appending points of an incompatible shape must be rejected.
    incompatible = DataSet([source.VECTOR_3D_NUMPY],
                           [source.VECTOR_3D_METADATA])
    self.assertRaises(ValueError, data_set.add_data_set, incompatible)
def _test_add_data_set_invalid(self, data_set: DataSet):
    """Assert that invalid inputs to add_data_set leave *data_set* unchanged."""
    # Snapshot the current contents for comparison after the attempts.
    points_before = numpy.copy(data_set.data_points)
    metadata_before = copy.deepcopy(data_set.metadata)
    # Adding an empty data set is a no-op.
    data_set.add_data_set(DataSet([], []))
    self.assertIsInstance(data_set.data_points, list)
    self.assertIsInstance(data_set.metadata, list)
    numpy.testing.assert_allclose(data_set.data_points, points_before)
    self.assertListEqual(data_set.metadata, metadata_before)
    # Arguments that are not DataSet instances must raise ValueError.
    for invalid in (source.CUSTOM_DICT, source.CUSTOM_STR, None):
        self.assertRaises(ValueError, data_set.add_data_set, invalid)
def get_all_data_sets(root_dir: str) -> DataSet:
    """
    Retrieve all data points in specified directory (or its subdirectories).

    :param root_dir: str - Path to directory.
    :return: DataSet - Retrieved data points and corresponding metadata structures.
    """
    # Read every data set file found below root_dir and merge the
    # results into a single DataSet instance.
    combined = DataSet([], [])
    for file_path in get_all_data_set_file_paths(root_dir):
        combined.add_data_set(read_data_set(file_path))
    return combined