def test_generate_statistics_some_codes(self):
    """Statistics restricted to a random subset of feature codes match a
    NumPy reference computed over only the rows with those codes.

    Picks 2 codes at random so the subset path (code filtering) is
    exercised with varying inputs across runs.
    """
    needed_feature_codes = random.sample(self.feature_codes, k=2)
    file_processor = FileProcessor(self.input_file_path)
    actual_stats = file_processor.generate_statistics(needed_feature_codes)

    # Independently reload the raw test data and select the rows whose
    # feature code is in the sampled subset.
    _, feature_codes, features = read_test_data_as_arrays(
        self.input_file_path)
    selected_indexes = []
    for code in needed_feature_codes:
        selected_indexes += np.where(feature_codes == code)[0].tolist()
    selected_features = features[selected_indexes]

    # ddof=1 — sample (not population) standard deviation, matching the
    # implementation under test.
    expected_stats = {
        "count": np.uint32(selected_features.shape[0]),
        "mean": selected_features.mean(axis=0),
        "std": selected_features.std(axis=0, ddof=1),
        "max": np.amax(selected_features, axis=0),
        "min": np.amin(selected_features, axis=0),
    }

    # allclose (not exact equality) tolerates float round-off between the
    # implementation's accumulation order and NumPy's.
    for metric in expected_stats:
        np.testing.assert_allclose(actual_stats[metric],
                                   expected_stats[metric])
def test_generate_statistics_all_codes(self):
    """Statistics computed with no code filter (default argument) match a
    NumPy reference computed over every row of the test data file.
    """
    file_processor = FileProcessor(self.input_file_path)
    # No argument — exercises the "all feature codes" default path.
    actual_stats = file_processor.generate_statistics()

    # Independently reload the raw test data; feature codes are irrelevant
    # here since every row is included.
    _, _, features = read_test_data_as_arrays(self.input_file_path)

    # ddof=1 — sample (not population) standard deviation, matching the
    # implementation under test.
    expected_stats = {
        "count": np.uint32(features.shape[0]),
        "mean": features.mean(axis=0),
        "std": features.std(axis=0, ddof=1),
        "max": np.amax(features, axis=0),
        "min": np.amin(features, axis=0),
    }

    # allclose (not exact equality) tolerates float round-off between the
    # implementation's accumulation order and NumPy's.
    for metric in expected_stats:
        np.testing.assert_allclose(actual_stats[metric],
                                   expected_stats[metric])