def fixture_train_test_data(request, dataset) -> tuple:
    """Return a tuple of (x_train, x_test, y_train, y_test) in the
    closed-world setting.

    The first 5000 feature columns are chosen by ``request.param``:
    "sizes" takes them from the non-ragged sizes, "timestamps" from the
    inter-arrival times. The count, time and size metadata columns are
    stacked after them. Any other ``request.param`` raises a ValueError.
    """
    sizes, times, classes = dataset
    assert len(np.unique(classes)) == 3

    feature_kind = request.param
    if feature_kind == "sizes":
        uncropped = ensure_non_ragged(sizes)
    elif feature_kind == "timestamps":
        uncropped = extract_interarrival_times(times)
    else:
        raise ValueError(f"Unknown param {request.param}")

    # Both feature kinds are cropped to the same leading 5000 columns.
    main_features = uncropped[:, :5000]
    assert main_features.shape[1] == 5000

    wanted_metadata = (
        Metadata.COUNT_METADATA
        | Metadata.TIME_METADATA
        | Metadata.SIZE_METADATA
    )
    metadata_features = extract_metadata(
        sizes=sizes, timestamps=times, metadata=wanted_metadata)
    assert metadata_features.shape[1] == 12

    combined = np.hstack((main_features, metadata_features))
    return train_test_split(
        combined, classes, stratify=classes, random_state=7152217)
def extract_features(  # pylint: disable=unused-argument
    self,
    sizes: np.ndarray,
    timestamps: np.ndarray,
) -> np.ndarray:
    """Extract the size features used to train and test the classifier.

    Only ``sizes`` is used; ``timestamps`` is accepted but ignored here.
    The sizes are passed through ``ensure_non_ragged`` with
    ``self.n_features_hint`` as the requested dimension.

    Subclasses may override this method to use different features.
    """
    n_columns = self.n_features_hint
    return ensure_non_ragged(sizes, dimension=n_columns)
def test_ensure_non_ragged_crop_no_copy(ragged_data):
    """Cropping must also work when the input needs no copy.

    The fixture's expected array is passed in directly and the result
    must equal its first three columns.
    """
    _, already_flat = ragged_data

    cropped = ensure_non_ragged(already_flat, dimension=3)

    first_three_columns = already_flat[:, :3]
    np.testing.assert_array_equal(cropped, first_three_columns)
def test_ensure_non_ragged(ragged_data):
    """A ragged array must come back as the expected non-ragged array.

    Even with ``copy=False`` the result must not share memory with the
    fixture's expected array.
    """
    ragged, non_ragged = ragged_data

    actual = ensure_non_ragged(ragged, copy=False)

    np.testing.assert_array_equal(actual, non_ragged)
    is_aliased = np.shares_memory(actual, non_ragged)
    assert not is_aliased
def fixture_train_test_sizes(dataset) -> tuple:
    """Return a tuple of (x_train, x_test, y_train, y_test) in the
    open-world setting.

    The features are the first 5000 columns of the non-ragged sizes; the
    timestamps in the dataset are not used. The split is stratified by
    class.
    """
    sizes, _, labels = dataset

    non_ragged = ensure_non_ragged(sizes)
    features = non_ragged[:, :5000]

    n_classes = len(np.unique(labels))
    assert n_classes == 3
    n_columns = features.shape[1]
    assert n_columns == 5000

    return train_test_split(
        features, labels, stratify=labels, random_state=7141845)
def extract_features(
    self, sizes: np.ndarray, timestamps: np.ndarray
) -> np.ndarray:
    """Extract the main features with the metadata features appended.

    The count, time and size metadata are extracted first and their
    column count is checked against ``self.n_meta_features``. The main
    features are then selected by ``self.feature_type``: "sizes" uses
    the non-ragged sizes, "time" uses the inter-arrival times, both with
    ``self.n_features_hint`` as the requested dimension.

    Returns the main features and metadata features stacked
    horizontally, main features first.

    Raises a ValueError if the number of metadata columns does not match
    ``self.n_meta_features`` or if ``self.feature_type`` is neither
    "sizes" nor "time".
    """
    metadata = (
        Metadata.COUNT_METADATA
        | Metadata.TIME_METADATA
        | Metadata.SIZE_METADATA
    )
    meta_features = extract_metadata(
        sizes=sizes, timestamps=timestamps, metadata=metadata,
        batch_size=5_000)

    if self.n_meta_features != meta_features.shape[1]:
        raise ValueError(
            f"The number of metadata features, {meta_features.shape[1]}, "
            f"does not match the amount specified: {self.n_meta_features}."
        )

    if self.feature_type == "sizes":
        features = ensure_non_ragged(sizes, dimension=self.n_features_hint)
    elif self.feature_type == "time":
        features = extract_interarrival_times(
            timestamps, dimension=self.n_features_hint)
    else:
        # Raise explicitly instead of asserting: assert statements are
        # removed under ``python -O``, which would let an unknown feature
        # type silently fall through to the inter-arrival-time branch.
        raise ValueError(
            f"Unknown feature type {self.feature_type!r}, "
            "expected 'sizes' or 'time'."
        )

    return np.hstack((features, meta_features))
def test_ensure_non_ragged_pad(ragged_data):
    """It should pad the data up to the specified dimension.

    With ``dimension=6`` the result must equal the fixture's expected
    array with two extra zero columns appended on the right.
    """
    ragged, non_ragged = ragged_data

    padded = ensure_non_ragged(ragged, dimension=6)

    # No padding on the rows, two trailing columns (np.pad's default
    # constant mode fills them with zeros).
    pad_widths = [(0, 0), (0, 2)]
    np.testing.assert_array_equal(padded, np.pad(non_ragged, pad_widths))
def test_ensure_non_ragged_crop(ragged_data):
    """It should crop ragged data down to the specified dimension.

    With ``dimension=3`` the result must equal the first three columns of
    the fixture's expected array.
    """
    ragged, non_ragged = ragged_data

    cropped = ensure_non_ragged(ragged, dimension=3)

    first_three_columns = non_ragged[:, :3]
    np.testing.assert_array_equal(cropped, first_three_columns)
def test_ensure_non_ragged_copy(ragged_data):
    """With copy=True the result must be a separate copy of the input.

    The values must match the input while sharing no memory with it.
    """
    _, already_flat = ragged_data

    duplicate = ensure_non_ragged(already_flat, copy=True)

    np.testing.assert_array_equal(duplicate, already_flat)
    is_aliased = np.shares_memory(duplicate, already_flat)
    assert not is_aliased
def test_ensure_non_ragged_noop(ragged_data):
    """Input that is already not ragged must be left unchanged.

    With ``copy=False`` the result must equal the input and share its
    memory.
    """
    _, already_flat = ragged_data

    untouched = ensure_non_ragged(already_flat, copy=False)

    np.testing.assert_array_equal(untouched, already_flat)
    is_aliased = np.shares_memory(untouched, already_flat)
    assert is_aliased