def test_extract_metadata_subsets(sample_data):
    """It should return only the requested groups of metadata."""
    sizes, times = sample_data

    # Size and time metadata together are expected to give 7 columns.
    size_and_time = Metadata.SIZE_METADATA | Metadata.TIME_METADATA
    result = extract_metadata(sizes, times, metadata=size_and_time)
    assert result.shape == (3, 7)

    # Size and count metadata together are expected to give 10 columns.
    size_and_count = Metadata.SIZE_METADATA | Metadata.COUNT_METADATA
    result = extract_metadata(sizes, times, metadata=size_and_count)
    assert result.shape == (3, 10)
def fixture_train_test_data(request, dataset) -> tuple:
    """Build a stratified train/test split for the closed-world setting.

    The main features are selected by ``request.param``: ``"sizes"`` uses
    the non-ragged packet sizes and ``"timestamps"`` uses the inter-arrival
    times. Either way they are truncated to 5000 columns, and the count,
    time and size metadata columns are appended.

    Returns:
        The split as (x_train, x_test, y_train, y_test).

    Raises:
        ValueError: If ``request.param`` is not one of the two known values.
    """
    sizes, times, classes = dataset
    assert len(np.unique(classes)) == 3

    n_main_features = 5000
    kind = request.param
    if kind not in ("sizes", "timestamps"):
        raise ValueError(f"Unknown param {request.param}")

    if kind == "sizes":
        untrimmed = ensure_non_ragged(sizes)
    else:
        untrimmed = extract_interarrival_times(times)
    main_features = untrimmed[:, :n_main_features]
    assert main_features.shape[1] == n_main_features

    wanted = (
        Metadata.COUNT_METADATA
        | Metadata.TIME_METADATA
        | Metadata.SIZE_METADATA
    )
    metadata_features = extract_metadata(
        sizes=sizes, timestamps=times, metadata=wanted)
    assert metadata_features.shape[1] == 12

    # The fixed seed keeps the split reproducible between runs.
    combined = np.hstack((main_features, metadata_features))
    return train_test_split(
        combined, classes, stratify=classes, random_state=7152217)
def test_extract_metadata_time_metadata(sample_data):
    """It should extract the duration metadata from the traces."""
    _, times = sample_data

    # One entry per trace; each list becomes a column after transposing.
    duration = [0.03, 0.025, 0.02]
    duration_per_packet = [0.03 / 4, 0.025 / 3, 0.02 / 2]

    actual = extract_metadata(
        timestamps=times, metadata=Metadata.TIME_METADATA)
    expected = np.transpose([duration, duration_per_packet])
    np.testing.assert_allclose(actual, expected)
def test_extract_metadata_size_metadata(sample_data):
    """It should extract every size metadata column from the traces."""
    sizes, _ = sample_data

    # One entry per trace; each list becomes a column after transposing.
    total_size = [3370, 3850, 2550]
    outgoing_size = [2770, 3850, 2550]
    incoming_size = [600, 0, 0]
    outgoing_size_ratio = [2770 / 3370, 1, 1]
    incoming_size_ratio = [600 / 3370, 0, 0]

    actual = extract_metadata(sizes=sizes, metadata=Metadata.SIZE_METADATA)
    expected = np.transpose([
        total_size,
        outgoing_size,
        incoming_size,
        outgoing_size_ratio,
        incoming_size_ratio,
    ])
    np.testing.assert_allclose(actual, expected)
def test_extract_metadata_count_metadata(sample_data):
    """It should extract every count metadata column from the traces."""
    sizes, _ = sample_data

    # One entry per trace; each list becomes a column after transposing.
    packet_count = [4, 3, 2]
    outgoing_count = [3, 3, 2]
    incoming_count = [1, 0, 0]
    outgoing_count_ratio = [3 / 4, 3 / 3, 2 / 2]
    incoming_count_ratio = [1 / 4, 0, 0]

    actual = extract_metadata(sizes=sizes, metadata=Metadata.COUNT_METADATA)
    expected = np.transpose([
        packet_count,
        outgoing_count,
        incoming_count,
        outgoing_count_ratio,
        incoming_count_ratio,
    ])
    np.testing.assert_allclose(actual, expected)
def test_extract_metadata_require_sizes_or_timestamps(sample_data):
    """It should raise ValueError when the needed input is not supplied."""
    sizes, times = sample_data

    # Time metadata requested while only sizes are passed.
    with pytest.raises(ValueError):
        extract_metadata(sizes, metadata=Metadata.TIME_METADATA)

    # Count and size metadata requested while only timestamps are passed.
    for requested in (Metadata.COUNT_METADATA, Metadata.SIZE_METADATA):
        with pytest.raises(ValueError):
            extract_metadata(timestamps=times, metadata=requested)
def extract_features(
    self, sizes: np.ndarray, timestamps: np.ndarray
) -> np.ndarray:
    """Return the main features with the metadata features appended.

    The count, time and size metadata are extracted from the traces and
    stacked to the right of the main features. ``self.feature_type``
    selects the main features: ``"sizes"`` uses the non-ragged sizes and
    ``"time"`` uses the inter-arrival times, both sized with
    ``self.n_features_hint``.

    Args:
        sizes: Packet sizes of the traces.
        timestamps: Packet timestamps of the traces.

    Returns:
        The horizontally stacked (main features, metadata features) array.

    Raises:
        ValueError: If the number of extracted metadata columns differs
            from ``self.n_meta_features``, or if ``self.feature_type`` is
            neither ``"sizes"`` nor ``"time"``.
    """
    metadata = (
        Metadata.COUNT_METADATA
        | Metadata.TIME_METADATA
        | Metadata.SIZE_METADATA
    )
    meta_features = extract_metadata(
        sizes=sizes, timestamps=timestamps, metadata=metadata,
        batch_size=5_000)

    if self.n_meta_features != meta_features.shape[1]:
        raise ValueError(
            f"The number of metadata features, {meta_features.shape[1]}, "
            f"does not match the amount specified: {self.n_meta_features}."
        )

    if self.feature_type == "sizes":
        features = ensure_non_ragged(sizes, dimension=self.n_features_hint)
    elif self.feature_type == "time":
        features = extract_interarrival_times(
            timestamps, dimension=self.n_features_hint)
    else:
        # Raise explicitly rather than assert: asserts are stripped under
        # ``python -O``, which would let an unknown feature type silently
        # fall through to the inter-arrival-time features.
        raise ValueError(
            f"Unknown feature type: {self.feature_type!r}; "
            f"expected 'sizes' or 'time'."
        )

    return np.hstack((features, meta_features))
def test_extract_metadata_unspecified(sample_data):
    """It should return every metadata column when none are requested."""
    # 3 traces, and 12 columns when all metadata is returned.
    expected_shape = (3, 12)
    features = extract_metadata(*sample_data)
    assert features.shape == expected_shape