def test_drop_empty_samples_original_unchanged(self):
    """Mutating the result of drop_empty_samples must not alter the source.

    Builds a dataset whose middle sample is entirely NaN, drops the empty
    samples, modifies the returned dataset, and then checks that the
    original data and labels are still what they were constructed from.
    """
    # np.nan is the canonical spelling; the np.NAN alias was removed in
    # NumPy 2.0 and raises AttributeError there.
    data_list = [
        [1, 2, np.nan],
        [np.nan, np.nan, np.nan],
        [7, 8, 9],
    ]
    label_list = ["a", "b", "c"]
    original = DataSet(pd.DataFrame(data_list), labels=label_list)

    filtered = original.drop_empty_samples()

    # Two values are written because only two of the three samples
    # contain any non-NaN data.
    filtered.set_column(0, [-1, -1])
    filtered.labels[0] = "z"

    # The original dataset must be unaffected by the edits above.
    assert_that(original, equals_dataset(data_list))
    assert_that(original.get_labels(), contains(*label_list))
def test_slice_features_original_unchanged(self):
    """Editing a feature slice must leave the source dataset untouched."""
    frame = pd.DataFrame(
        [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
        columns=["weight", "height", "age"],
    )
    source_labels = ["m", "f", "m"]
    source = DataSet(frame, labels=source_labels)

    subset = source.slice_features(["weight", "height"])

    # Overwrite one column and one label on the slice only.
    subset.set_column("weight", [0, 0, 0])
    subset.labels[0] = "x"

    # The slice itself reflects both edits ...
    assert_that(subset.get_column("weight"), contains(0, 0, 0))
    assert_that(subset.get_labels(), contains("x", "f", "m"))

    # ... while the dataset it was sliced from keeps its initial values.
    assert_that(source.get_column("weight"), contains(1, 4, 7))
    assert_that(source.get_labels(), contains(*source_labels))
def test_combine_labels(self):
    """Combining "crow" and "pidgeon" relabels both samples as "bird"."""
    animal_labels = pd.Series(["cat", "crow", "pidgeon"])
    dataset = DataSet([[1, 2], [3, 4], [5, 6]], labels=animal_labels)

    dataset.combine_labels(["crow", "pidgeon"], "bird")

    # The first label is not in the combined set, so it is expected to
    # stay "cat"; the other two are expected to become "bird".
    expected = {0: "cat", 1: "bird", 2: "bird"}
    assert_that(dataset.get_labels(), equals_series(expected))