Esempio n. 1
0
    def test_drop_empty_samples_original_unchanged(self):
        """Check that drop_empty_samples() leaves the source DataSet untouched.

        A DataSet is built with an all-NaN middle row, drop_empty_samples()
        is called on it, and the returned object is then mutated (one column
        and one label). The original's data and labels must still match the
        values it was constructed from.
        """
        # Use the canonical ``np.nan``; the upper-case ``np.NAN`` alias is not
        # available in the NumPy 2.x main namespace.
        data_list = [[1, 2, np.nan], [np.nan, np.nan, np.nan], [7, 8, 9]]
        label_list = ["a", "b", "c"]
        original = DataSet(pd.DataFrame(data_list), labels=label_list)

        filtered = original.drop_empty_samples()
        # Mutate the result; two values are supplied, presumably because the
        # all-NaN row was removed and two samples remain.
        filtered.set_column(0, [-1, -1])
        filtered.labels[0] = "z"

        # None of the mutations above may be visible through the original.
        assert_that(original, equals_dataset(data_list))
        assert_that(original.get_labels(), contains(*label_list))
Esempio n. 2
0
    def test_slice_features_original_unchanged(self):
        """Check that editing a feature slice does not alter the source DataSet.

        The test slices two of three named columns out of a DataSet, changes a
        column and a label on the slice, confirms the slice reflects those
        changes, and finally confirms the original still holds its initial
        column values and labels.
        """
        rows = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
        feature_names = ["weight", "height", "age"]
        labels = ["m", "f", "m"]
        source = DataSet(pd.DataFrame(rows, columns=feature_names),
                         labels=labels)

        subset = source.slice_features(["weight", "height"])

        # Mutate the slice: overwrite one column and the first label.
        subset.set_column("weight", [0, 0, 0])
        subset.labels[0] = "x"

        # The slice itself must show the new values ...
        subset_weight = subset.get_column("weight")
        assert_that(subset_weight, contains(0, 0, 0))
        subset_labels = subset.get_labels()
        assert_that(subset_labels, contains("x", "f", "m"))

        # ... while the source DataSet keeps what it was built with.
        source_weight = source.get_column("weight")
        assert_that(source_weight, contains(1, 4, 7))
        source_labels = source.get_labels()
        assert_that(source_labels, contains(*labels))
Esempio n. 3
0
 def test_combine_labels(self):
     """Check that combine_labels() replaces the listed labels with the new one.

     After merging "crow" and "pidgeon" into "bird", the labels returned by
     get_labels() are expected to be "cat", "bird", "bird" at positions
     0, 1 and 2.
     """
     animal_labels = pd.Series(["cat", "crow", "pidgeon"])
     animals = DataSet([[1, 2], [3, 4], [5, 6]], labels=animal_labels)

     animals.combine_labels(["crow", "pidgeon"], "bird")

     expected = {0: "cat", 1: "bird", 2: "bird"}
     assert_that(animals.get_labels(), equals_series(expected))