def value_filter(self, feature, values):
    """
    Keeps only the samples whose value for one feature is among the
    accepted values.

    Args:
        feature: string
            Name of the feature (column) inspected for every sample.
        values: single value or list of values
            Accepted values; a sample is kept when its value for
            `feature` is one of these.

    Returns:
        filtered: model.DataSet
            The data set restricted to the matching samples.
    """
    # Look up the column, locate the matching samples, then delegate
    # the actual row selection to sample_filter.
    return self.sample_filter(
        pandas_util.find(self.get_column(feature), values)
    )
def label_filter(self, labels):
    """
    Keeps only the samples whose label is among the accepted labels.

    Args:
        labels: single value or list of values
            Accepted labels; samples carrying any other label are
            dropped from the result.

    Returns:
        filtered: model.DataSet
            The data set restricted to the matching samples.

    Raises:
        UnlabelledDataSetError if the data set has no labels.
    """
    # Filtering by label is meaningless without labels, so fail fast.
    if not self.is_labelled():
        raise UnlabelledDataSetError()

    matching_samples = pandas_util.find(self.labels, labels)
    return self.sample_filter(matching_samples)
def test_find_multiple_values(self):
    """find() with a list of values yields the index of every match, in order."""
    animals = ["wolf", "cat", "dog", "mouse"]
    attitudes = ["hostile", "friendly", "friendly", "not_friendly"]
    temperament = pd.Series(attitudes, index=animals)

    found = pandas_util.find(temperament, ["friendly", "not_friendly"])

    # "wolf" is the only entry whose value is outside the accepted list.
    assert_that(found, contains("cat", "dog", "mouse"))
def test_find_one_value(self):
    """find() with a single scalar value yields the index of every match."""
    animals = ["cat", "dog", "mouse"]
    attitudes = ["friendly", "friendly", "not_friendly"]
    temperament = pd.Series(attitudes, index=animals)

    found = pandas_util.find(temperament, "friendly")

    # "mouse" carries a different value and must be excluded.
    assert_that(found, contains("cat", "dog"))