def test_eq(self):
     """Check `FilterContinuous` equality against attribute-wise comparison.

     Two filters built from identical arguments must compare equal, a filter
     with a different upper bound must not, and in both cases `==` has to
     agree with comparing the instances' `__dict__`.
     """
     reference = FilterContinuous(1, FilterContinuous.Between, 1, 2)
     twin = FilterContinuous(1, FilterContinuous.Between, 1, 2)
     wider = FilterContinuous(1, FilterContinuous.Between, 1, 3)

     self.assertEqual(reference, twin)
     self.assertNotEqual(reference, wider)

     # `==` must give the same verdict as a plain attribute comparison.
     for other in (twin, wider):
         self.assertEqual(reference.__dict__ == other.__dict__,
                          reference == other)
예제 #2
0
 def _gather_conditions(self):
     """Build one filter condition for every entry in `self.selection`.

     For a discrete `self.attribute`, an entry with an empty `value` becomes
     a negated `IsDefined` filter on the attribute, an entry whose `value` is
     not among `attr.values` is skipped, and any other entry becomes a
     `FilterDiscrete` on that value.  For a non-discrete attribute the entry
     becomes a `FilterContinuous.Between` filter over `low`..`high`.

     When the entry carries a truthy `group_value`, the condition is
     conjoined with a `FilterDiscrete` on `self.group_var`; entries whose
     `group_value` is not among `group_attr.values` are skipped.

     Returns:
         list: the collected filter conditions, in selection order.
     """
     conditions = []
     attr = self.attribute
     group_attr = self.group_var
     for data_range in self.selection:
         if attr.is_discrete:
             if not data_range.value:
                 condition = IsDefined([attr], negate=True)
             elif data_range.value not in attr.values:
                 # If some value was removed from the data (in case settings
                 # are loaded from a scheme), do not include the
                 # corresponding filter; this is appropriate since data with
                 # such value does not exist anyway
                 continue
             else:
                 condition = FilterDiscrete(attr, [data_range.value])
         else:
             condition = FilterContinuous(attr, FilterContinuous.Between,
                                          data_range.low, data_range.high)

         group_value = data_range.group_value
         if group_value:
             # NOTE(review): this block used to contain an
             # `if not data_range.group_value:` branch that built a negated
             # IsDefined filter on the group variable.  It could never run,
             # because we are already inside `if group_value:`, so it was
             # dropped without changing behavior.  If selecting rows with a
             # missing group value is intended, the outer condition has to
             # change -- confirm against the code that fills `selection`.
             if group_value not in group_attr.values:
                 # Same reasoning as above: the group value no longer exists.
                 continue
             grp_filter = FilterDiscrete(group_attr, [group_value])
             condition = Values([condition, grp_filter], conjunction=True)
         conditions.append(condition)
     return conditions
 def test_values(self):
     """Check nesting of `Values` filters with `negate` and conjunction.

     A negated disjunction of two continuous filters is combined with a
     discrete filter; the result is compared with the equivalent boolean
     mask computed directly on `self.iris.X` / `self.iris.Y`.
     """
     variables = self.iris.domain.variables
     below_five = FilterContinuous(variables[0], FilterContinuous.Less, 5)
     above_three = FilterContinuous(variables[1], FilterContinuous.Greater, 3)
     value_two = FilterDiscrete(variables[4], [2])

     neither = Values([below_five, above_three],
                      conjunction=False, negate=True)
     neither_and_two = Values([neither, value_two])

     neither_rows = neither(self.iris)
     combined_rows = neither_and_two(self.iris)

     # Adding the discrete filter must narrow the selection further.
     self.assertGreater(len(neither_rows), len(combined_rows))
     self.assertTrue((combined_rows.X[:, 0] >= 5).all())
     self.assertTrue((combined_rows.X[:, 1] <= 3).all())
     self.assertTrue((combined_rows.Y == 2).all())

     rejected = (self.iris.X[:, 0] < 5) | (self.iris.X[:, 1] > 3)
     expected_count = (~rejected & (self.iris.Y == 2)).sum()
     self.assertEqual(len(combined_rows), expected_count)
예제 #4
0
 def __init__(self, dist, attr, group_val_index=None, group_var=None):
     """Compute summary statistics of a distribution and its row filters.

     Args:
         dist: two-row array; row 0 is used as the values and row 1 as
             their frequencies.  (Assumes the values are sorted in
             ascending order, since the first/last value are stored as
             `a_min`/`a_max` -- TODO confirm with the caller.)
         attr: variable passed to the `FilterContinuous` that selects the
             range between the lower and upper quartile.
         group_val_index: if not None, a `FilterDiscrete` on `group_var`
             with this value is appended to `self.conditions`.
         group_var: variable used for the group filter.

     If the total frequency is zero, only `dist` and `n` are set and the
     remaining attributes are left undefined.
     """
     self.dist = dist
     self.n = n = np.sum(dist[1])
     if n == 0:
         return
     self.a_min = float(dist[0, 0])
     self.a_max = float(dist[0, -1])
     self.mean = float(np.sum(dist[0] * dist[1]) / n)
     self.var = float(np.sum(dist[1] * (dist[0] - self.mean)**2) / n)
     self.dev = math.sqrt(self.var)

     thresholds = [n / 4, n / 2, n / 4 * 3]
     quartiles = []
     cumulative = 0
     last_index = dist.shape[1] - 1
     for i, count in enumerate(dist[1]):
         cumulative += count
         # A single value may cover several thresholds at once (heavily
         # tied data), so keep testing thresholds for the same value.
         # Testing only one threshold per value attributed the remaining
         # quartiles to later values, e.g. values [1, 2] with counts
         # [10, 1] produced a median of 2 instead of 1.
         while (len(quartiles) < len(thresholds)
                and cumulative >= thresholds[len(quartiles)]):
             exact_hit = cumulative == thresholds[len(quartiles)]
             if exact_hit and i < last_index:
                 # The threshold falls exactly between two values.
                 quartiles.append(float((dist[0, i] + dist[0, i + 1]) / 2))
             else:
                 quartiles.append(float(dist[0, i]))
         if len(quartiles) == len(thresholds):
             break

     if len(quartiles) == len(thresholds):
         q25, median, q75 = quartiles
         self.median = median
         # A quartile that coincides with the median is stored as None,
         # the value this constructor has always used for a quartile it
         # does not report.
         self.q25 = None if q25 == median else q25
         self.q75 = None if q75 == median else q75
     else:
         # Defensive fallback; not expected for non-negative frequencies.
         self.q25 = self.q75 = None
         self.median = quartiles[1] if len(quartiles) == 2 else None

     self.conditions = [
         FilterContinuous(attr, FilterContinuous.Between, self.q25,
                          self.q75)
     ]
     if group_val_index is not None:
         self.conditions.append(FilterDiscrete(group_var,
                                               [group_val_index]))
예제 #5
0
    def test_reprs(self):
        """Check that `repr` of each filter evaluates back to an equal repr."""
        less_than = FilterContinuous(self.vs[0], FilterContinuous.Less, 5)
        greater_than = FilterContinuous(self.vs[1], FilterContinuous.Greater, 3)

        filters = [
            IsDefined(negate=True),
            HasClass(),
            Random(),
            FilterDiscrete(self.attr_disc, None),
            SameValue(self.attr_disc, self.value_disc, negate=True),
            less_than,
            Values([less_than, greater_than], conjunction=False, negate=True),
            ValueFilter(self.attr_disc),
            FilterString("name", FilterString.Equal, "Aardvark", case_sensitive=False),
            FilterStringList("name", ["Aardvark"], case_sensitive=False),
            FilterRegex("name", "^c...$"),
        ]

        for flt in filters:
            text = repr(flt)
            # eval is applied only to reprs produced a line above.
            rebuilt = eval(text)
            self.assertEqual(repr(rebuilt), text)
예제 #6
0
    def test_min(self):
        """Check that `min` and `ref` of `FilterContinuous` mirror each other.

        Also verifies that an unexpected keyword argument raises `TypeError`
        and that `min`/`max` are accepted as keyword arguments.
        """
        between = FilterContinuous(1, FilterContinuous.Between, 1, 2)
        self.assertEqual(between.min, 1)
        self.assertEqual(between.max, 2)
        self.assertEqual(between.ref, 1)

        # Writing through either name must be visible through the other.
        between.ref = 0
        self.assertEqual(between.min, 0)

        between.min = -1
        self.assertEqual(between.ref, -1)

        with self.assertRaises(TypeError):
            FilterContinuous(1, FilterContinuous.Equal, 0, c=12)
        with self.assertRaises(TypeError):
            FilterContinuous(1, FilterContinuous.Equal, 0, min=5, c=12)

        between = FilterContinuous(1, FilterContinuous.Between, min=1, max=2)
        self.assertEqual(between.ref, 1)
    def test_min(self):
        """Check the `min`/`ref` pairing and keyword handling of the filter.

        `min` and `ref` must read and write the same value, an unknown
        keyword must raise `TypeError`, and `min`/`max` must be usable as
        keyword arguments.
        """
        flt = FilterContinuous(1, FilterContinuous.Between, 1, 2)
        for name, expected in (("min", 1), ("max", 2), ("ref", 1)):
            self.assertEqual(getattr(flt, name), expected)

        flt.ref = 0
        self.assertEqual(flt.min, 0)

        flt.min = -1
        self.assertEqual(flt.ref, -1)

        # `c` is not an accepted keyword, with or without a valid `min`.
        for bad_kwargs in ({"c": 12}, {"min": 5, "c": 12}):
            with self.assertRaises(TypeError):
                FilterContinuous(1, FilterContinuous.Equal, 0, **bad_kwargs)

        flt = FilterContinuous(1, FilterContinuous.Between, min=1, max=2)
        self.assertEqual(flt.ref, 1)
예제 #8
0
    def get_filtered_data(self):
        """Return the rows of `self.data` matching any selected pivot cell.

        Each `(i, j)` pair in `self.selection` yields a conjunction of a
        filter on `self.row_feature` (value taken from column 0 of row `i`
        of the pivot table) and a filter on `self.col_feature` (value `j`).
        The per-cell conjunctions are then combined as a disjunction.

        Returns:
            The filtered data, or None when there is no data, no selection
            or no pivot table.
        """
        if not (self.data and self.selection and self.pivot.pivot_table):
            return None

        cell_filters = []
        for i, j in self.selection:
            feature_values = (
                (self.row_feature, self.pivot.pivot_table.X[i, 0]),
                (self.col_feature, j),
            )
            parts = []
            for variable, value in feature_values:
                # Variables of any other type contribute no filter.
                if isinstance(variable, DiscreteVariable):
                    parts.append(FilterDiscrete(variable, [value]))
                elif isinstance(variable, ContinuousVariable):
                    parts.append(
                        FilterContinuous(variable, FilterContinuous.Equal,
                                         value))
            cell_filters.append(Values(parts))

        any_cell = Values(cell_filters, conjunction=False)
        return any_cell(self.data)
예제 #9
0
 def __init__(self, dist, attr, group_val_index=None, group_var=None):
     """Compute summary statistics of a distribution and its row filters.

     Row 0 of `dist` is used as the values and row 1 as their frequencies.
     When the total frequency is zero, only `dist` and `n` are set.
     Otherwise the minimum, maximum, mean, variance, deviation, median and
     quartiles are stored, together with `conditions`: a
     `FilterContinuous.Between` filter on `attr` over the quartile range,
     plus a `FilterDiscrete` on `group_var` when `group_val_index` is
     given.
     """
     self.dist = dist
     total = np.sum(dist[1])
     self.n = total
     if total == 0:
         return

     self.a_min = float(dist[0, 0])
     self.a_max = float(dist[0, -1])
     weighted_sum = np.sum(dist[0] * dist[1])
     self.mean = float(weighted_sum / total)
     squared_deviation = (dist[0] - self.mean) ** 2
     self.var = float(np.sum(dist[1] * squared_deviation) / total)
     self.dev = math.sqrt(self.var)

     values, counts = np.asarray(dist)
     lower, middle, upper = _quantiles(values, counts, [0.25, 0.5, 0.75])
     self.median = middle
     # The code below omits the q25 or q75 in the plot when they are None
     self.q25 = None if lower == middle else lower
     self.q75 = None if upper == middle else upper

     quartile_range = FilterContinuous(attr, FilterContinuous.Between,
                                       self.q25, self.q75)
     self.conditions = [quartile_range]
     if group_val_index is not None:
         group_filter = FilterDiscrete(group_var, [group_val_index])
         self.conditions.append(group_filter)
예제 #10
0
 def bench_adult_filter_pre_pandas(self):
     """Benchmark a combined continuous + discrete filter on `self.adult`.

     The result of applying the filter is discarded; only the call matters.
     """
     domain = self.adult.domain
     by_age = FilterContinuous(domain["age"], FilterContinuous.Greater, 30)
     by_workclass = FilterDiscrete(domain["workclass"], [0])
     Values([by_age, by_workclass])(self.adult)
    def test_str(self):
        """Check `str()` of `FilterContinuous` for every operator."""
        # The column may be given as an index, a name or a variable.
        flt = FilterContinuous(1, FilterContinuous.Equal, 1)
        self.assertEqual(str(flt), "feature(1) = 1")

        flt = FilterContinuous("foo", FilterContinuous.Equal, 1)
        self.assertEqual(str(flt), "foo = 1")

        flt = FilterContinuous(self.domain[0], FilterContinuous.Equal, 1, 2)
        self.assertEqual(str(flt), "a = 1")

        # Switch the operator on the same filter and compare the rendering.
        expected_by_oper = [
            (flt.NotEqual, "a ≠ 1"),
            (flt.Less, "a < 1"),
            (flt.LessEqual, "a ≤ 1"),
            (flt.Greater, "a > 1"),
            (flt.GreaterEqual, "a ≥ 1"),
            (flt.Between, "1 ≤ a ≤ 2"),
            (flt.Outside, "not 1 ≤ a ≤ 2"),
            (flt.IsDefined, "a is defined"),
            (-1, "invalid operator"),
        ]
        for oper, expected in expected_by_oper:
            flt.oper = oper
            self.assertEqual(str(flt), expected)
예제 #12
0
    def test_str(self):
        """Check the string form of `FilterContinuous`.

        Covers the three ways of naming the column (index, string, variable)
        and then every operator, including an invalid one, on one filter.
        """
        by_index = FilterContinuous(1, FilterContinuous.Equal, 1)
        self.assertEqual(str(by_index), "feature(1) = 1")

        by_name = FilterContinuous("foo", FilterContinuous.Equal, 1)
        self.assertEqual(str(by_name), "foo = 1")

        flt = FilterContinuous(self.domain[0], FilterContinuous.Equal, 1, 2)
        self.assertEqual(str(flt), "a = 1")

        renderings = (
            (flt.NotEqual, "a ≠ 1"),
            (flt.Less, "a < 1"),
            (flt.LessEqual, "a ≤ 1"),
            (flt.Greater, "a > 1"),
            (flt.GreaterEqual, "a ≥ 1"),
            (flt.Between, "1 ≤ a ≤ 2"),
            (flt.Outside, "not 1 ≤ a ≤ 2"),
            (flt.IsDefined, "a is defined"),
            # An operator code outside the known set.
            (-1, "invalid operator"),
        )
        for operator_code, text in renderings:
            flt.oper = operator_code
            self.assertEqual(str(flt), text)