예제 #1
0
 def strudel(self, dist, group_val_index=None):
     """Build the graphics items of the stacked bar for one distribution.

     Args:
         dist: per-value counts of ``self.attribute``; it is indexed in
             parallel with ``attr.values`` and ``attr.colors``.
         group_val_index: index of the group value the distribution belongs
             to, or None. When given, every item's filter is additionally
             restricted to that value of ``self.group_var``.

     Returns:
         A list of graphics items: a ``FilterGraphicsRectItem`` followed by
         a ``QGraphicsTextItem`` for every count of at least 1e-6, preceded
         by one placeholder rectangle when the counts sum to less than 1e-6.
     """
     attr = self.attribute
     total = np.sum(dist)

     def conditions_for(values):
         # A fresh filter list for every item: the attribute filter plus,
         # when grouping, the filter for the group value.
         cond = [FilterDiscrete(attr, values)]
         if group_val_index is not None:
             cond.append(FilterDiscrete(self.group_var, [group_val_index]))
         return cond

     box = []
     if total < 1e-6:
         # (Nearly) empty distribution: a thin placeholder rectangle.
         box.append(
             FilterGraphicsRectItem(conditions_for(None), 0, -10, 1, 10))

     cum = 0
     for i, count in enumerate(dist):
         if count < 1e-6:
             continue
         # Stretched bars show shares of the total, others raw counts.
         share = count / total if self.stretched else count
         width = share * self.scale_x

         # One unit of padding is left on each side of the segment.
         rect = FilterGraphicsRectItem(
             conditions_for([i]), cum + 1, -6, width - 2, 12)
         rect.setBrush(QBrush(QColor(*attr.colors[i])))
         rect.setPen(QPen(Qt.NoPen))
         if self.stretched:
             # Reuse the total computed above instead of re-summing the
             # whole distribution for every segment.
             tooltip = "{}: {:.2f}%".format(attr.values[i],
                                            100 * count / total)
         else:
             tooltip = "{}: {}".format(attr.values[i], int(count))
         rect.setToolTip(tooltip)

         box.append(rect)
         box.append(QGraphicsTextItem(attr.values[i]))
         cum += width
     return box
예제 #2
0
 def _gather_conditions(self):
     """Translate the current selection into a list of data filters.

     Every entry of ``self.selection`` yields at most one condition on
     ``self.attribute``: a ``Between`` filter for a non-discrete attribute,
     a negated ``IsDefined`` for a falsy selected value, or a
     ``FilterDiscrete`` for a value of the attribute. When the entry has a
     truthy ``group_value``, the condition is combined (conjunction) with a
     ``FilterDiscrete`` on ``self.group_var``.

     Returns:
         The list of conditions, one per selection entry that was not
         skipped.
     """
     attr = self.attribute
     group_attr = self.group_var
     conditions = []
     for data_range in self.selection:
         if not attr.is_discrete:
             condition = FilterContinuous(attr, FilterContinuous.Between,
                                          data_range.low, data_range.high)
         elif not data_range.value:
             condition = IsDefined([attr], negate=True)
         elif data_range.value not in attr.values:
             # If some value was removed from the data (in case settings
             # are loaded from a scheme), do not include the corresponding
             # filter; this is appropriate since data with such value does
             # not exist anyway
             continue
         else:
             condition = FilterDiscrete(attr, [data_range.value])

         group_value = data_range.group_value
         if group_value:
             # NOTE(review): this branch used to start with an
             # `if not data_range.group_value` case producing a negated
             # IsDefined([group_attr]) filter. It could never run because
             # the enclosing test already requires a truthy group value,
             # so it was removed. If a falsy group value is meant to select
             # rows with a missing group, the enclosing test needs to
             # change -- confirm the intent.
             if group_value not in group_attr.values:
                 continue
             grp_filter = FilterDiscrete(group_attr, [group_value])
             condition = Values([condition, grp_filter], conjunction=True)
         conditions.append(condition)
     return conditions
 def test_eq(self):
     """Check ``==`` on FilterDiscrete against comparison of ``__dict__``.

     Two filters built with the same arguments must compare equal, two
     built with different first arguments must not, and in both cases the
     result of ``==`` must match the result of comparing the instances'
     ``__dict__``.
     """
     reference = FilterDiscrete(1, None)
     twin = FilterDiscrete(1, None)
     different = FilterDiscrete(2, None)

     checks = (
         (self.assertEqual, twin),
         (self.assertNotEqual, different),
     )
     for assert_relation, other in checks:
         assert_relation(reference, other)
         self.assertEqual(reference.__dict__ == other.__dict__,
                          reference == other)
 def commit(self):
     """Send the selected data, the annotated data and the contingency table.

     With a non-empty selection of ``(row, column)`` index pairs, the rows
     index ``self.clusters`` and the columns index ``self.columns.values``.
     The selected data contains the instances matching any selected cluster
     and is transformed to a domain with the selected columns as attributes
     (class variables and metas are kept). Without a selection, the
     selected output is None.

     The contingency table is created by ``self.ca`` only when ``self.ca``
     is set and ``self._task`` is None; otherwise None is sent.
     """
     if not len(self.selection):
         selected_data = None
         annotated_data = create_annotated_table(self.data, [])
     else:
         cluster_ids = {row for row, _ in self.selection}
         column_ids = {col for _, col in self.selection}

         domain = self.data.domain
         attributes = [domain[self.columns.values[col]]
                       for col in column_ids]
         new_domain = Domain(attributes, domain.class_vars, domain.metas)

         cluster_filters = [
             FilterDiscrete(self.cluster_var, [self.clusters[row]])
             for row in cluster_ids
         ]
         matching = Values(cluster_filters, conjunction=False)(self.data)
         selected_data = matching.transform(new_domain)

         # Positions of the selected instances within the input data.
         is_selected = np.in1d(self.data.ids, selected_data.ids, True)
         annotated_data = create_annotated_table(
             self.data.transform(new_domain), np.where(is_selected))

     table = None
     if self.ca is not None and self._task is None:
         table = self.ca.create_contingency_table()

     self.Outputs.selected_data.send(selected_data)
     self.Outputs.annotated_data.send(annotated_data)
     self.Outputs.contingency.send(table)
예제 #5
0
 def __init__(self, dist, attr, group_val_index=None, group_var=None):
     """Compute summary statistics of a distribution and its filters.

     Args:
         dist: two-row array-like; row 0 is read as the values and row 1
             as their frequencies (assumes values are sorted ascending,
             since the first and last are taken as min and max -- TODO
             confirm with the caller).
         attr: variable used in the ``Between`` filter.
         group_val_index: optional index of the group value; when given,
             a ``FilterDiscrete`` on ``group_var`` is added.
         group_var: variable used for the group filter.

     When the frequencies sum to zero, only ``dist`` and ``n`` are set.
     """
     self.dist = dist
     self.n = n = np.sum(dist[1])
     if n == 0:
         return

     self.a_min, self.a_max = float(dist[0, 0]), float(dist[0, -1])
     self.mean = float(np.sum(dist[0] * dist[1]) / n)
     deviations = dist[0] - self.mean
     self.var = float(np.sum(dist[1] * deviations**2) / n)
     self.dev = math.sqrt(self.var)

     # Walk the cumulative frequency and record the value at which each
     # of the three cut points is reached. At most one cut point is
     # handled per element, so fewer than three may be collected.
     cut_points = [n / 4, n / 2, n / 4 * 3]
     quartiles = []
     running = 0
     for idx, count in enumerate(dist[1]):
         running += count
         target = cut_points[len(quartiles)]
         if running >= target:
             if running == target and idx + 1 < dist.shape[1]:
                 # The cut falls exactly between two values: average them.
                 point = (dist[0, idx] + dist[0, idx + 1]) / 2
             else:
                 point = dist[0, idx]
             quartiles.append(float(point))
             if len(quartiles) == 3:
                 break

     if len(quartiles) == 3:
         self.q25, self.median, self.q75 = quartiles
     else:
         self.q25 = self.q75 = None
         self.median = quartiles[1] if len(quartiles) == 2 else None

     conditions = [
         FilterContinuous(attr, FilterContinuous.Between, self.q25,
                          self.q75)
     ]
     if group_val_index is not None:
         conditions.append(FilterDiscrete(group_var, [group_val_index]))
     self.conditions = conditions
예제 #6
0
            def selectionChanged(self, *args):
                """Send the instances matching the selected rows of the view.

                For every selected row, a conjunction of ``FilterDiscrete``
                conditions (one per variable in ``self._vars``) is built
                from the row's cell values; the rows are combined as a
                disjunction and applied to ``owwidget.data``.
                """
                super().selectionChanged(*args)

                selected_rows = list(set(
                    index.row()
                    for index in self.selectionModel().selectedRows(0)
                ))
                if not selected_rows:
                    # NOTE(review): only the `data` output is cleared here;
                    # `selected_data` keeps whatever was sent last --
                    # confirm this is intended.
                    owwidget.Outputs.data.send(None)
                    return

                cells = self.model().tolist()

                row_filters = []
                for row in self.model().mapToSourceRows(selected_rows):
                    cell_filters = [
                        FilterDiscrete(var, {cells[row][col]})
                        for col, var in enumerate(self._vars)
                    ]
                    row_filters.append(Values(cell_filters))
                data = Values(row_filters, conjunction=False)(owwidget.data)

                annotated = create_annotated_table(owwidget.data, data.ids)

                owwidget.Outputs.selected_data.send(data)
                owwidget.Outputs.data.send(annotated)
예제 #7
0
 def commit(self):
     """Send the contingency table, the selected data and the annotated data.

     Every selected ``(row index, column index)`` cell becomes a
     conjunction of two ``FilterDiscrete`` conditions, one on ``self.rows``
     and one on ``self.columns``; the selected data matches any of the
     cells. Without a selection, the selected output is None.
     """
     if not len(self.selection):
         selected_data = None
         annotated_data = create_annotated_table(self.data, [])
     else:
         cells = [
             Values([
                 FilterDiscrete(self.rows, [row_value]),
                 FilterDiscrete(self.columns, [col_value])
             ])
             for ir, row_value in enumerate(self.rows.values)
             for ic, col_value in enumerate(self.columns.values)
             if (ir, ic) in self.selection
         ]
         selected_data = Values(cells, conjunction=False)(self.data)
         # Positions of the selected instances within the input data.
         is_selected = np.in1d(self.data.ids, selected_data.ids, True)
         annotated_data = create_annotated_table(
             self.data, np.where(is_selected))

     self.Outputs.contingency.send(self.table)
     self.Outputs.selected_data.send(selected_data)
     self.Outputs.annotated_data.send(annotated_data)
예제 #8
0
    def get_filtered_data(self):
        """Return the instances of ``self.data`` behind the selected cells.

        Each selected ``(i, j)`` pair is turned into a conjunction of a
        condition on ``self.row_feature`` (compared with
        ``pivot_table.X[i, 0]``) and one on ``self.col_feature`` (compared
        with ``j``). Discrete variables use ``FilterDiscrete``, continuous
        ones ``FilterContinuous`` with ``Equal``; a feature of any other
        kind contributes no condition. The cells are combined as a
        disjunction.

        Returns:
            The filtered data, or None when there is no data, no selection
            or no pivot table.
        """
        if not (self.data and self.selection and self.pivot.pivot_table):
            return None

        def as_filter(var, value):
            # Pick the filter class matching the variable type, if any.
            if isinstance(var, DiscreteVariable):
                return FilterDiscrete(var, [value])
            if isinstance(var, ContinuousVariable):
                return FilterContinuous(var, FilterContinuous.Equal, value)
            return None

        cell_conditions = []
        for i, j in self.selection:
            row_value = self.pivot.pivot_table.X[i, 0]
            candidates = (as_filter(self.row_feature, row_value),
                          as_filter(self.col_feature, j))
            cell_conditions.append(
                Values([flt for flt in candidates if flt is not None]))
        return Values(cell_conditions, conjunction=False)(self.data)
 def test_values(self):
     """Check nesting of ``Values`` filters with negation on ``self.iris``.

     A negated disjunction of "column 0 < 5" and "column 1 > 3" is
     combined with a filter on the variable at index 4; the result is
     compared with the equivalent boolean mask computed from ``X``/``Y``.
     """
     iris = self.iris
     variables = iris.domain.variables
     first_below = FilterContinuous(variables[0], FilterContinuous.Less, 5)
     second_above = FilterContinuous(variables[1],
                                     FilterContinuous.Greater, 3)
     class_filter = FilterDiscrete(variables[4], [2])
     neither = Values([first_below, second_above],
                      conjunction=False, negate=True)
     neither_and_class = Values([neither, class_filter])

     by_neither = neither(iris)
     by_both = neither_and_class(iris)

     self.assertGreater(len(by_neither), len(by_both))
     self.assertTrue((by_both.X[:, 0] >= 5).all())
     self.assertTrue((by_both.X[:, 1] <= 3).all())
     self.assertTrue((by_both.Y == 2).all())

     expected_mask = (~((iris.X[:, 0] < 5) | (iris.X[:, 1] > 3))
                      & (iris.Y == 2))
     self.assertEqual(len(by_both), expected_mask.sum())
예제 #10
0
    def commit(self):
        """Send the selected data, the annotated data and the contingency table.

        Without input data, None is sent to all three outputs.

        With a non-empty ``self.selection_indices`` (pairs of row and
        column indices, swapped when ``self.transpose`` is set), the
        selected data contains the instances matching any selected cluster
        and is transformed to a domain whose attributes are the selected
        columns (class variables and metas are kept).

        The contingency table is always rebuilt from ``self.matrix`` and
        ``self.clusters``.
        """
        if self.data is None:
            self.Outputs.selected_data.send(None)
            self.Outputs.annotated_data.send(None)
            self.Outputs.contingency.send(None)
            return

        # Needed for mapping the selection as well as for the contingency
        # table; build the list once.
        attribute_names = [str(x) for x in self.matrix.domain.attributes]

        if len(self.selection_indices):
            cluster_ids = set()
            gene_ids = set()
            for ir, ic in self.selection_indices:
                if self.transpose:
                    ir, ic = ic, ir
                cluster_ids.add(ir)
                gene_ids.add(ic)

            if self.transpose:
                columns, rows = self.clusters, attribute_names
            else:
                columns, rows = attribute_names, self.clusters

            new_domain = Domain(
                [self.data.domain[columns[i]] for i in gene_ids],
                self.data.domain.class_vars,
                self.data.domain.metas)
            selected_data = Values(
                [FilterDiscrete(self.cluster_var, [rows[i]])
                 for i in cluster_ids],
                conjunction=False)(self.data)
            selected_data = selected_data.transform(new_domain)
            # Positions of the selected instances within the input data.
            is_selected = np.in1d(self.data.ids, selected_data.ids, True)
            annotated_data = create_annotated_table(
                self.data, np.where(is_selected))
        else:
            selected_data = None
            annotated_data = create_annotated_table(self.data, [])

        # De-duplicate in order of first occurrence: unlike a set, this
        # gives the same order of values on every run.
        clusters_values = list(dict.fromkeys(self.clusters))
        # Value -> index lookup instead of a linear list.index() per row.
        value_index = {
            value: index for index, value in enumerate(clusters_values)
        }
        table = ClusterAnalysis.contingency_table(
            self.matrix,
            DiscreteVariable(
                "Gene" if self.transpose else self.cluster_var.name,
                clusters_values),
            attribute_names,
            [[value_index[c]] for c in self.clusters]
        )

        self.Outputs.selected_data.send(selected_data)
        self.Outputs.annotated_data.send(annotated_data)
        self.Outputs.contingency.send(table)
예제 #11
0
    def test_reprs(self):
        """Check that ``repr`` of every filter type round-trips via ``eval``.

        For each filter, evaluating its ``repr`` must give an object whose
        ``repr`` is the same string.
        """
        filters = [
            IsDefined(negate=True),
            HasClass(),
            Random(),
            FilterDiscrete(self.attr_disc, None),
            SameValue(self.attr_disc, self.value_disc, negate=True),
        ]
        less_than = FilterContinuous(self.vs[0], FilterContinuous.Less, 5)
        # Only used as a member of the Values filter below.
        greater_than = FilterContinuous(self.vs[1],
                                        FilterContinuous.Greater, 3)
        filters += [
            less_than,
            Values([less_than, greater_than], conjunction=False, negate=True),
            ValueFilter(self.attr_disc),
            FilterString("name", FilterString.Equal, "Aardvark",
                         case_sensitive=False),
            FilterStringList("name", ["Aardvark"], case_sensitive=False),
            FilterRegex("name", "^c...$"),
        ]

        for flt in filters:
            text = repr(flt)
            # eval() is applied only to reprs produced within this test.
            self.assertEqual(repr(eval(text)), text)
예제 #12
0
 def __init__(self, dist, attr, group_val_index=None, group_var=None):
     """Compute summary statistics of a distribution and its filters.

     Args:
         dist: two-row array-like; row 0 is read as the values and row 1
             as their frequencies (assumes values are sorted ascending,
             since the first and last are taken as min and max -- TODO
             confirm with the caller).
         attr: variable used in the ``Between`` filter.
         group_val_index: optional index of the group value; when given,
             a ``FilterDiscrete`` on ``group_var`` is added.
         group_var: variable used for the group filter.

     When the frequencies sum to zero, only ``dist`` and ``n`` are set.
     """
     self.dist = dist
     self.n = n = np.sum(dist[1])
     if n == 0:
         return

     self.a_min, self.a_max = float(dist[0, 0]), float(dist[0, -1])
     self.mean = float(np.sum(dist[0] * dist[1]) / n)
     deviations = dist[0] - self.mean
     self.var = float(np.sum(dist[1] * deviations ** 2) / n)
     self.dev = math.sqrt(self.var)

     values, frequencies = np.asarray(dist)
     lower, middle, upper = _quantiles(values, frequencies,
                                       [0.25, 0.5, 0.75])
     self.median = middle
     # A quartile equal to the median is stored as None; per the original
     # author's note, the plotting code omits q25/q75 when they are None.
     self.q25, self.q75 = (None if quartile == middle else quartile
                           for quartile in (lower, upper))

     conditions = [FilterContinuous(attr, FilterContinuous.Between,
                                    self.q25, self.q75)]
     if group_val_index is not None:
         conditions.append(FilterDiscrete(group_var, [group_val_index]))
     self.conditions = conditions
예제 #13
0
 def commit(self):
     """Compute the table of averages and send it to the output.

     Without data, None is sent. Without a group variable, the averages
     are computed on the whole data. Otherwise ``average_table`` is called
     for the subset of every value of the group variable and once more for
     the data selected by a negated ``FilterDiscrete(group_var, None)``
     (presumably the instances with a missing group value -- verify
     against the filter's documentation); the parts are concatenated
     along axis 0.
     """
     if self.data is None:
         self.Outputs.averages.send(None)
         return

     if self.group_var is None:
         averages = self.average_table(self.data)
     else:
         parts = [
             self.average_table(SameValue(self.group_var, value)(self.data))
             for value in self.group_var.values
         ]
         # Using "None" as in OWSelectRows
         # Values is required because FilterDiscrete doesn't have
         # negate keyword or IsDefined method
         undefined_filter = Values(
             conditions=[FilterDiscrete(self.group_var, None)],
             negate=True)
         parts.append(self.average_table(undefined_filter(self.data)))
         averages = Orange.data.Table.concatenate(parts, axis=0)
     self.Outputs.averages.send(averages)
예제 #14
0
    def commit(self):
        """Send the selected data, the annotated data and the contingency table.

        With a non-empty selection of index pairs (read as cluster/gene,
        or gene/cluster when ``self.transpose`` is set), the selected data
        contains the instances matching any selected cluster and is
        transformed to a domain whose attributes are the selected genes
        (class variables and metas are kept). Without a selection, the
        selected output is None.

        The contingency table is built only when ``self.matrix`` is set;
        otherwise None is sent.
        """
        if not len(self.selection):
            selected_data = None
            annotated_data = create_annotated_table(self.data, [])
        else:
            cluster_ids, gene_ids = set(), set()
            for first, second in self.selection:
                if self.transpose:
                    cluster_idx, gene_idx = second, first
                else:
                    cluster_idx, gene_idx = first, second
                cluster_ids.add(cluster_idx)
                gene_ids.add(gene_idx)

            domain = self.data.domain
            new_domain = Domain(
                [domain[self.genes[i]] for i in gene_ids],
                domain.class_vars, domain.metas)

            cluster_filters = [
                FilterDiscrete(self.cluster_var, [self.clusters[i]])
                for i in cluster_ids
            ]
            matching = Values(cluster_filters, conjunction=False)(self.data)
            selected_data = matching.transform(new_domain)

            # Positions of the selected instances within the input data.
            is_selected = np.in1d(self.data.ids, selected_data.ids, True)
            annotated_data = create_annotated_table(
                self.data.transform(new_domain), np.where(is_selected))

        table = None
        if self.matrix is not None:
            cluster_variable = DiscreteVariable(self.cluster_var.name,
                                                np.array(self.clusters))
            table = ClusterAnalysis.contingency_table(
                self.matrix,
                cluster_variable,
                np.array(self.genes)[self.gene_order],
                self.cluster_order[..., np.newaxis])

        self.Outputs.selected_data.send(selected_data)
        self.Outputs.annotated_data.send(annotated_data)
        self.Outputs.contingency.send(table)
예제 #15
0
 def bench_adult_filter_pre_pandas(self):
     """Apply a combined filter to ``self.adult``.

     The filter combines "age" greater than 30 with "workclass" restricted
     to the value given as ``[0]``; the filtered result is discarded.
     """
     domain = self.adult.domain
     conditions = [
         FilterContinuous(domain["age"], FilterContinuous.Greater, 30),
         FilterDiscrete(domain["workclass"], [0]),
     ]
     Values(conditions)(self.adult)