def strudel(self, dist, group_val_index=None):
    """Return the graphics items of one stacked bar for a distribution.

    Every entry of ``dist`` that is at least 1e-6 yields a coloured
    ``FilterGraphicsRectItem`` followed by a ``QGraphicsTextItem`` holding
    the name of the corresponding attribute value.  Segment widths are the
    entries of ``dist`` (divided by their total when ``self.stretched`` is
    set) multiplied by ``self.scale_x``.

    :param dist: sequence of per-value frequencies, indexed like
        ``self.attribute.values``
    :param group_val_index: if not None, each item's filter is additionally
        restricted to this value of ``self.group_var``
    :return: list of graphics items
    """
    attr = self.attribute
    # The total is loop-invariant: compute it once and reuse it for both the
    # stretching and the percentage shown in the tooltip.
    total = np.sum(dist)

    def conditions_for(values):
        # Filter on the attribute, optionally narrowed to one group.
        conds = [FilterDiscrete(attr, values)]
        if group_val_index is not None:
            conds.append(FilterDiscrete(self.group_var, [group_val_index]))
        return conds

    box = []
    if total < 1e-6:
        # (Almost) empty distribution: add a thin placeholder rectangle.
        box.append(FilterGraphicsRectItem(conditions_for(None), 0, -10, 1, 10))

    offset = 0
    for i, width in enumerate(dist):
        if width < 1e-6:
            continue
        if self.stretched:
            width /= total
        width *= self.scale_x

        # Leave a one-unit gap on each side of the segment.
        rect = FilterGraphicsRectItem(
            conditions_for([i]), offset + 1, -6, width - 2, 12)
        rect.setBrush(QBrush(QColor(*attr.colors[i])))
        rect.setPen(QPen(Qt.NoPen))
        if self.stretched:
            tooltip = "{}: {:.2f}%".format(
                attr.values[i], 100 * dist[i] / total)
        else:
            tooltip = "{}: {}".format(attr.values[i], int(dist[i]))
        rect.setToolTip(tooltip)

        text = QGraphicsTextItem(attr.values[i])
        box.append(rect)
        box.append(text)
        offset += width
    return box
def _gather_conditions(self):
    """Translate ``self.selection`` into a list of data filters.

    Each selected range produces one filter on ``self.attribute``; when the
    range carries a (truthy) group value, that filter is combined, as a
    conjunction, with a filter on ``self.group_var``.  Ranges that refer to
    a value no longer present in the variable are skipped.

    :return: list of filter objects, one per usable selection entry
    """
    conditions = []
    attr = self.attribute
    group_attr = self.group_var
    for data_range in self.selection:
        if attr.is_discrete:
            # If some value was removed from the data (in case settings are
            # loaded from a scheme), do not include the corresponding
            # filter; this is appropriate since data with such value does
            # not exist anyway
            if not data_range.value:
                condition = IsDefined([attr], negate=True)
            elif data_range.value not in attr.values:
                continue
            else:
                condition = FilterDiscrete(attr, [data_range.value])
        else:
            condition = FilterContinuous(
                attr, FilterContinuous.Between,
                data_range.low, data_range.high)

        group_value = data_range.group_value
        if group_value:
            # NOTE(review): the original code had an extra
            # ``if not data_range.group_value`` branch here that built an
            # ``IsDefined(..., negate=True)`` filter.  It could never run
            # because this block is only entered for truthy group values,
            # so it was removed.  If selecting the "missing group" box is
            # meant to be supported, the outer condition needs rethinking.
            if group_value not in group_attr.values:
                continue
            grp_filter = FilterDiscrete(group_attr, [group_value])
            condition = Values([condition, grp_filter], conjunction=True)
        conditions.append(condition)
    return conditions
def test_eq(self):
    # Two filters must compare equal exactly when their ``__dict__``s do.
    first = FilterDiscrete(1, None)
    twin = FilterDiscrete(1, None)
    different = FilterDiscrete(2, None)

    self.assertEqual(first, twin)
    twin_dicts_match = first.__dict__ == twin.__dict__
    self.assertEqual(twin_dicts_match, first == twin)

    self.assertNotEqual(first, different)
    different_dicts_match = first.__dict__ == different.__dict__
    self.assertEqual(different_dicts_match, first == different)
def commit(self):
    """Send the selected data, the annotated data and the contingency table.

    ``self.selection`` holds ``(row, column)`` index pairs.  Row indices
    pick values from ``self.clusters`` (matched against
    ``self.cluster_var``); column indices pick variable names from
    ``self.columns.values``.  With an empty selection, ``None`` is sent as
    the selected data and the annotated table marks no rows.
    """
    if len(self.selection):
        cluster_ids = {ir for ir, _ in self.selection}
        column_ids = {ic for _, ic in self.selection}

        # Keep only the selected columns; class variables and metas stay.
        new_domain = Domain(
            [self.data.domain[self.columns.values[col]]
             for col in column_ids],
            self.data.domain.class_vars,
            self.data.domain.metas)

        # A row is selected if it belongs to any of the selected clusters.
        cluster_filter = Values(
            [FilterDiscrete(self.cluster_var, [self.clusters[ir]])
             for ir in cluster_ids],
            conjunction=False)
        selected_data = cluster_filter(self.data).transform(new_domain)

        # np.isin replaces np.in1d, which is deprecated since NumPy 2.0.
        selected_rows = np.where(
            np.isin(self.data.ids, selected_data.ids, assume_unique=True))
        annotated_data = create_annotated_table(
            self.data.transform(new_domain), selected_rows)
    else:
        selected_data = None
        annotated_data = create_annotated_table(self.data, [])

    if self.ca is not None and self._task is None:
        table = self.ca.create_contingency_table()
    else:
        table = None

    self.Outputs.selected_data.send(selected_data)
    self.Outputs.annotated_data.send(annotated_data)
    self.Outputs.contingency.send(table)
def __init__(self, dist, attr, group_val_index=None, group_var=None):
    """Compute summary statistics of a weighted distribution.

    ``dist`` is indexed as a two-row array: ``dist[0]`` holds the values and
    ``dist[1]`` their frequencies.  ``a_min``/``a_max`` are taken from the
    first and last value, so the values are assumed to be sorted in
    ascending order (TODO confirm against the caller).

    Sets ``dist``, ``n`` and - unless the total frequency is zero -
    ``a_min``, ``a_max``, ``mean``, ``var``, ``dev``, ``q25``, ``median``,
    ``q75`` and ``conditions``.

    :param dist: 2 x N array of values and frequencies
    :param attr: variable used for the quartile-range filter
    :param group_val_index: if not None, ``conditions`` also gets a filter
        for this value of ``group_var``
    :param group_var: grouping variable used with ``group_val_index``
    """
    self.dist = dist
    self.n = n = np.sum(dist[1])
    if n == 0:
        # Nothing to summarise; the remaining attributes stay unset.
        return

    self.a_min = float(dist[0, 0])
    self.a_max = float(dist[0, -1])
    self.mean = float(np.sum(dist[0] * dist[1]) / n)
    self.var = float(np.sum(dist[1] * (dist[0] - self.mean) ** 2) / n)
    self.dev = math.sqrt(self.var)

    # Walk the cumulative frequency and record the value at which each of
    # the three quartile thresholds is reached.
    thresholds = (n / 4, n / 2, n / 4 * 3)
    quartiles = []
    cumulative = 0
    for i, weight in enumerate(dist[1]):
        cumulative += weight
        # A single heavy value may cross several thresholds at once, so all
        # of them must be consumed here.  (Testing only one threshold per
        # element used to shift the later quartiles onto following values,
        # or leave them as None.)
        while (len(quartiles) < 3
               and cumulative >= thresholds[len(quartiles)]):
            threshold = thresholds[len(quartiles)]
            if cumulative == threshold and i + 1 < dist.shape[1]:
                # Threshold falls exactly between two values: use their
                # midpoint.
                quartiles.append(float((dist[0, i] + dist[0, i + 1]) / 2))
            else:
                quartiles.append(float(dist[0, i]))
        if len(quartiles) == 3:
            self.q25, self.median, self.q75 = quartiles
            break
    else:
        # The cumulative sum never reached the third threshold.
        self.q25 = self.q75 = None
        self.median = quartiles[1] if len(quartiles) == 2 else None

    self.conditions = [
        FilterContinuous(attr, FilterContinuous.Between,
                         self.q25, self.q75)
    ]
    if group_val_index is not None:
        self.conditions.append(FilterDiscrete(group_var, [group_val_index]))
def selectionChanged(self, *args):
    """Update the widget outputs after the view's selection has changed.

    For every selected row a conjunction of ``FilterDiscrete`` filters (one
    per variable in ``self._vars``) is built from the row's cell values;
    rows of ``owwidget.data`` matching any of them are sent as the selected
    data, and the whole table annotated with the selection is sent as data.
    With nothing selected both outputs are cleared.
    """
    super().selectionChanged(*args)
    rows = list({
        index.row() for index in self.selectionModel().selectedRows(0)
    })
    if not rows:
        # Clear the selection output as well; otherwise the previously
        # selected subset would stay on the output after deselecting.
        owwidget.Outputs.selected_data.send(None)
        owwidget.Outputs.data.send(None)
        return

    model = self.model().tolist()
    filters = [
        Values([
            FilterDiscrete(var, {model[row][col]})
            for col, var in enumerate(self._vars)
        ])
        for row in self.model().mapToSourceRows(rows)
    ]
    data = Values(filters, conjunction=False)(owwidget.data)

    # create_annotated_table expects row indices, not instance ids, so map
    # the ids of the selected rows back to positions in the input table.
    selected_ids = set(data.ids)
    selected_rows = [
        i for i, row_id in enumerate(owwidget.data.ids)
        if row_id in selected_ids
    ]
    annotated = create_annotated_table(owwidget.data, selected_rows)

    owwidget.Outputs.selected_data.send(data)
    owwidget.Outputs.data.send(annotated)
def commit(self):
    """Send the contingency table, the selected data and the annotated data.

    ``self.selection`` holds ``(row, column)`` index pairs into
    ``self.rows.values`` and ``self.columns.values``.  A data row is
    selected when its values of the row and column variables match any
    selected cell.  With an empty selection, ``None`` is sent as the
    selected data and the annotated table marks no rows.
    """
    if len(self.selection):
        # One conjunction (row value AND column value) per selected cell,
        # in row-major order.
        cells = [
            Values([
                FilterDiscrete(self.rows, [row_value]),
                FilterDiscrete(self.columns, [col_value]),
            ])
            for ir, row_value in enumerate(self.rows.values)
            for ic, col_value in enumerate(self.columns.values)
            if (ir, ic) in self.selection
        ]
        selected_data = Values(cells, conjunction=False)(self.data)

        # np.isin replaces np.in1d, which is deprecated since NumPy 2.0.
        selected_rows = np.where(
            np.isin(self.data.ids, selected_data.ids, assume_unique=True))
        annotated_data = create_annotated_table(self.data, selected_rows)
    else:
        selected_data = None
        annotated_data = create_annotated_table(self.data, [])

    self.Outputs.contingency.send(self.table)
    self.Outputs.selected_data.send(selected_data)
    self.Outputs.annotated_data.send(annotated_data)
def get_filtered_data(self):
    """Return the rows of ``self.data`` that belong to the selected cells.

    Returns None when there is no data, no selection or no pivot table.
    Each selected ``(i, j)`` pair becomes a conjunction of a filter on
    ``self.row_feature`` (value read from column 0 of row ``i`` of the
    pivot table) and a filter on ``self.col_feature`` (value ``j``); the
    per-cell filters are then combined as a disjunction.
    """
    if not (self.data and self.selection and self.pivot.pivot_table):
        return None

    def value_filter(variable, value):
        # Variables that are neither discrete nor continuous get no filter.
        if isinstance(variable, DiscreteVariable):
            return FilterDiscrete(variable, [value])
        if isinstance(variable, ContinuousVariable):
            return FilterContinuous(variable, FilterContinuous.Equal, value)
        return None

    cell_filters = []
    for row, col_value in self.selection:
        parts = []
        row_value = self.pivot.pivot_table.X[row, 0]
        for variable, value in ((self.row_feature, row_value),
                                (self.col_feature, col_value)):
            flt = value_filter(variable, value)
            if flt is not None:
                parts.append(flt)
        cell_filters.append(Values(parts))

    any_cell = Values(cell_filters, conjunction=False)
    return any_cell(self.data)
def test_values(self):
    # Nested ``Values`` filters: a negated disjunction of two continuous
    # filters, combined (conjunctively, by default) with a discrete one.
    iris = self.iris
    variables = iris.domain.variables

    first_below_5 = FilterContinuous(variables[0], FilterContinuous.Less, 5)
    second_above_3 = FilterContinuous(
        variables[1], FilterContinuous.Greater, 3)
    fifth_is_2 = FilterDiscrete(variables[4], [2])

    neither = Values(
        [first_below_5, second_above_3], conjunction=False, negate=True)
    neither_and_fifth = Values([neither, fifth_is_2])

    by_neither = neither(iris)
    by_all = neither_and_fifth(iris)

    self.assertGreater(len(by_neither), len(by_all))
    self.assertTrue((by_all.X[:, 0] >= 5).all())
    self.assertTrue((by_all.X[:, 1] <= 3).all())
    self.assertTrue((by_all.Y == 2).all())

    # The same selection expressed directly with boolean masks.
    either_mask = (iris.X[:, 0] < 5) | (iris.X[:, 1] > 3)
    expected_count = (~either_mask & (iris.Y == 2)).sum()
    self.assertEqual(len(by_all), expected_count)
def commit(self):
    """Send the selected data, the annotated data and the contingency table.

    Without input data all three outputs are cleared.  Otherwise
    ``self.selection_indices`` holds ``(row, column)`` index pairs, which
    are swapped when ``self.transpose`` is set.  The selected data consists
    of the rows matching the selected ``self.cluster_var`` values,
    restricted to the selected columns; the annotated table marks those
    rows in the untransformed data.
    """
    if self.data is None:
        self.Outputs.selected_data.send(None)
        self.Outputs.annotated_data.send(None)
        self.Outputs.contingency.send(None)
        return

    # Built once; the original rebuilt this list up to three times.
    attribute_names = [str(x) for x in self.matrix.domain.attributes]

    if len(self.selection_indices):
        cluster_ids = set()
        gene_ids = set()
        for ir, ic in self.selection_indices:
            if not self.transpose:
                cluster_ids.add(ir)
                gene_ids.add(ic)
            else:
                cluster_ids.add(ic)
                gene_ids.add(ir)

        if self.transpose:
            columns, rows = self.clusters, attribute_names
        else:
            columns, rows = attribute_names, self.clusters

        new_domain = Domain(
            [self.data.domain[columns[i]] for i in gene_ids],
            self.data.domain.class_vars,
            self.data.domain.metas)
        row_filter = Values(
            [FilterDiscrete(self.cluster_var, [rows[i]])
             for i in cluster_ids],
            conjunction=False)
        selected_data = row_filter(self.data).transform(new_domain)

        # np.isin replaces np.in1d, which is deprecated since NumPy 2.0.
        selected_rows = np.where(
            np.isin(self.data.ids, selected_data.ids, assume_unique=True))
        annotated_data = create_annotated_table(self.data, selected_rows)
    else:
        selected_data = None
        annotated_data = create_annotated_table(self.data, [])

    # Unique cluster labels in order of first appearance.  ``list(set(...))``
    # gave an arbitrary (for strings: run-dependent) order, and looking each
    # label up with ``list.index`` was quadratic.
    clusters_values = list(dict.fromkeys(self.clusters))
    value_index = {value: i for i, value in enumerate(clusters_values)}
    table = ClusterAnalysis.contingency_table(
        self.matrix,
        DiscreteVariable(
            "Gene" if self.transpose else self.cluster_var.name,
            clusters_values),
        attribute_names,
        [[value_index[c]] for c in self.clusters]
    )

    self.Outputs.selected_data.send(selected_data)
    self.Outputs.annotated_data.send(annotated_data)
    self.Outputs.contingency.send(table)
def test_reprs(self):
    # The repr of every filter must evaluate back to an object that has
    # the very same repr.
    defined_negated = IsDefined(negate=True)
    has_class = HasClass()
    random_filter = Random()
    discrete = FilterDiscrete(self.attr_disc, None)
    same_value = SameValue(self.attr_disc, self.value_disc, negate=True)
    less_than = FilterContinuous(self.vs[0], FilterContinuous.Less, 5)
    # Only used as a member of the ``Values`` filter below.
    greater_than = FilterContinuous(self.vs[1], FilterContinuous.Greater, 3)
    values = Values([less_than, greater_than], conjunction=False, negate=True)
    value_filter = ValueFilter(self.attr_disc)
    string_equal = FilterString(
        "name", FilterString.Equal, "Aardvark", case_sensitive=False)
    string_list = FilterStringList(
        "name", ["Aardvark"], case_sensitive=False)
    regex = FilterRegex("name", "^c...$")

    checked = (
        defined_negated, has_class, random_filter, discrete, same_value,
        less_than, values, value_filter, string_equal, string_list, regex,
    )
    for original in checked:
        text = repr(original)
        rebuilt = eval(text)
        self.assertEqual(repr(rebuilt), text)
def __init__(self, dist, attr, group_val_index=None, group_var=None):
    """Compute summary statistics of a weighted distribution.

    ``dist`` is indexed as a two-row array: ``dist[0]`` holds the values and
    ``dist[1]`` their frequencies.  When the total frequency is zero only
    ``dist`` and ``n`` are set.  Otherwise ``a_min``, ``a_max``, ``mean``,
    ``var``, ``dev``, ``median``, ``q25``, ``q75`` and ``conditions`` are
    set as well; a quartile equal to the median is stored as None.
    """
    self.dist = dist
    total = np.sum(dist[1])
    self.n = total
    if total == 0:
        return

    self.a_min = float(dist[0, 0])
    self.a_max = float(dist[0, -1])
    weighted_sum = np.sum(dist[0] * dist[1])
    self.mean = float(weighted_sum / total)
    squared_deviation = np.sum(dist[1] * (dist[0] - self.mean) ** 2)
    self.var = float(squared_deviation / total)
    self.dev = math.sqrt(self.var)

    a, freq = np.asarray(dist)
    lower, median, upper = _quantiles(a, freq, [0.25, 0.5, 0.75])
    self.median = median
    # The plotting code omits q25 or q75 when they are None, so a quartile
    # that coincides with the median is not stored.
    if lower == median:
        self.q25 = None
    else:
        self.q25 = lower
    if upper == median:
        self.q75 = None
    else:
        self.q75 = upper

    quartile_range = FilterContinuous(
        attr, FilterContinuous.Between, self.q25, self.q75)
    self.conditions = [quartile_range]
    if group_val_index is not None:
        self.conditions.append(FilterDiscrete(group_var, [group_val_index]))
def commit(self):
    """Send the table of averages, or None when there is no input data.

    Without a group variable the averages are computed over all the data.
    Otherwise one table is computed per value of ``self.group_var`` plus
    one for the rows selected by the negated ``FilterDiscrete(group_var,
    None)`` filter, and the tables are concatenated along axis 0.
    """
    if self.data is None:
        averages = None
    elif self.group_var is None:
        averages = self.average_table(self.data)
    else:
        tables = [
            self.average_table(SameValue(self.group_var, value)(self.data))
            for value in self.group_var.values
        ]
        # Using "None" as in OWSelectRows.  The filter is wrapped in Values
        # because FilterDiscrete has neither a negate keyword nor an
        # IsDefined method.  (Presumably this picks the rows whose group
        # value is missing - confirm against FilterDiscrete.)
        negated_defined = Values(
            conditions=[FilterDiscrete(self.group_var, None)], negate=True)
        tables.append(self.average_table(negated_defined(self.data)))
        averages = Orange.data.Table.concatenate(tables, axis=0)
    self.Outputs.averages.send(averages)
def commit(self):
    """Send the selected data, the annotated data and the contingency table.

    ``self.selection`` holds ``(row, column)`` index pairs, which are
    swapped when ``self.transpose`` is set.  Cluster indices pick values
    from ``self.clusters`` (matched against ``self.cluster_var``) and gene
    indices pick variable names from ``self.genes``.  With an empty
    selection, ``None`` is sent as the selected data and the annotated
    table marks no rows.  The contingency table is only built when
    ``self.matrix`` is available.
    """
    if len(self.selection):
        cluster_ids = set()
        gene_ids = set()
        for ir, ic in self.selection:
            if not self.transpose:
                cluster_ids.add(ir)
                gene_ids.add(ic)
            else:
                cluster_ids.add(ic)
                gene_ids.add(ir)

        # Keep only the selected genes; class variables and metas stay.
        new_domain = Domain(
            [self.data.domain[self.genes[i]] for i in gene_ids],
            self.data.domain.class_vars,
            self.data.domain.metas)
        cluster_filter = Values(
            [FilterDiscrete(self.cluster_var, [self.clusters[i]])
             for i in cluster_ids],
            conjunction=False)
        selected_data = cluster_filter(self.data).transform(new_domain)

        # np.isin replaces np.in1d, which is deprecated since NumPy 2.0.
        selected_rows = np.where(
            np.isin(self.data.ids, selected_data.ids, assume_unique=True))
        annotated_data = create_annotated_table(
            self.data.transform(new_domain), selected_rows)
    else:
        selected_data = None
        annotated_data = create_annotated_table(self.data, [])

    if self.matrix is not None:
        table = ClusterAnalysis.contingency_table(
            self.matrix,
            DiscreteVariable(self.cluster_var.name, np.array(self.clusters)),
            np.array(self.genes)[self.gene_order],
            self.cluster_order[..., np.newaxis])
    else:
        table = None

    self.Outputs.selected_data.send(selected_data)
    self.Outputs.annotated_data.send(annotated_data)
    self.Outputs.contingency.send(table)
def bench_adult_filter_pre_pandas(self):
    # Benchmark body: apply a conjunction of one continuous and one
    # discrete filter to the adult data; the result is discarded.
    domain = self.adult.domain
    conditions = [
        FilterContinuous(domain["age"], FilterContinuous.Greater, 30),
        FilterDiscrete(domain["workclass"], [0]),
    ]
    Values(conditions)(self.adult)