def test_selected_columns_negated(self):
    """Check a negated ``IsDefined`` filter restricted to column 4.

    The expected result is every row of ``self.data`` whose value at
    index 4 is falsy.
    """
    negated_filter = filter.IsDefined(negate=True, columns=[4])
    result = negated_filter(self.table)
    expected = [record for record in self.data if not record[4]]

    self.assertEqual(len(result), len(expected))
    self.assertSequenceEqual(result, expected)
def test_can_inherit_is_defined_filter(self):
    """Check that a filtered table can still be sliced down to one column.

    Rows are filtered on column 1, then only column 4 of the result is
    compared against the equivalent selection from ``self.data``.
    """
    is_defined = filter.IsDefined(columns=[1])
    result = is_defined(self.table)
    result = result[:, 4]
    expected = [[record[4]] for record in self.data if record[1]]

    self.assertEqual(len(result), len(expected))
    self.assertSequenceEqual(result, expected)
def commit(self):
    """Apply the per-column imputation settings and send the result.

    Columns whose method is ``"drop"`` have rows with unknowns filtered
    out, columns whose method is ``"leave"`` are untouched, and every
    other column is imputed with its configured imputer. When there is
    no input data, ``None`` is sent. In all cases ``self.modified`` is
    reset to ``False`` afterwards.
    """
    data = self.data
    if data is not None:
        states = [self.state_for_column(idx)
                  for idx in range(len(self.varmodel))]

        # Split the columns into those handled by dropping rows and
        # those handled by an imputer; "leave" columns go in neither.
        drop_columns = []
        impute_columns = []
        for idx, state in enumerate(states):
            short = state.method.short
            if short == "drop":
                drop_columns.append(idx)
            elif short != "leave":
                impute_columns.append(idx)

        column_imputers = {}
        for idx in impute_columns:
            variable = self.varmodel[idx]
            column_imputers[variable] = self.imputer_for_column(idx)

        if column_imputers:
            data = ImputerModel(data.domain, column_imputers)(data)

        if drop_columns:
            data = data_filter.IsDefined(drop_columns)(data)

    self.send("Data", data)
    self.modified = False
def __call__(self, data):
    """Return a single-column table with unknowns replaced.

    Only the first transformer in ``self.transformers`` is used. It is
    evaluated on the rows of ``data`` where its variable is unknown, and
    the resulting values are written into a new table that holds just
    that variable.
    """
    transformer = self.transformers[0]

    # negate=True inverts IsDefined, selecting rows where the variable
    # is NOT defined.
    select_unknowns = data_filter.IsDefined(
        [transformer.variable], negate=True
    )
    replacements = transformer(select_unknowns(data))

    single_column_domain = Orange.data.Domain([transformer.variable])
    X = Orange.data.Table.from_table(single_column_domain, data)
    X.X[numpy.isnan(X), :] = replacements
    return X
def commit(self):
    """Build filters from ``self.conditions``, apply them and send outputs.

    Each entry of ``self.conditions`` is an ``(attr_name, oper_idx, values)``
    triple that is turned into one ``data_filter`` object. All filters are
    combined with ``data_filter.Values`` and applied to ``self.data`` twice:
    once as-is for the matching rows and once negated for the non-matching
    rows. An annotated copy of the input is built from the matching row ids.

    If purging is enabled (and the data is not an ``SqlTable``), the three
    outputs are additionally passed through ``Remove``. Outputs that are
    falsy are replaced with ``None`` before being sent. Finally the report
    descriptions and the output summary are refreshed.

    If a value of a continuous/time condition cannot be parsed, the error is
    shown through ``self.Error.parsing_error`` and the method returns early
    without sending anything.

    Raises:
        ValueError: if a discrete condition uses an operator other than
            ``IsDefined``, ``Equal``, ``NotEqual`` or ``In``.
    """
    # Without conditions (or without data) the input is passed through as
    # the matching output and the other two outputs stay empty.
    matching_output = self.data
    non_matching_output = None
    annotated_output = None

    self.Error.clear()
    if self.data:
        domain = self.data.domain
        conditions = []
        for attr_name, oper_idx, values in self.conditions:
            if attr_name in self.AllTypes:
                # Name refers to an entry of self.AllTypes rather than a
                # concrete domain variable, so there is no index/variable.
                attr_index = attr = None
                attr_type = self.AllTypes[attr_name]
                operators = self.Operators[attr_name]
            else:
                attr_index = domain.index(attr_name)
                attr = domain[attr_index]
                attr_type = vartype(attr)
                operators = self.Operators[type(attr)]
            opertype, _ = operators[oper_idx]
            if attr_type == 0:
                # NOTE(review): type code 0 presumably stands for an
                # "all variables" entry from self.AllTypes - confirm.
                filt = data_filter.IsDefined()
            elif attr_type in (2, 4):  # continuous, time
                try:
                    floats = self._values_to_floats(attr, values)
                except ValueError as e:
                    # Unparsable input: show the error and abort the whole
                    # commit; nothing is sent on the outputs.
                    self.Error.parsing_error(e.args[0])
                    return
                if floats is None:
                    # No usable values for this condition; skip it.
                    continue
                filt = data_filter.FilterContinuous(
                    attr_index, opertype, *floats)
            elif attr_type == 3:  # string
                filt = data_filter.FilterString(
                    attr_index, opertype, *[str(v) for v in values])
            else:
                # Remaining type codes are handled as discrete variables.
                if opertype == FilterDiscreteType.IsDefined:
                    f_values = None
                else:
                    if not values or not values[0]:
                        # Nothing selected for this condition; skip it.
                        continue
                    # Stored values are offset by one relative to the
                    # positions in attr.values.
                    values = [attr.values[i - 1] for i in values]
                    if opertype == FilterDiscreteType.Equal:
                        f_values = {values[0]}
                    elif opertype == FilterDiscreteType.NotEqual:
                        # Accept every value except the selected one.
                        f_values = set(attr.values)
                        f_values.remove(values[0])
                    elif opertype == FilterDiscreteType.In:
                        f_values = set(values)
                    else:
                        raise ValueError("invalid operand")
                filt = data_filter.FilterDiscrete(attr_index, f_values)
            conditions.append(filt)

        if conditions:
            filters = data_filter.Values(conditions)
            matching_output = filters(self.data)
            # The same filter object is reused with negate switched on to
            # obtain the complementary selection.
            filters.negate = True
            non_matching_output = filters(self.data)

            # Boolean mask over the input rows: True where the row id also
            # appears in the matching output.
            row_sel = np.in1d(self.data.ids, matching_output.ids)
            annotated_output = create_annotated_table(self.data, row_sel)

        # Disabled code, kept from the original:
        # if hasattr(self.data, "name"):
        #     matching_output.name = self.data.name
        #     non_matching_output.name = self.data.name

        purge_attrs = self.purge_attributes
        purge_classes = self.purge_classes
        # Purging is skipped for SqlTable inputs.
        if (purge_attrs or purge_classes) and \
                not isinstance(self.data, SqlTable):
            # Multiplying a flag by the setting switches it on or off
            # (assumes the purge settings are bool/0-1 - TODO confirm).
            attr_flags = sum([
                Remove.RemoveConstant * purge_attrs,
                Remove.RemoveUnusedValues * purge_attrs
            ])
            class_flags = sum([
                Remove.RemoveConstant * purge_classes,
                Remove.RemoveUnusedValues * purge_classes
            ])
            # same settings used for attributes and meta features
            remover = Remove(attr_flags, class_flags, attr_flags)

            matching_output = remover(matching_output)
            non_matching_output = remover(non_matching_output)
            annotated_output = remover(annotated_output)

    # Normalise falsy outputs to None before sending.
    if not matching_output:
        matching_output = None
    if not non_matching_output:
        non_matching_output = None
    if not annotated_output:
        annotated_output = None

    self.Outputs.matching_data.send(matching_output)
    self.Outputs.unmatched_data.send(non_matching_output)
    self.Outputs.annotated_data.send(annotated_output)

    # Descriptions stored for the report.
    self.match_desc = report.describe_data_brief(matching_output)
    self.nonmatch_desc = report.describe_data_brief(non_matching_output)

    summary = matching_output.approx_len() if matching_output else \
        self.info.NoOutput
    details = format_summary_details(
        matching_output) if matching_output else ""
    self.info.set_output_summary(summary, details)
def test_all_columns_negated(self):
    """Check a negated ``IsDefined`` filter applied to every column.

    The expected result is every row of ``self.data`` for which
    ``all(row)`` is false.
    """
    negated_filter = filter.IsDefined(negate=True)
    result = negated_filter(self.table)
    expected = [record for record in self.data if not all(record)]

    self.assertEqual(len(result), len(expected))
    self.assertSequenceEqual(result, expected)
def test_selected_columns(self):
    """Check an ``IsDefined`` filter restricted to column 0.

    The expected result is every row of ``self.data`` whose value at
    index 0 is truthy.
    """
    is_defined = filter.IsDefined(columns=[0])
    result = is_defined(self.table)
    expected = [record for record in self.data if record[0]]

    self.assertEqual(len(result), len(expected))
    self.assertSequenceEqual(result, expected)
def test_on_all_columns(self):
    """Check an ``IsDefined`` filter applied to every column.

    The expected result is every row of ``self.data`` for which
    ``all(row)`` is true.
    """
    is_defined = filter.IsDefined()
    result = is_defined(self.table)
    expected = [record for record in self.data if all(record)]

    self.assertEqual(len(result), len(expected))
    self.assertSequenceEqual(result, expected)