def set_output_summary(self, data):
    """Report ``data`` in the output part of the widget's status bar.

    For truthy ``data`` the brief summary is its length and the details
    come from ``format_summary_details``; otherwise the ``NoOutput``
    placeholder is shown with empty details.
    """
    if data:
        brief, description = len(data), format_summary_details(data)
    else:
        brief, description = self.info.NoOutput, ""
    self.info.set_output_summary(brief, description)
def commit(self):
    """Filter the input data by the configured conditions and send results.

    Every ``(attr_name, oper_idx, values)`` entry of ``self.conditions`` is
    converted into a ``data_filter`` filter. Conditions whose values are
    missing or incomplete are skipped. When at least one filter was built,
    three tables are produced: the matching rows, the non-matching rows
    (the same filter with ``negate`` set) and the input annotated with the
    row selection. Without any filter the input itself is the matching
    output and the other two outputs stay ``None``.

    If ``purge_attributes`` or ``purge_classes`` is set and the input is
    not an ``SqlTable``, the outputs are additionally passed through
    ``Remove``. Outputs of length zero are sent as ``None``.

    If a numeric condition value cannot be converted, the parsing error is
    shown and the method returns early without sending anything.

    Raises:
        ValueError: for an unexpected operator of a discrete condition.
    """
    matching_output = self.data
    non_matching_output = None
    annotated_output = None

    self.Error.clear()
    if self.data:
        domain = self.data.domain
        conditions = []
        for attr_name, oper_idx, values in self.conditions:
            if attr_name in self.AllTypes:
                # Special entry that does not refer to a single variable.
                attr_index = attr = None
                attr_type = self.AllTypes[attr_name]
                operators = self.Operators[attr_name]
            else:
                attr_index = domain.index(attr_name)
                attr = domain[attr_index]
                attr_type = vartype(attr)
                operators = self.Operators[type(attr)]
            opertype, _ = operators[oper_idx]
            # NOTE(review): type code 0 presumably marks the "all variables"
            # entries of AllTypes - confirm against vartype().
            if attr_type == 0:
                row_filter = data_filter.IsDefined()
            elif attr_type in (2, 4):  # continuous, time
                try:
                    floats = self._values_to_floats(attr, values)
                except ValueError as e:
                    self.Error.parsing_error(e.args[0])
                    return
                if floats is None:
                    continue
                row_filter = data_filter.FilterContinuous(
                    attr_index, opertype, *floats)
            elif attr_type == 3:  # string
                row_filter = data_filter.FilterString(
                    attr_index, opertype, *[str(v) for v in values])
            else:
                if opertype == FilterDiscreteType.IsDefined:
                    f_values = None
                else:
                    if not values or not values[0]:
                        continue
                    # Stored values are 1-based indices into attr.values.
                    values = [attr.values[i - 1] for i in values]
                    if opertype == FilterDiscreteType.Equal:
                        f_values = {values[0]}
                    elif opertype == FilterDiscreteType.NotEqual:
                        f_values = set(attr.values)
                        f_values.remove(values[0])
                    elif opertype == FilterDiscreteType.In:
                        f_values = set(values)
                    else:
                        raise ValueError("invalid operand")

                row_filter = data_filter.FilterDiscrete(attr_index, f_values)
            conditions.append(row_filter)

        if conditions:
            self.filters = data_filter.Values(conditions)
            matching_output = self.filters(self.data)
            # The same filter object is reused for the complement, so
            # self.filters is left negated after this point.
            self.filters.negate = True
            non_matching_output = self.filters(self.data)

            # np.isin replaces the deprecated np.in1d; the result is the
            # same as long as ids is one-dimensional (assumed here).
            row_sel = np.isin(self.data.ids, matching_output.ids)
            annotated_output = create_annotated_table(self.data, row_sel)

        purge_attrs = self.purge_attributes
        purge_classes = self.purge_classes
        if (purge_attrs or purge_classes) and \
                not isinstance(self.data, SqlTable):
            attr_flags = sum([
                Remove.RemoveConstant * purge_attrs,
                Remove.RemoveUnusedValues * purge_attrs
            ])
            class_flags = sum([
                Remove.RemoveConstant * purge_classes,
                Remove.RemoveUnusedValues * purge_classes
            ])
            # same settings used for attributes and meta features
            remover = Remove(attr_flags, class_flags, attr_flags)

            matching_output = remover(matching_output)
            non_matching_output = remover(non_matching_output)
            annotated_output = remover(annotated_output)

    # Empty tables are reported downstream as "no data".
    if matching_output is not None and not len(matching_output):
        matching_output = None
    if non_matching_output is not None and not len(non_matching_output):
        non_matching_output = None
    if annotated_output is not None and not len(annotated_output):
        annotated_output = None

    self.Outputs.matching_data.send(matching_output)
    self.Outputs.unmatched_data.send(non_matching_output)
    self.Outputs.annotated_data.send(annotated_output)

    self.match_desc = report.describe_data_brief(matching_output)
    self.nonmatch_desc = report.describe_data_brief(non_matching_output)

    summary = matching_output.approx_len() if matching_output else \
        self.info.NoOutput
    details = format_summary_details(
        matching_output) if matching_output else ""
    self.info.set_output_summary(summary, details)
def _set_output_summary(self, data: Optional[Table] = None):
    """Update the output summary in the status bar.

    Truthy ``data`` is summarised by its length and the text returned by
    ``format_summary_details``; anything else yields the ``NoOutput``
    placeholder with empty details.
    """
    has_data = bool(data)
    summary = len(data) if has_data else self.info.NoOutput
    details = format_summary_details(data) if has_data else ""
    self.info.set_output_summary(summary, details)
def _set_output_summary(self, output):
    """Show a summary of ``output`` in the output part of the status bar.

    A falsy ``output`` is reported with the ``NoOutput`` placeholder and
    empty details.
    """
    if not output:
        self.info.set_output_summary(self.info.NoOutput, "")
        return
    size = len(output)
    description = format_summary_details(output)
    self.info.set_output_summary(size, description)
def _set_input_summary(self):
    """Refresh the input summary in the status bar from ``self.data``.

    When ``self.data`` is falsy the ``NoInput`` placeholder is shown with
    empty details.
    """
    data = self.data
    if data:
        summary, details = len(data), format_summary_details(data)
    else:
        summary, details = self.info.NoInput, ""
    self.info.set_input_summary(summary, details)
def test_output(self):
    """Check output, labels and summaries for each combination of inputs.

    The data and template-data signals are sent and removed in turn; after
    each step the transformed output, the three info labels and (where the
    scenario checks them) the status-bar summaries are verified.
    """
    widget = self.widget
    info = widget.info
    no_input, no_output = "No data on input", "No data on output"

    def transformed():
        return self.get_output(widget.Outputs.transformed_data)

    def check_labels(input_text, template_text, output_text):
        self.assertEqual(input_text, widget.input_label.text())
        self.assertEqual(template_text, widget.template_label.text())
        self.assertEqual(output_text, widget.output_label.text())

    def check_input_summary(brief, details):
        self.assertEqual(info._StateInfo__input_summary.brief, brief)
        self.assertEqual(info._StateInfo__input_summary.details, details)

    def check_output_summary(brief, details):
        self.assertEqual(info._StateInfo__output_summary.brief, brief)
        self.assertEqual(info._StateInfo__output_summary.details, details)

    # send data and template data
    self.send_signal(widget.Inputs.data, self.data[::15])
    self.send_signal(widget.Inputs.template_data, self.disc_data)
    output = transformed()
    self.assertTableEqual(output, self.disc_data[::15])
    check_labels("Input data with 10 instances and 4 features.",
                 "Template domain applied.",
                 "Output data includes 4 features.")
    data_list = [("Data", self.data[::15]),
                 ("Template data", self.disc_data)]
    check_input_summary("10, 150", format_multiple_summaries(data_list))
    check_output_summary("10", format_summary_details(output))

    # remove template data
    self.send_signal(widget.Inputs.template_data, None)
    output = transformed()
    self.assertIsNone(output)
    check_labels("Input data with 10 instances and 4 features.",
                 "No template data on input.",
                 "")
    data_list = [("Data", self.data[::15]), ("Template data", None)]
    check_input_summary("10, 0", format_multiple_summaries(data_list))
    check_output_summary("-", no_output)

    # send template data
    self.send_signal(widget.Inputs.template_data, self.disc_data)
    output = transformed()
    self.assertTableEqual(output, self.disc_data[::15])
    check_labels("Input data with 10 instances and 4 features.",
                 "Template domain applied.",
                 "Output data includes 4 features.")

    # remove data
    self.send_signal(widget.Inputs.data, None)
    output = transformed()
    self.assertIsNone(output)
    check_labels("No data on input.",
                 "Template data includes 4 features.",
                 "")
    data_list = [("Data", None), ("Template data", self.disc_data)]
    check_input_summary("0, 150", format_multiple_summaries(data_list))
    check_output_summary("-", no_output)

    # remove template data
    self.send_signal(widget.Inputs.template_data, None)
    check_labels("No data on input.",
                 "No template data on input.",
                 "")
    check_input_summary("-", no_input)
    check_output_summary("-", no_output)
def test_summary(self):
    """Check that the status bar follows the connected inputs and selection.

    Three tables are connected under ids 1-3 and removed again; after each
    change the input summary is compared with the expected text, and after
    each selection the output summary is compared with the selected data.
    """
    widget = self.widget
    info = widget.info
    no_input, no_output = "No data on input", "No data on output"

    def check_input_summary(brief, details):
        self.assertEqual(info._StateInfo__input_summary.brief, brief)
        self.assertEqual(info._StateInfo__input_summary.details, details)

    def check_output_summary(brief, details):
        self.assertEqual(info._StateInfo__output_summary.brief, brief)
        self.assertEqual(info._StateInfo__output_summary.details, details)

    def check_selection_summary():
        # Make a selection and verify that the output summary describes it.
        self._select_data()
        selected = self.get_output(widget.Outputs.selected_data)
        brief, details = f"{len(selected)}", format_summary_details(selected)
        check_output_summary(brief, details)

    zoo = Table("zoo")
    self.send_signal(widget.Inputs.data, zoo, 1)
    check_input_summary("101", format_multiple_summaries([("zoo", zoo)]))
    check_output_summary("", no_output)
    check_selection_summary()

    iris = Table("iris")
    self.send_signal(widget.Inputs.data, iris, 2)
    check_input_summary(
        "101, 150",
        format_multiple_summaries([("zoo", zoo), ("iris", iris)]))
    check_output_summary("", no_output)
    check_selection_summary()

    brown = Table("brown-selected")
    self.send_signal(widget.Inputs.data, brown, 3)
    check_input_summary(
        "101, 150, 186",
        format_multiple_summaries(
            [("zoo", zoo), ("iris", iris), ("brown-selected", brown)]))
    check_selection_summary()

    # Disconnect the first input; the remaining two are still summarised.
    self.send_signal(widget.Inputs.data, None, 1)
    check_input_summary(
        "150, 186",
        format_multiple_summaries(
            [("iris", iris), ("brown-selected", brown)]))
    check_output_summary("", no_output)
    check_selection_summary()

    # Disconnect everything.
    self.send_signal(widget.Inputs.data, None, 2)
    self.send_signal(widget.Inputs.data, None, 3)
    check_input_summary("", no_input)
    check_output_summary("", no_output)
def __commit_finish(self):
    """Collect the results of the finished imputation task and send them.

    Must run in the widget's own thread with a task present whose futures
    match ``self.varmodel`` one to one (all of this is asserted). The
    result of each future is turned into replacement variables; rows
    flagged by a ``RowMask`` result are dropped from the output. If any
    variable fails with an unexpected exception, ``None`` is sent instead
    of a table. Finally the output summary in the status bar is updated.
    """
    assert QThread.currentThread() is self.thread()
    assert self.__task is not None
    futures = self.__task.futures
    assert len(futures) == len(self.varmodel)
    assert self.data is not None

    def get_variable(variable, future, drop_mask) \
            -> Optional[List[Orange.data.Variable]]:
        # Returns a (potentially empty) list of variables,
        # or None on failure that should interrupt the imputation
        assert future.done()
        try:
            res = future.result()
        except SparseNotSupported:
            self.Error.model_based_imputer_sparse()
            return []  # None?
        except VariableNotSupported:
            self.Warning.cant_handle_var(variable.name)
            return []
        except Exception:  # pylint: disable=broad-except
            log = logging.getLogger(__name__)
            log.info("Error for %s", variable.name, exc_info=True)
            self.Error.imputation_failed(variable.name)
            return None
        if isinstance(res, RowMask):
            # The mask is updated in place so the caller sees the rows to
            # drop; the variable itself is kept unchanged.
            drop_mask |= res.mask
            newvar = variable
        else:
            newvar = res
        # Normalise a single variable to a one-element list.
        if isinstance(newvar, Orange.data.Variable):
            newvar = [newvar]
        return newvar

    def create_data(attributes, class_vars):
        # Builds the output table in the new domain from the rows that are
        # not masked; uses ``drop_mask`` from the enclosing scope.
        # Returns None (and shows an error) if the conversion fails.
        domain = Orange.data.Domain(attributes, class_vars,
                                    self.data.domain.metas)
        try:
            return self.data.from_table(domain, self.data[~drop_mask])
        except Exception:  # pylint: disable=broad-except
            log = logging.getLogger(__name__)
            log.info("Error", exc_info=True)
            self.Error.imputation_failed("Unknown")
            return None

    self.__task = None
    self.setInvalidated(False)
    self.progressBarFinished()

    attributes = []
    class_vars = []
    drop_mask = np.zeros(len(self.data), bool)
    for i, (var, fut) in enumerate(zip(self.varmodel, futures)):
        newvar = get_variable(var, fut, drop_mask)
        if newvar is None:
            # Hard failure: give up and send no data.
            data = None
            break
        # The first len(domain.attributes) entries of varmodel are treated
        # as attributes, the remaining ones as class variables.
        if i < len(self.data.domain.attributes):
            attributes.extend(newvar)
        else:
            class_vars.extend(newvar)
    else:
        # Reached only when the loop was not interrupted by a failure.
        data = create_data(attributes, class_vars)

    self.Outputs.data.send(data)
    self.modified = False
    summary = len(data) if data else self.info.NoOutput
    details = format_summary_details(data) if data else ""
    self.info.set_output_summary(summary, details)
def test_details(self):
    """Check that ``format_summary_details`` formats the details correctly.

    Covers bundled data sets, tables built with ``make_table`` using
    different mixes of features, targets and metas, and ``None``.
    """
    def total_features(table):
        return len(table.domain.variables) + len(table.domain.metas)

    def check(table, expected):
        self.assertEqual(expected, format_summary_details(table))

    data = Table('zoo')
    n_features = total_features(data)
    check(data, (
        f'{len(data)} instances, '
        f'{n_features} features\n'
        f'Features: {len(data.domain.attributes)} categorical\n'
        f'Target: categorical\n'
        f'Metas: string (not shown)'
    ))

    data = Table('housing')
    n_features = total_features(data)
    check(data, (
        f'{len(data)} instances, '
        f'{n_features} features\n'
        f'Features: {len(data.domain.attributes)} numeric\n'
        f'Target: numeric\n'
        f'Metas: —'
    ))

    data = Table('heart_disease')
    n_features = total_features(data)
    check(data, (
        f'{len(data)} instances, '
        f'{n_features} features\n'
        f'Features: {len(data.domain.attributes)} '
        f'(7 categorical, 6 numeric)\n'
        f'Target: categorical\n'
        f'Metas: —'
    ))

    data = make_table([continuous_full, continuous_missing],
                      target=[rgb_full, rgb_missing],
                      metas=[ints_full, ints_missing])
    n_features = total_features(data)
    check(data, (
        f'{len(data)} instances, '
        f'{n_features} features\n'
        f'Features: {len(data.domain.attributes)} numeric\n'
        f'Target: {len(data.domain.class_vars)} categorical\n'
        f'Metas: {len(data.domain.metas)} categorical'
    ))

    data = make_table([continuous_full, time_full, ints_full, rgb_missing],
                      target=[rgb_full, continuous_missing],
                      metas=[string_full, string_missing])
    n_features = total_features(data)
    check(data, (
        f'{len(data)} instances, '
        f'{n_features} features\n'
        f'Features: {len(data.domain.attributes)} '
        f'(2 categorical, 1 numeric, 1 time)\n'
        f'Target: {len(data.domain.class_vars)} '
        f'(1 categorical, 1 numeric)\n'
        f'Metas: {len(data.domain.metas)} string (not shown)'
    ))

    data = make_table([time_full, time_missing], target=[ints_missing],
                      metas=None)
    check(data, (
        f'{len(data)} instances, '
        f'{len(data.domain.variables)} features\n'
        f'Features: {len(data.domain.attributes)} time\n'
        f'Target: categorical\n'
        f'Metas: —'
    ))

    data = make_table([rgb_full, ints_full], target=None, metas=None)
    check(data, (
        f'{len(data)} instances, '
        f'{len(data.domain.variables)} features\n'
        f'Features: {len(data.domain.variables)} categorical\n'
        f'Target: —\n'
        f'Metas: —'
    ))

    data = make_table([rgb_full], target=None, metas=None)
    check(data, (
        f'{len(data)} instances, '
        f'{len(data.domain.variables)} feature\n'
        f'Features: categorical\n'
        f'Target: —\n'
        f'Metas: —'
    ))

    data = None
    check(data, '')
def set_data(self, data: Optional[Table]):
    """Store the input table and update the input summary.

    Truthy ``data`` is summarised by its length and the text from
    ``format_summary_details``; otherwise the ``NoInput`` placeholder is
    shown with empty details.
    """
    self.data = data
    if data:
        brief, details = len(data), format_summary_details(data)
    else:
        brief, details = self.info.NoInput, ""
    self.info.set_input_summary(brief, details)
def _set_input_summary(self, dataset):
    """Show a summary of ``dataset`` in the input part of the status bar.

    A falsy ``dataset`` is reported with the ``NoInput`` placeholder and
    empty details.
    """
    if not dataset:
        summary_args = (self.info.NoInput, "")
    else:
        summary_args = (len(dataset), format_summary_details(dataset))
    self.info.set_input_summary(*summary_args)
def _set_summary(data, empty, setter): summary = len(data) if data else empty details = format_summary_details(data) if data else "" setter(summary, details)
def set_data(self, data=None):
    """Receive new input data and distribute its variables among the lists.

    The current role hints are refreshed and the context is closed before
    the data is replaced. With data present, the context is opened and
    every variable gets a ``(role, position)`` hint: defaults follow the
    variable's place in the domain and are overridden by
    ``self.domain_role_hints``. The variables are then split by role into
    the used, class, meta and available lists, ordered by position, and
    the input summary is updated. Without data all four lists are cleared
    and the ``NoInput`` summary is shown.
    """
    self.update_domain_role_hints()
    self.closeContext()
    self.data = data

    if data is None:
        self.used_attrs[:] = []
        self.class_attrs[:] = []
        self.meta_attrs[:] = []
        self.available_attrs[:] = []
        self.info.set_input_summary(self.info.NoInput)
        return

    self.openContext(data)
    domain = data.domain
    all_vars = domain.variables + domain.metas

    def signature(var):
        # Variables are identified by name and type.
        return var.name, vartype(var)

    # Default hints: every variable keeps the role and position it has in
    # the domain. Later roles overwrite earlier ones on equal signatures.
    hints = {}
    default_roles = (
        ("attribute", domain.attributes),
        ("meta", domain.metas),
        ("class", domain.class_vars),
    )
    for role, variables in default_roles:
        for position, var in enumerate(variables):
            hints[signature(var)] = (role, position)

    # update the hints from context settings
    hints.update(self.domain_role_hints)

    def ordered(role):
        # Variables hinted for ``role``, sorted by hinted position only
        # (ties keep their order in ``all_vars``).
        placed = [
            (hints[signature(var)][1], var)
            for var in all_vars
            if hints[signature(var)][0] == role
        ]
        placed.sort(key=lambda item: item[0])
        return [var for _, var in placed]

    attributes = ordered("attribute")
    classes = ordered("class")
    metas = ordered("meta")
    available = ordered("available")

    self.used_attrs[:] = attributes
    self.class_attrs[:] = classes
    self.meta_attrs[:] = metas
    self.available_attrs[:] = available
    self.info.set_input_summary(len(data), format_summary_details(data))
def set_data(self, data):
    """Store the input data, update the input summary and apply.

    Falsy ``data`` is reported with the ``NoInput`` placeholder and empty
    details. ``unconditional_apply`` is called in either case.
    """
    self.data = data
    if data:
        brief, details = len(data), format_summary_details(data)
    else:
        brief, details = self.info.NoInput, ""
    self.info.set_input_summary(brief, details)
    self.unconditional_apply()