class OWPredictions(OWWidget):
    """
    Widget that applies the connected models to the input data and shows
    the predictions side by side with the data.

    Outputs the input data extended with prediction (and probability)
    meta columns, and evaluation results for the models whose target
    matches the class variable of the data.
    """
    name = "Predictions"
    icon = "icons/Predictions.svg"
    priority = 200
    description = "Display predictions of models for an input dataset."
    keywords = []

    class Inputs:
        data = Input("Data", Orange.data.Table)
        predictors = Input("Predictors", Model, multiple=True)

    class Outputs:
        predictions = Output("Predictions", Orange.data.Table)
        evaluation_results = Output("Evaluation Results", Results)

    class Warning(OWWidget.Warning):
        empty_data = Msg("Empty dataset")
        wrong_targets = Msg(
            "Some model(s) predict a different target (see more ...)\n{}")

    class Error(OWWidget.Error):
        predictor_failed = Msg("Some predictor(s) failed (see more ...)\n{}")
        scorer_failed = Msg("Some scorer(s) failed (see more ...)\n{}")

    settingsHandler = settings.ClassValuesContextHandler()
    score_table = settings.SettingProvider(ScoreTable)

    #: List of selected class value indices in the `class_values` list
    selected_classes = settings.ContextSetting([])
    #: Rows selected in the views; restored once data arrives
    selection = settings.Setting([], schema_only=True)

    def __init__(self):
        """Set up the state, the control area and the two linked views."""
        super().__init__()

        self.data = None  # type: Optional[Orange.data.Table]
        self.predictors = {}  # type: Dict[object, PredictorSlot]
        self.class_values = []  # type: List[str]
        self._delegates = []
        self.left_width = 10
        self.selection_store = None
        # Selection loaded from settings; applied in `set_data`
        self.__pending_selection = self.selection

        self._set_input_summary()
        self._set_output_summary(None)
        gui.listBox(self.controlArea, self, "selected_classes", "class_values",
                    box="Show probabilities for",
                    callback=self._update_prediction_delegate,
                    selectionMode=QListWidget.ExtendedSelection,
                    addSpace=False,
                    sizePolicy=(QSizePolicy.Preferred, QSizePolicy.Preferred))
        gui.rubber(self.controlArea)
        self.reset_button = gui.button(
            self.controlArea, self, "Restore Original Order",
            callback=self._reset_order,
            tooltip="Show rows in the original order")

        table_opts = dict(horizontalScrollBarPolicy=Qt.ScrollBarAlwaysOn,
                          horizontalScrollMode=QTableView.ScrollPerPixel,
                          selectionMode=QTableView.ExtendedSelection,
                          focusPolicy=Qt.StrongFocus)
        self.dataview = TableView(
            sortingEnabled=True,
            verticalScrollBarPolicy=Qt.ScrollBarAlwaysOn,
            **table_opts)
        self.predictionsview = TableView(
            sortingEnabled=True,
            verticalScrollBarPolicy=Qt.ScrollBarAlwaysOff,
            **table_opts)
        self.dataview.verticalHeader().hide()
        # Keep the vertical scroll position of both views in sync
        dsbar = self.dataview.verticalScrollBar()
        psbar = self.predictionsview.verticalScrollBar()
        psbar.valueChanged.connect(dsbar.setValue)
        dsbar.valueChanged.connect(psbar.setValue)

        self.dataview.verticalHeader().setDefaultSectionSize(22)
        self.predictionsview.verticalHeader().setDefaultSectionSize(22)
        self.dataview.verticalHeader().sectionResized.connect(
            lambda index, _, size:
            self.predictionsview.verticalHeader().resizeSection(index, size))

        self.dataview.setItemDelegate(DataItemDelegate(self.dataview))

        self.splitter = QSplitter(
            orientation=Qt.Horizontal, childrenCollapsible=False,
            handleWidth=2)
        self.splitter.splitterMoved.connect(self.splitter_resized)
        self.splitter.addWidget(self.predictionsview)
        self.splitter.addWidget(self.dataview)

        self.score_table = ScoreTable(self)
        self.vsplitter = gui.vBox(self.mainArea)
        self.vsplitter.layout().addWidget(self.splitter)
        self.vsplitter.layout().addWidget(self.score_table.view)

    def get_selection_store(self, proxy):
        """Return the shared selection store, creating it from `proxy`
        on first use."""
        # Both proxies map the same, so it doesn't matter which one is used
        # to initialize SharedSelectionStore
        if self.selection_store is None:
            self.selection_store = SharedSelectionStore(proxy)
        return self.selection_store

    @Inputs.data
    @check_sql_input
    def set_data(self, data):
        """Set the input data, rebuild the data view and drop all cached
        prediction results."""
        self.Warning.empty_data(shown=data is not None and not data)
        self.data = data
        self.selection_store = None
        if not data:
            self.dataview.setModel(None)
            self.predictionsview.setModel(None)
        else:
            # force full reset of the view's HeaderView state
            self.dataview.setModel(None)
            model = TableModel(data, parent=None)
            modelproxy = SortProxyModel()
            modelproxy.setSourceModel(model)
            self.dataview.setModel(modelproxy)
            sel_model = SharedSelectionModel(
                self.get_selection_store(modelproxy), modelproxy,
                self.dataview)
            self.dataview.setSelectionModel(sel_model)
            if self.__pending_selection is not None:
                self.selection = self.__pending_selection
                self.__pending_selection = None
                self.selection_store.select_rows(
                    set(self.selection), QItemSelectionModel.ClearAndSelect)
            sel_model.selectionChanged.connect(self.commit)
            sel_model.selectionChanged.connect(self._store_selection)

            self.dataview.model().list_sorted.connect(
                partial(
                    self._update_data_sort_order, self.dataview,
                    self.predictionsview))

        self._invalidate_predictions()

    def _store_selection(self):
        """Copy the currently selected rows into the `selection` setting."""
        self.selection = list(self.selection_store.rows)

    @property
    def class_var(self):
        """Class variable of the input data; a falsy value when there is
        no (or empty) data or the data has no class."""
        return self.data and self.data.domain.class_var

    # pylint: disable=redefined-builtin
    @Inputs.predictors
    def set_predictor(self, predictor=None, id=None):
        """Add, replace or (when `predictor` is None) remove the model
        connected under input `id`."""
        if id in self.predictors:
            if predictor is not None:
                self.predictors[id] = self.predictors[id]._replace(
                    predictor=predictor, name=predictor.name, results=None)
            else:
                del self.predictors[id]
        elif predictor is not None:
            self.predictors[id] = \
                PredictorSlot(predictor, predictor.name, None)

    def _set_class_values(self):
        """Collect the discrete target values of all models into
        `class_values` and preselect those present in the data's class."""
        class_values = []
        for slot in self.predictors.values():
            class_var = slot.predictor.domain.class_var
            if class_var and class_var.is_discrete:
                for value in class_var.values:
                    if value not in class_values:
                        class_values.append(value)

        if self.class_var and self.class_var.is_discrete:
            values = self.class_var.values
            # Values that also appear in the data's class come first
            self.class_values = sorted(
                class_values, key=lambda val: val not in values)
            # Indices must refer to the *sorted* list shown in the list box,
            # not to the unsorted local `class_values`
            self.selected_classes = [
                i for i, name in enumerate(self.class_values)
                if name in values]
        else:
            self.class_values = class_values  # This assignment updates listview
            self.selected_classes = []

    def handleNewSignals(self):
        """Recompute predictions, scores, views and outputs after the
        inputs have changed."""
        self._set_class_values()
        self._call_predictors()
        self._update_scores()
        self._update_predictions_model()
        self._update_prediction_delegate()
        self._set_errors()
        self._set_input_summary()
        self.commit()

    def _call_predictors(self):
        """Run every model that has no `Results` yet on the (classless)
        data and store either `Results` or an error string in its slot."""
        if not self.data:
            return
        if self.class_var:
            domain = self.data.domain
            classless_data = self.data.transform(
                Domain(domain.attributes, None, domain.metas))
        else:
            classless_data = self.data

        for inputid, slot in self.predictors.items():
            if isinstance(slot.results, Results):
                continue

            predictor = slot.predictor
            try:
                if predictor.domain.class_var.is_discrete:
                    pred, prob = predictor(classless_data, Model.ValueProbs)
                else:
                    pred = predictor(classless_data, Model.Value)
                    prob = numpy.zeros((len(pred), 0))
            except (ValueError, DomainTransformationError) as err:
                # A string in `results` marks the predictor as failed
                self.predictors[inputid] = \
                    slot._replace(results=f"{predictor.name}: {err}")
                continue

            results = Results()
            results.data = self.data
            results.domain = self.data.domain
            results.row_indices = numpy.arange(len(self.data))
            results.folds = (Ellipsis, )
            results.actual = self.data.Y
            results.unmapped_probabilities = prob
            results.unmapped_predicted = pred
            results.probabilities = results.predicted = None
            self.predictors[inputid] = slot._replace(results=results)

            target = predictor.domain.class_var
            if target != self.class_var:
                # Different target: leave `predicted`/`probabilities` unset
                continue

            if target is not self.class_var and target.is_discrete:
                backmappers, n_values = predictor.get_backmappers(self.data)
                prob = predictor.backmap_probs(prob, n_values, backmappers)
                pred = predictor.backmap_value(
                    pred, prob, n_values, backmappers)
            results.predicted = pred.reshape((1, len(self.data)))
            results.probabilities = prob.reshape((1,) + prob.shape)

    def _update_scores(self):
        """Refill the score table with one row per model that has mapped
        predictions, and report scorers that failed."""
        model = self.score_table.model
        model.clear()
        scorers = usable_scorers(self.class_var) if self.class_var else []
        self.score_table.update_header(scorers)
        errors = []
        for slot in self.predictors.values():
            results = slot.results
            if not isinstance(results, Results) or results.predicted is None:
                continue
            row = [QStandardItem(learner_name(slot.predictor)),
                   QStandardItem("N/A"), QStandardItem("N/A")]
            for scorer in scorers:
                item = QStandardItem()
                try:
                    score = scorer_caller(scorer, results)()[0]
                    item.setText(f"{score:.3f}")
                except Exception as exc:  # pylint: disable=broad-except
                    item.setToolTip(str(exc))
                    # Only report failures of scores that are displayed
                    if scorer.name in self.score_table.shown_scores:
                        errors.append(str(exc))
                row.append(item)
            self.score_table.model.appendRow(row)

        view = self.score_table.view
        if model.rowCount():
            view.setVisible(True)
            view.ensurePolished()
            view.setFixedHeight(
                5 + view.horizontalHeader().height() +
                view.verticalHeader().sectionSize(0) * model.rowCount())
        else:
            view.setVisible(False)

        self.Error.scorer_failed("\n".join(errors), shown=bool(errors))

    def _set_errors(self):
        """Show or clear the failed-predictor error and the wrong-target
        warning based on the stored slot results."""
        # Not all predictors are run every time, so errors can't be collected
        # in _call_predictors
        errors = "\n".join(
            f"- {p.predictor.name}: {p.results}"
            for p in self.predictors.values()
            if isinstance(p.results, str) and p.results)
        self.Error.predictor_failed(errors, shown=bool(errors))

        if self.class_var:
            inv_targets = "\n".join(
                f"- {pred.name} predicts '{pred.domain.class_var.name}'"
                for pred in (p.predictor for p in self.predictors.values()
                             if isinstance(p.results, Results)
                             and p.results.probabilities is None))
            self.Warning.wrong_targets(inv_targets, shown=bool(inv_targets))
        else:
            self.Warning.wrong_targets.clear()

    def _set_input_summary(self):
        """Update the input summary in the status bar."""
        if not self.data and not self.predictors:
            self.info.set_input_summary(self.info.NoInput)
            return

        summary = len(self.data) if self.data else 0
        details = self._get_details()
        self.info.set_input_summary(summary, details, format=Qt.RichText)

    def _get_details(self):
        """Return an HTML description of the input data and models."""
        details = "Data:<br>"
        details += format_summary_details(self.data).replace('\n', '<br>') if \
            self.data else "No data on input."
        details += "<hr>"
        pred_names = [v.name for v in self.predictors.values()]
        n_predictors = len(self.predictors)
        if n_predictors:
            n_valid = len(self._non_errored_predictors())
            details += plural("Model: {number} model{s}", n_predictors)
            if n_valid != n_predictors:
                details += f" ({n_predictors - n_valid} failed)"
            details += "<ul>"
            for name in pred_names:
                details += f"<li>{name}</li>"
            details += "</ul>"
        else:
            details += "Model:<br>No model on input."
        return details

    def _set_output_summary(self, output):
        """Update the output summary in the status bar."""
        summary = len(output) if output else self.info.NoOutput
        details = format_summary_details(output) if output else ""
        self.info.set_output_summary(summary, details)

    def _invalidate_predictions(self):
        """Reset the results of every slot so that models are re-run."""
        for inputid, pred in list(self.predictors.items()):
            self.predictors[inputid] = pred._replace(results=None)

    def _non_errored_predictors(self):
        """Return the slots whose results are a `Results` instance."""
        return [p for p in self.predictors.values()
                if isinstance(p.results, Results)]

    def _reordered_probabilities(self, prediction):
        """Return the slot's unmapped probabilities with columns arranged
        as in `class_values`; columns for values the model does not
        predict are filled with NaN."""
        cur_values = prediction.predictor.domain.class_var.values
        new_ind = [self.class_values.index(x) for x in cur_values]
        probs = prediction.results.unmapped_probabilities
        new_probs = numpy.full(
            (probs.shape[0], len(self.class_values)), numpy.nan)
        new_probs[:, new_ind] = probs
        return new_probs

    def _update_predictions_model(self):
        """Rebuild the predictions view model from the non-errored slots
        and hook it up to the shared selection and sorting."""
        results = []
        headers = []
        for p in self._non_errored_predictors():
            values = p.results.unmapped_predicted
            target = p.predictor.domain.class_var
            if target.is_discrete:
                # order probabilities in order from Show prob. for
                prob = self._reordered_probabilities(p)
                values = [Value(target, v) for v in values]
            else:
                prob = numpy.zeros((len(values), 0))
            results.append((values, prob))
            headers.append(p.predictor.name)

        if results:
            results = list(zip(*(zip(*res) for res in results)))
            model = PredictionsModel(results, headers)
        else:
            model = None

        if self.selection_store is not None:
            self.selection_store.unregister(
                self.predictionsview.selectionModel())

        predmodel = PredictionsSortProxyModel()
        predmodel.setSourceModel(model)
        predmodel.setDynamicSortFilter(True)
        self.predictionsview.setModel(predmodel)

        self.predictionsview.setSelectionModel(
            SharedSelectionModel(self.get_selection_store(predmodel),
                                 predmodel, self.predictionsview))

        hheader = self.predictionsview.horizontalHeader()
        hheader.setSortIndicatorShown(False)
        # SortFilterProxyModel is slow due to large abstraction overhead
        # (every comparison triggers multiple `model.index(...)`,
        # model.rowCount(...), `model.parent`, ... calls)
        hheader.setSectionsClickable(predmodel.rowCount() < 20000)

        self.predictionsview.model().list_sorted.connect(
            partial(
                self._update_data_sort_order, self.predictionsview,
                self.dataview))

        self.predictionsview.resizeColumnsToContents()

    def _update_data_sort_order(self, sort_source_view, sort_dest_view):
        """Make the rows of `sort_dest_view` follow the order in which
        `sort_source_view` is currently sorted, then commit."""
        sort_dest = sort_dest_view.model()
        sort_source = sort_source_view.model()
        sortindicatorshown = False
        if sort_dest is not None:
            assert isinstance(sort_dest, QSortFilterProxyModel)
            n = sort_dest.rowCount()
            if sort_source is not None and sort_source.sortColumn() >= 0:
                sortind = numpy.argsort(
                    [sort_source.mapToSource(sort_source.index(i, 0)).row()
                     for i in range(n)])
                # `numpy.int` was removed from NumPy; builtin `int` is the
                # documented replacement
                sortind = numpy.array(sortind, int)
                sortindicatorshown = True
            else:
                sortind = None

            sort_dest.setSortIndices(sortind)

        sort_dest_view.horizontalHeader().setSortIndicatorShown(False)
        sort_source_view.horizontalHeader().setSortIndicatorShown(
            sortindicatorshown)
        self.commit()

    def _reset_order(self):
        """Restore the original row order in both views."""
        datamodel = self.dataview.model()
        predmodel = self.predictionsview.model()
        if datamodel is not None:
            datamodel.setSortIndices(None)
            datamodel.sort(-1)
        if predmodel is not None:
            predmodel.setSortIndices(None)
            predmodel.sort(-1)
        self.predictionsview.horizontalHeader().setSortIndicatorShown(False)
        self.dataview.horizontalHeader().setSortIndicatorShown(False)

    def _all_color_values(self):
        """
        Return list of colors together with their values from all predictors
        classes. Colors and values are sorted according to the values order
        for simpler comparison.
        """
        predictors = self._non_errored_predictors()
        color_values = [
            list(zip(*sorted(zip(
                p.predictor.domain.class_var.colors,
                p.predictor.domain.class_var.values
            ), key=itemgetter(1))))
            for p in predictors
            if p.predictor.domain.class_var.is_discrete
        ]
        return color_values if color_values else [([], [])]

    @staticmethod
    def _colors_match(colors1, values1, color2, values2):
        """
        Test whether colors for values match. Colors matches when all
        values match for shorter list and colors match for shorter list.
        It is assumed that values will be sorted together with their colors.
        """
        shorter_length = min(len(colors1), len(color2))
        return (values1[:shorter_length] == values2[:shorter_length]
                and (numpy.array(colors1[:shorter_length]) ==
                     numpy.array(color2[:shorter_length])).all())

    def _get_colors(self):
        """
        Defines colors for values. If colors match in all models use the union
        otherwise use standard colors.
        """
        all_colors_values = self._all_color_values()
        base_color, base_values = all_colors_values[0]
        for c, v in all_colors_values[1:]:
            if not self._colors_match(base_color, base_values, c, v):
                base_color = []
                break
            # replace base_color if longer
            if len(v) > len(base_color):
                base_color = c
                base_values = v

        if len(base_color) != len(self.class_values):
            return LimitedDiscretePalette(len(self.class_values)).palette
        # reorder colors to widgets order
        colors = [None] * len(self.class_values)
        for c, v in zip(base_color, base_values):
            colors[self.class_values.index(v)] = c
        return colors

    def _update_prediction_delegate(self):
        """Create a fresh item delegate for every prediction column
        according to the current class selection."""
        self._delegates.clear()
        colors = self._get_colors()
        for col, slot in enumerate(self.predictors.values()):
            target = slot.predictor.domain.class_var
            shown_probs = (
                () if target.is_continuous else
                [val if self.class_values[val] in target.values else None
                 for val in self.selected_classes]
            )
            delegate = PredictionsItemDelegate(
                None if target.is_continuous else self.class_values,
                colors,
                shown_probs,
                target.format_str if target.is_continuous else None,
                parent=self.predictionsview
            )
            # QAbstractItemView does not take ownership of delegates, so we
            # must keep a reference to them ourselves
            self._delegates.append(delegate)
            self.predictionsview.setItemDelegateForColumn(col, delegate)
            self.predictionsview.setColumnHidden(col, False)

        self.predictionsview.resizeColumnsToContents()
        self._recompute_splitter_sizes()
        if self.predictionsview.model() is not None:
            self.predictionsview.model().setProbInd(self.selected_classes)

    def _recompute_splitter_sizes(self):
        """Set `left_width` to fit the predictions view and apply it."""
        if not self.data:
            return
        view = self.predictionsview
        self.left_width = \
            view.horizontalHeader().length() + view.verticalHeader().width()
        self._update_splitter()

    def _update_splitter(self):
        """Resize the splitter so the left pane is `left_width` wide."""
        w1, w2 = self.splitter.sizes()
        self.splitter.setSizes([self.left_width, w1 + w2 - self.left_width])

    def splitter_resized(self):
        """Remember the left pane width after the splitter was moved."""
        self.left_width = self.splitter.sizes()[0]

    def commit(self):
        """Send both outputs."""
        self._commit_predictions()
        self._commit_evaluation_results()

    def _commit_evaluation_results(self):
        """Send `Results` for the models with mapped predictions, built
        from the rows whose class value is known; send None if there are
        no such models."""
        slots = [p for p in self._non_errored_predictors()
                 if p.results.predicted is not None]
        if not slots:
            self.Outputs.evaluation_results.send(None)
            return

        # Rows with a missing class value can't be evaluated
        nanmask = numpy.isnan(self.data.get_column_view(self.class_var)[0])
        data = self.data[~nanmask]
        results = Results(data, store_data=True)
        results.folds = None
        results.row_indices = numpy.arange(len(data))
        results.actual = data.Y.ravel()
        results.predicted = numpy.vstack(
            tuple(p.results.predicted[0][~nanmask] for p in slots))
        if self.class_var and self.class_var.is_discrete:
            results.probabilities = numpy.array(
                [p.results.probabilities[0][~nanmask] for p in slots])
        results.learner_names = [p.name for p in slots]
        self.Outputs.evaluation_results.send(results)

    def _commit_predictions(self):
        """Send the data with prediction columns appended as metas,
        restricted to the selected rows or, if nothing is selected,
        arranged in the current sort order."""
        if not self.data:
            self._set_output_summary(None)
            self.Outputs.predictions.send(None)
            return

        newmetas = []
        newcolumns = []
        for slot in self._non_errored_predictors():
            if slot.predictor.domain.class_var.is_discrete:
                self._add_classification_out_columns(
                    slot, newmetas, newcolumns)
            else:
                self._add_regression_out_columns(slot, newmetas, newcolumns)

        attrs = list(self.data.domain.attributes)
        metas = list(self.data.domain.metas)
        names = [var.name
                 for var in chain(attrs, self.data.domain.class_vars, metas)
                 if var]
        # Rename new columns whose names clash with existing ones
        uniq_newmetas = []
        for new_ in newmetas:
            uniq = get_unique_names(names, new_.name)
            if uniq != new_.name:
                new_ = new_.copy(name=uniq)
            uniq_newmetas.append(new_)
            names.append(uniq)

        metas += uniq_newmetas
        domain = Orange.data.Domain(attrs, self.class_var, metas=metas)
        predictions = self.data.transform(domain)
        if newcolumns:
            newcolumns = numpy.hstack(
                [numpy.atleast_2d(cols) for cols in newcolumns])
            predictions.metas[:, -newcolumns.shape[1]:] = newcolumns

        index = self.dataview.model().index
        map_to = self.dataview.model().mapToSource
        assert self.selection_store is not None
        rows = None
        if self.selection_store.rows:
            rows = [ind.row()
                    for ind in self.dataview.selectionModel().selectedRows(0)]
            rows.sort()
        elif self.dataview.model().isSorted() \
                or self.predictionsview.model().isSorted():
            rows = list(range(len(self.data)))
        if rows:
            source_rows = [map_to(index(row, 0)).row() for row in rows]
            predictions = predictions[source_rows]
        self.Outputs.predictions.send(predictions)
        self._set_output_summary(predictions)

    @staticmethod
    def _add_classification_out_columns(slot, newmetas, newcolumns):
        """Append the predicted-class variable and one probability
        variable per class value, with their columns, for `slot`."""
        # Mapped or unmapped predictions?!
        # Or provide a checkbox so the user decides?
        pred = slot.predictor
        name = pred.name
        values = pred.domain.class_var.values
        newmetas.append(DiscreteVariable(name=name, values=values))
        newcolumns.append(slot.results.unmapped_predicted.reshape(-1, 1))
        newmetas += [ContinuousVariable(name=f"{name} ({value})")
                     for value in values]
        newcolumns.append(slot.results.unmapped_probabilities)

    @staticmethod
    def _add_regression_out_columns(slot, newmetas, newcolumns):
        """Append the predicted-value variable and column for `slot`."""
        newmetas.append(ContinuousVariable(name=slot.predictor.name))
        newcolumns.append(slot.results.unmapped_predicted.reshape((-1, 1)))

    def send_report(self):
        """Add the input details, the data with predictions and the
        score table to the report."""
        def merge_data_with_predictions():
            data_model = self.dataview.model()
            predictions_view = self.predictionsview
            predictions_model = predictions_view.model()

            # use ItemDelegate to style prediction values
            delegates = [predictions_view.itemDelegateForColumn(i)
                         for i in range(predictions_model.columnCount())]

            # iterate only over visible columns of data's QTableView
            iter_data_cols = list(
                filter(lambda x: not self.dataview.isColumnHidden(x),
                       range(data_model.columnCount())))

            # print header
            yield [''] + \
                  [predictions_model.headerData(
                      col, Qt.Horizontal, Qt.DisplayRole)
                   for col in range(predictions_model.columnCount())] + \
                  [data_model.headerData(col, Qt.Horizontal, Qt.DisplayRole)
                   for col in iter_data_cols]

            # print data & predictions
            for i in range(data_model.rowCount()):
                yield [data_model.headerData(
                    i, Qt.Vertical, Qt.DisplayRole)] + \
                      [delegate.displayText(
                          predictions_model.data(
                              predictions_model.index(i, j)),
                          QLocale())
                       for j, delegate in enumerate(delegates)] + \
                      [data_model.data(data_model.index(i, j))
                       for j in iter_data_cols]

        if self.data:
            text = self._get_details().replace('\n', '<br>')
            if self.selected_classes:
                text += '<br>Showing probabilities for: '
                text += ', '.join([self.class_values[i]
                                   for i in self.selected_classes])
            self.report_paragraph('Info', text)
            self.report_table("Data & Predictions",
                              merge_data_with_predictions(),
                              header_rows=1, header_columns=1)

            self.report_table("Scores", self.score_table.view)

    def resizeEvent(self, event):
        """Keep the splitter proportions when the widget is resized."""
        super().resizeEvent(event)
        self._update_splitter()

    def showEvent(self, event):
        """Update the splitter once the widget has been shown."""
        super().showEvent(event)
        QTimer.singleShot(0, self._update_splitter)
class OWPredictions(widget.OWWidget): name = "Predictions" icon = "icons/Predictions.svg" priority = 200 description = "Display the predictions of models for an input data set." inputs = [("Data", Orange.data.Table, "set_data"), ("Predictors", Model, "set_predictor", widget.Multiple)] outputs = [("Predictions", Orange.data.Table), ("Evaluation Results", Orange.evaluation.Results)] settingsHandler = settings.ClassValuesContextHandler() #: Display the full input dataset or only the target variable columns (if #: available) show_attrs = settings.Setting(True) #: Show predicted values (for discrete target variable) show_predictions = settings.Setting(True) #: Show predictions probabilities (for discrete target variable) show_probabilities = settings.Setting(True) #: List of selected class value indices in the "Show probabilities" list selected_classes = settings.ContextSetting([]) #: Draw colored distribution bars draw_dist = settings.Setting(True) output_attrs = settings.Setting(True) output_predictions = settings.Setting(True) output_probabilities = settings.Setting(True) def __init__(self): super().__init__() #: Input data table self.data = None # type: Optional[Orange.data.Table] #: A dict mapping input ids to PredictorSlot self.predictors = OrderedDict() # type: Dict[object, PredictorSlot] #: A class variable (prediction target) self.class_var = None # type: Optional[Orange.data.Variable] #: List of (discrete) class variable's values self.class_values = [] # type: List[str] box = gui.vBox(self.controlArea, "Info") self.infolabel = gui.widgetLabel( box, "No data on input.\nPredictors: 0\nTask: N/A") self.infolabel.setMinimumWidth(150) gui.button(box, self, "Restore Original Order", callback=self._reset_order, tooltip="Show rows in the original order") self.classification_options = box = gui.vBox( self.controlArea, "Options (classification)", spacing=-1, addSpace=False) gui.checkBox(box, self, "show_predictions", "Show predicted class", 
callback=self._update_prediction_delegate) b = gui.checkBox(box, self, "show_probabilities", "Show predicted probabilities", callback=self._update_prediction_delegate) ibox = gui.indentedBox(box, sep=gui.checkButtonOffsetHint(b), addSpace=False) gui.listBox(ibox, self, "selected_classes", "class_values", callback=self._update_prediction_delegate, selectionMode=QListWidget.MultiSelection, addSpace=False) gui.checkBox(box, self, "draw_dist", "Draw distribution bars", callback=self._update_prediction_delegate) box = gui.vBox(self.controlArea, "Data View") gui.checkBox(box, self, "show_attrs", "Show full data set", callback=self._update_column_visibility) box = gui.vBox(self.controlArea, "Output", spacing=-1) self.checkbox_class = gui.checkBox( box, self, "output_attrs", "Original data", callback=self.commit) self.checkbox_class = gui.checkBox( box, self, "output_predictions", "Predictions", callback=self.commit) self.checkbox_prob = gui.checkBox( box, self, "output_probabilities", "Probabilities", callback=self.commit) gui.rubber(self.controlArea) self.splitter = QSplitter( orientation=Qt.Horizontal, childrenCollapsible=False, handleWidth=2, ) self.dataview = TableView( verticalScrollBarPolicy=Qt.ScrollBarAlwaysOn, horizontalScrollBarPolicy=Qt.ScrollBarAlwaysOn, horizontalScrollMode=QTableView.ScrollPerPixel, selectionMode=QTableView.NoSelection, focusPolicy=Qt.StrongFocus ) self.predictionsview = TableView( verticalScrollBarPolicy=Qt.ScrollBarAlwaysOff, horizontalScrollBarPolicy=Qt.ScrollBarAlwaysOn, horizontalScrollMode=QTableView.ScrollPerPixel, selectionMode=QTableView.NoSelection, focusPolicy=Qt.StrongFocus, sortingEnabled=True, ) self.predictionsview.setItemDelegate(PredictionsItemDelegate()) self.dataview.verticalHeader().hide() dsbar = self.dataview.verticalScrollBar() psbar = self.predictionsview.verticalScrollBar() psbar.valueChanged.connect(dsbar.setValue) dsbar.valueChanged.connect(psbar.setValue) self.dataview.verticalHeader().setDefaultSectionSize(22) 
self.predictionsview.verticalHeader().setDefaultSectionSize(22) self.dataview.verticalHeader().sectionResized.connect( lambda index, _, size: self.predictionsview.verticalHeader() .resizeSection(index, size) ) self.splitter.addWidget(self.predictionsview) self.splitter.addWidget(self.dataview) self.mainArea.layout().addWidget(self.splitter) @check_sql_input def set_data(self, data): """Set the input data set""" self.data = data if data is None: self.dataview.setModel(None) self.predictionsview.setModel(None) self.predictionsview.setItemDelegate(PredictionsItemDelegate()) else: # force full reset of the view's HeaderView state self.dataview.setModel(None) model = TableModel(data, parent=None) modelproxy = TableSortProxyModel() modelproxy.setSourceModel(model) self.dataview.setModel(modelproxy) self._update_column_visibility() self.invalidate_predictions() def set_predictor(self, predictor=None, id=None): if id in self.predictors: if predictor is not None: self.predictors[id] = self.predictors[id]._replace( predictor=predictor, name=predictor.name, results=None) else: del self.predictors[id] elif predictor is not None: self.predictors[id] = \ PredictorSlot(predictor, predictor.name, None) if predictor is not None: self.class_var = predictor.domain.class_var def handleNewSignals(self): self.clear_messages() if self.data is not None: for inputid, pred in list(self.predictors.items()): if pred.results is None or numpy.isnan(pred.results[0]).all(): try: results = self.predict(pred.predictor, self.data) except ValueError as err: err_msg = '{}:\n'.format(pred.predictor.name) + \ str(err) self.error(err_msg) n, m = len(self.data), 1 if self.data.domain.has_discrete_class: m = len(self.data.domain.class_var.values) probabilities = numpy.full((n, m), numpy.nan) results = (numpy.full(n, numpy.nan), probabilities) self.predictors[inputid] = pred._replace(results=results) if not self.predictors: self.class_var = None self.classification_options.setVisible( self.class_var is not 
None and self.class_var.is_discrete) self.closeContext() if self.class_var is not None and self.class_var.is_discrete: self.class_values = list(self.class_var.values) self.selected_classes = list(range(len(self.class_values))) self.openContext(self.class_var) else: self.class_values = [] self.selected_classes = [] self._update_predictions_model() self._update_prediction_delegate() # Check for prediction target consistency target_vars = set([p.predictor.domain.class_var for p in self.predictors.values()]) self.warning("Mismatching class variables", shown=len(target_vars) > 1) # Update the Info box text. info = [] if self.data is not None: info.append("Data: {} instances.".format(len(self.data))) else: info.append("Data: N/A") if self.predictors: info.append("Predictors: {}".format(len(self.predictors))) else: info.append("Predictors: N/A") if self.class_var is not None: if self.class_var.is_discrete: info.append("Task: Classification") self.checkbox_class.setEnabled(True) self.checkbox_prob.setEnabled(True) else: info.append("Task: Regression") self.checkbox_class.setEnabled(False) self.checkbox_prob.setEnabled(False) else: info.append("Task: N/A") self.infolabel.setText("\n".join(info)) self.commit() def invalidate_predictions(self): for inputid, pred in list(self.predictors.items()): self.predictors[inputid] = pred._replace(results=None) def _update_predictions_model(self): """Update the prediction view model.""" if self.data is not None: slots = self.predictors.values() results = [] for p in slots: values, prob = p.results if p.predictor.domain.class_var.is_discrete: values = [ Orange.data.Value(p.predictor.domain.class_var, v) for v in values ] results.append((values, prob)) results = list(zip(*(zip(*res) for res in results))) headers = [p.name for p in slots] model = PredictionsModel(results, headers) else: model = None predmodel = PredictionsSortProxyModel() predmodel.setSourceModel(model) predmodel.setDynamicSortFilter(True) 
self.predictionsview.setItemDelegate(PredictionsItemDelegate()) self.predictionsview.setModel(predmodel) hheader = self.predictionsview.horizontalHeader() hheader.setSortIndicatorShown(False) # SortFilterProxyModel is slow due to large abstraction overhead # (every comparison triggers multiple `model.index(...)`, # model.rowCount(...), `model.parent`, ... calls) hheader.setSectionsClickable(predmodel.rowCount() < 20000) predmodel.layoutChanged.connect(self._update_data_sort_order) self._update_data_sort_order() self.predictionsview.resizeColumnsToContents() def _update_column_visibility(self): """Update data column visibility.""" if self.data is not None: domain = self.data.domain first_attr = len(domain.class_vars) + len(domain.metas) for i in range(first_attr, first_attr + len(domain.attributes)): self.dataview.setColumnHidden(i, not self.show_attrs) if domain.class_var: self.dataview.setColumnHidden(0, False) def _update_data_sort_order(self): """Update data row order to match the current predictions view order""" datamodel = self.dataview.model() # data model proxy predmodel = self.predictionsview.model() # predictions model proxy sortindicatorshown = False if datamodel is not None: assert isinstance(datamodel, TableSortProxyModel) n = datamodel.rowCount() if predmodel is not None and predmodel.sortColumn() >= 0: sortind = numpy.argsort( [predmodel.mapToSource(predmodel.index(i, 0)).row() for i in range(n)]) sortind = numpy.array(sortind, numpy.int) sortindicatorshown = True else: sortind = None datamodel.setSortIndices(sortind) self.predictionsview.horizontalHeader() \ .setSortIndicatorShown(sortindicatorshown) def _reset_order(self): """Reset the row sorting to original input order.""" datamodel = self.dataview.model() predmodel = self.predictionsview.model() if datamodel is not None: datamodel.sort(-1) if predmodel is not None: predmodel.sort(-1) self.predictionsview.horizontalHeader().setSortIndicatorShown(False) def _update_prediction_delegate(self): 
"""Update the predicted probability visibility state""" delegate = PredictionsItemDelegate() colors = None if self.class_var is not None: if self.class_var.is_discrete: colors = [QtGui.QColor(*rgb) for rgb in self.class_var.colors] dist_fmt = "" pred_fmt = "" if self.show_probabilities: decimals = 2 float_fmt = "{{dist[{}]:.{}f}}" dist_fmt = " : ".join( float_fmt.format(i, decimals) for i in range(len(self.class_var.values)) if i in self.selected_classes ) if self.show_predictions: pred_fmt = "{value!s}" if pred_fmt and dist_fmt: fmt = dist_fmt + " \N{RIGHTWARDS ARROW} " + pred_fmt else: fmt = dist_fmt or pred_fmt else: assert isinstance(self.class_var, ContinuousVariable) fmt = "{{value:.{}f}}".format( self.class_var.number_of_decimals) delegate.setFormat(fmt) if self.draw_dist and colors is not None: delegate.setColors(colors) self.predictionsview.setItemDelegate(delegate) self.predictionsview.resizeColumnsToContents() if self.class_var is not None and self.class_var.is_discrete: proxy = self.predictionsview.model() if proxy is not None: proxy.setProbInd(numpy.array(self.selected_classes, dtype=int)) self._update_spliter() def _update_spliter(self): if self.data is None: return def width(view): h_header = view.horizontalHeader() v_header = view.verticalHeader() return h_header.length() + v_header.width() w = width(self.predictionsview) + 4 w1, w2 = self.splitter.sizes() self.splitter.setSizes([w, w1 + w2 - w]) def commit(self): if self.data is None or not self.predictors: self.send("Predictions", None) self.send("Evaluation Results", None) return predictor = next(iter(self.predictors.values())).predictor class_var = predictor.domain.class_var classification = class_var and class_var.is_discrete newmetas = [] newcolumns = [] slots = list(self.predictors.values()) if classification: if self.output_predictions: mc = [DiscreteVariable(name=p.name, values=class_var.values) for p in slots] newmetas.extend(mc) newcolumns.extend(p.results[0].reshape((-1, 1)) for p in 
slots) if self.output_probabilities: for p in slots: m = [ContinuousVariable(name="%s(%s)" % (p.name, value)) for value in class_var.values] newmetas.extend(m) newcolumns.extend(p.results[1] for p in slots) else: # regression mc = [ContinuousVariable(name=p.name) for p in self.predictors.values()] newmetas.extend(mc) newcolumns.extend(p.results[0].reshape((-1, 1)) for p in slots) if self.output_attrs: attrs = list(self.data.domain.attributes) else: attrs = [] metas = list(self.data.domain.metas) + newmetas domain = Orange.data.Domain(attrs, self.data.domain.class_var, metas=metas) predictions = self.data.from_table(domain, self.data) if newcolumns: newcolumns = numpy.hstack( [numpy.atleast_2d(cols) for cols in newcolumns] ) predictions.metas[:, -newcolumns.shape[1]:] = newcolumns results = None if self.data.domain.class_var == class_var: N = len(self.data) results = Orange.evaluation.Results(self.data, store_data=True) results.folds = None results.row_indices = numpy.arange(N) results.actual = self.data.Y.ravel() results.predicted = numpy.vstack( tuple(p.results[0] for p in slots)) if classification: results.probabilities = numpy.array( [p.results[1] for p in slots]) results.learner_names = [p.name for p in slots] self.send("Predictions", predictions) self.send("Evaluation Results", results) def send_report(self): def merge_data_with_predictions(): data_model = self.dataview.model() predictions_model = self.predictionsview.model() # use ItemDelegate to style prediction values style = lambda x: self.predictionsview.itemDelegate().displayText(x, QLocale()) # iterate only over visible columns of data's QTableView iter_data_cols = list(filter(lambda x: not self.dataview.isColumnHidden(x), range(data_model.columnCount()))) # print header yield [''] + \ [predictions_model.headerData(col, Qt.Horizontal, Qt.DisplayRole) for col in range(predictions_model.columnCount())] + \ [data_model.headerData(col, Qt.Horizontal, Qt.DisplayRole) for col in iter_data_cols] # print data & 
predictions for i in range(data_model.rowCount()): yield [data_model.headerData(i, Qt.Vertical, Qt.DisplayRole)] + \ [style(predictions_model.data(predictions_model.index(i, j))) for j in range(predictions_model.columnCount())] + \ [data_model.data(data_model.index(i, j)) for j in iter_data_cols] if self.data is not None: text = self.infolabel.text().replace('\n', '<br>') if self.show_probabilities and self.selected_classes: text += '<br>Showing probabilities for: ' text += ', '. join([self.data.domain.class_var.values[i] for i in self.selected_classes]) self.report_paragraph('Info', text) self.report_table("Data & Predictions", merge_data_with_predictions(), header_rows=1, header_columns=1) @classmethod def predict(cls, predictor, data): class_var = predictor.domain.class_var if class_var: if class_var.is_discrete: return cls.predict_discrete(predictor, data) elif class_var.is_continuous: return cls.predict_continuous(predictor, data) @staticmethod def predict_discrete(predictor, data): return predictor(data, Model.ValueProbs) @staticmethod def predict_continuous(predictor, data): values = predictor(data, Model.Value) return values, [None] * len(data)
class OWCorpusViewer(OWWidget):
    """Widget that lists the documents of a corpus and renders the selected ones.

    The document list can be narrowed with a regular expression that is
    matched against the text generated from the selected search features.
    Selected documents are rendered as HTML in a web view and sent to the
    outputs (matching / other / annotated corpus).
    """

    name = "Corpus Viewer"
    description = "Display corpus contents."
    icon = "icons/CorpusViewer.svg"
    priority = 500

    class Inputs:
        corpus = Input("Corpus", Corpus, replaces=["Data"])

    class Outputs:
        matching_docs = Output("Matching Docs", Corpus, default=True)
        other_docs = Output("Other Docs", Corpus)
        corpus = Output("Corpus", Corpus)

    settingsHandler = PerfectDomainContextHandler(
        match_values=PerfectDomainContextHandler.MATCH_VALUES_ALL
    )

    # features included in search
    search_indices = ContextSetting([], exclude_metas=False)
    # features for display
    display_indices = ContextSetting([], exclude_metas=False)
    display_features = ContextSetting([], exclude_metas=False)
    selected_documents = ContextSetting([])
    regexp_filter = ContextSetting("")

    show_tokens = Setting(False)
    autocommit = Setting(True)

    class Warning(OWWidget.Warning):
        no_feats_search = Msg('No features included in search.')
        no_feats_display = Msg('No features selected for display.')

    def __init__(self):
        """Build the control area (info, feature lists) and the main area."""
        super().__init__()

        self.corpus = None              # Corpus
        self.corpus_docs = None         # Documents generated from Corpus
        self.doc_webview = None         # WebView for showing content
        # two copies are needed since Display allows drag & drop
        self.search_features = []
        self.display_list_indices = [0]
        self.matches = 0                # Matches of the query

        # Info attributes; update_info() must run first so that the
        # attributes referenced by the label templates below exist.
        self.update_info()
        info_box = gui.widgetBox(self.controlArea, 'Info')
        gui.label(info_box, self, 'Tokens: %(n_tokens)s')
        gui.label(info_box, self, 'Types: %(n_types)s')
        gui.label(info_box, self, 'Matching documents: %(n_matching)s')
        gui.label(info_box, self, 'Matches: %(n_matches)s')

        # Search features
        self.search_listbox = gui.listBox(
            self.controlArea, self, 'search_indices', 'search_features',
            selectionMode=QListView.ExtendedSelection,
            box='Search features', callback=self.search_features_changed)

        # Display features
        display_box = gui.widgetBox(self.controlArea, 'Display features')
        self.display_listbox = gui.listBox(
            display_box, self, 'display_list_indices', 'display_features',
            selectionMode=QListView.ExtendedSelection,
            callback=self.show_docs, enableDragDrop=True)
        self.show_tokens_checkbox = gui.checkBox(
            display_box, self, 'show_tokens', 'Show Tokens && Tags',
            callback=self.show_docs)

        # Auto-commit box
        gui.auto_commit(self.controlArea, self, 'autocommit',
                        'Send data', 'Auto send is on')

        # Search
        self.filter_input = gui.lineEdit(
            self.mainArea, self, 'regexp_filter',
            orientation=Qt.Horizontal,
            sizePolicy=QSizePolicy(QSizePolicy.MinimumExpanding,
                                   QSizePolicy.Fixed),
            label='RegExp Filter:',
            callback=self.refresh_search)

        # Main area
        self.splitter = QSplitter(
            orientation=Qt.Horizontal,
            childrenCollapsible=False,
        )

        # Document list
        self.doc_list = QTableView()
        self.doc_list.setSelectionBehavior(QTableView.SelectRows)
        self.doc_list.setSelectionMode(QTableView.ExtendedSelection)
        self.doc_list.setEditTriggers(QAbstractItemView.NoEditTriggers)
        self.doc_list.horizontalHeader().setSectionResizeMode(
            QHeaderView.Stretch)
        self.doc_list.horizontalHeader().setVisible(False)
        self.splitter.addWidget(self.doc_list)

        self.doc_list_model = QStandardItemModel(self)
        self.doc_list.setModel(self.doc_list_model)
        self.doc_list.selectionModel().selectionChanged.connect(
            self.selection_changed
        )

        # Document contents
        self.doc_webview = gui.WebviewWidget(self.splitter, debug=False)

        self.mainArea.layout().addWidget(self.splitter)

    def copy_to_clipboard(self):
        """Copy the text currently selected in the web view to the clipboard."""
        text = self.doc_webview.selectedText()
        QApplication.clipboard().setText(text)

    @Inputs.corpus
    def set_data(self, corpus=None):
        """Handle a new (or removed) input corpus and refresh the whole widget."""
        self.closeContext()
        self.reset_widget()
        self.corpus = corpus
        self.search_features = []
        if corpus is not None:
            domain = self.corpus.domain
            # Enable/disable tokens checkbox
            has_tokens = self.corpus.has_tokens()
            if not has_tokens:
                self.show_tokens_checkbox.setCheckState(Qt.Unchecked)
            self.show_tokens_checkbox.setEnabled(has_tokens)

            self.search_features = list(
                filter_visible(chain(domain.variables, domain.metas)))
            self.display_features = list(
                filter_visible(chain(domain.variables, domain.metas)))
            self.search_indices = list(range(len(self.search_features)))
            self.display_indices = list(range(len(self.display_features)))
            # Default selection: the first document, if there is one.
            self.selected_documents = [corpus.titles[0]] if \
                corpus.titles is not None and len(corpus.titles) else []
            self.openContext(self.corpus)
            self.display_list_indices = self.display_indices
            self.regenerate_docs()
            self.list_docs()
            self.update_info()
            self.set_selection()
            self.show_docs()
        self.commit()

    def reset_widget(self):
        """Clear the corpus, the list boxes, the filter, models and warnings."""
        # Corpus
        self.corpus = None
        self.corpus_docs = None
        self.display_features = []
        # Widgets
        self.search_listbox.clear()
        self.display_listbox.clear()
        self.filter_input.clear()
        self.update_info()
        # Models/vars
        self.search_features.clear()
        self.search_indices.clear()
        self.display_indices.clear()
        self.doc_list_model.clear()
        # Warnings
        self.Warning.clear()
        # WebView
        self.doc_webview.setHtml('')

    def list_docs(self):
        """List documents into the left scrolling area.

        Only documents whose generated text matches the regular expression
        filter are listed (all documents when the filter is empty). The total
        number of matches is stored in ``self.matches``. If the filter is not
        a valid regular expression the list is left untouched.
        """
        if self.corpus_docs is None:
            return
        # TODO: remove search_keyword??
        search_keyword = self.regexp_filter.strip('|')
        matches = 0
        try:
            reg = re.compile(search_keyword, re.IGNORECASE)
        except re.error:
            return
        self.doc_list_model.clear()

        documents = zip(self.corpus, self.corpus.titles, self.corpus_docs)
        for doc, title, content in documents:
            res = sum(1 for _ in reg.finditer(content)) \
                if self.regexp_filter else 0
            if not self.regexp_filter or res:
                matches += res
                item = QStandardItem()
                item.setData(str(title), Qt.DisplayRole)
                item.setData(doc, Qt.UserRole)
                self.doc_list_model.appendRow(item)
        self.matches = matches

    def get_selected_documents_from_view(self) -> Set[str]:
        """
        Returns
        -------
        Set with names of selected documents in the QTableView
        """
        return {
            i.data(Qt.DisplayRole)
            for i in self.doc_list.selectionModel().selectedRows()
        }

    def set_selection(self) -> None:
        """
        Select documents in selected_documents attribute in the view
        """
        view = self.doc_list
        model = view.model()

        previously_selected = self.selected_documents.copy()
        selection = QItemSelection()
        for row in range(model.rowCount()):
            document = model.data(model.index(row, 0), Qt.DisplayRole)
            if document in self.selected_documents:
                selection.append(QItemSelectionRange(
                    view.model().index(row, 0),
                    view.model().index(row, 0)
                ))
        view.selectionModel().select(
            selection, QItemSelectionModel.ClearAndSelect
        )
        if len(selection) == 0:
            # in cases when selection is empty qt's selection_changed is not
            # called and so we need to manually trigger show_docs
            self.show_docs()
        # select() emits the selection-changed signal, which calls
        # selection_changed; when filtering, this removes the documents that
        # are currently filtered out from self.selected_documents. We want
        # them to stay selected after the user removes the filter, so the
        # previous selection is restored here.
        self.selected_documents = previously_selected

    def selection_changed(self) -> None:
        """
        Function is called every time the selection changes - when user select
        new range of documents
        """
        self.selected_documents = self.get_selected_documents_from_view()
        self.show_docs()
        self.commit()

    def show_docs(self):
        """Show the selected documents in the right area."""
        HTML = '''
        <!doctype html>
        <html>
        <head>
        <script type="text/javascript" src="resources/jquery-3.1.1.min.js">
        </script>
        <script type="text/javascript" src="resources/jquery.mark.min.js">
        </script>
        <script type="text/javascript" src="resources/highlighter.js">
        </script>
        <meta charset='utf-8'>
        <style>

        table {{ border-collapse: collapse; }}
        mark {{ background: #FFCD28; }}

        tr > td {{
            padding-bottom: 3px;
            padding-top: 3px;
        }}

        body {{
            font-family: Helvetica;
            font-size: 10pt;
        }}

        .line {{ border-bottom: 1px solid #000; }}
        .separator {{ height: 5px; }}

        .variables {{
            vertical-align: top;
            padding-right: 10px;
        }}

        .content {{
            /* Adopted from https://css-tricks.com/snippets/css/prevent-long-urls-from-breaking-out-of-container/ */

            /* These are technically the same, but use both */
            overflow-wrap: break-word;
            word-wrap: break-word;

            -ms-word-break: break-all;
            /* This is the dangerous one in WebKit, as it breaks things wherever */
            word-break: break-all;
            /* Instead use this non-standard one: */
            word-break: break-word;

            /* Adds a hyphen where the word breaks, if supported (No Blink) */
            -ms-hyphens: auto;
            -moz-hyphens: auto;
            -webkit-hyphens: auto;
            hyphens: auto;
        }}

        .token {{
            padding: 3px;
            border: 1px #B0B0B0 solid;
            margin-right: 5px;
            margin-bottom: 5px;
            display: inline-block;
        }}

        img {{
            max-width: 100%;
        }}

        </style>
        </head>
        <body>
        {}
        </body>
        </html>
        '''
        self.display_indices = self.display_list_indices
        if self.corpus is None:
            return

        self.Warning.no_feats_display.clear()
        if len(self.display_indices) == 0:
            self.Warning.no_feats_display()

        if self.show_tokens:
            tokens = list(self.corpus.ngrams_iterator(include_postags=True))

        marked_search_features = [
            f for i, f in enumerate(self.search_features)
            if i in self.search_indices]

        # Collect the table rows and join once at the end.
        parts = ['<table>']
        selected_rows = self.doc_list.selectionModel().selectedRows()
        for doc_count, index in enumerate(selected_rows):
            if doc_count > 0:   # add split
                parts.append('<tr class="line separator"><td/><td/></tr>'
                             '<tr class="separator"><td/><td/></tr>')

            document = index.data(Qt.UserRole)
            row_ind = document.row_index
            for ind in self.display_indices:
                feature = self.display_features[ind]
                value = str(document[feature.name])
                if feature in marked_search_features:
                    value = self.__mark_text(value)
                value = value.replace('\n', '<br/>')
                is_image = feature.attributes.get('type', '') == 'image'
                if is_image and value != '?':
                    value = '<img src="{}"></img>'.format(value)
                parts.append(
                    '<tr><td class="variables"><strong>{}:</strong></td>'
                    '<td class="content">{}</td></tr>'.format(
                        feature.name, value))

            if self.show_tokens:
                parts.append(
                    '<tr><td class="variables"><strong>Tokens & Tags:</strong></td>'
                    '<td>{}</td></tr>'.format(''.join(
                        '<span class="token">{}</span>'.format(token)
                        for token in tokens[row_ind])))

        parts.append('</table>')
        html = ''.join(parts)
        # Base URL lets the relative "resources/..." script paths resolve
        # relative to this file's location.
        base = QUrl.fromLocalFile(__file__)
        self.doc_webview.setHtml(HTML.format(html), base)

    def __mark_text(self, text):
        """Wrap every match of the regexp filter in ``text`` in a <mark> tag.

        Returns ``text`` unchanged when the filter is empty, is not a valid
        regular expression, or does not match.
        """
        search_keyword = self.regexp_filter.strip('|')
        if not search_keyword:
            return text

        try:
            reg = re.compile(search_keyword, re.IGNORECASE | re.MULTILINE)
        except re.error:
            return text

        matches = list(reg.finditer(text))
        if not matches:
            return text

        text = list(text)
        # Replace from the last match backwards so that earlier match
        # offsets stay valid while the character list grows.
        for m in matches[::-1]:
            text[m.start():m.end()] = list(
                '<mark data-markjs="true">{}</mark>'.format(
                    "".join(text[m.start():m.end()])))

        return "".join(text)

    def search_features_changed(self):
        """Rebuild the searchable documents and re-run the search."""
        self.regenerate_docs()
        self.refresh_search()

    def regenerate_docs(self):
        """Regenerate ``corpus_docs`` from the currently selected search features."""
        self.corpus_docs = None
        self.Warning.no_feats_search.clear()
        if self.corpus is not None:
            feats = [self.search_features[i] for i in self.search_indices]
            if len(feats) == 0:
                self.Warning.no_feats_search()
            self.corpus_docs = self.corpus.documents_from_features(feats)

    def refresh_search(self):
        """Re-list documents for the current filter and update selection/outputs."""
        if self.corpus is not None:
            self.list_docs()
            self.set_selection()
            self.update_info()
            self.commit()

    def update_info(self):
        """Update the attributes shown by the labels in the Info box."""
        if self.corpus is not None:
            self.n_matching = '{}/{}'.format(
                self.doc_list_model.rowCount(), len(self.corpus))
            self.n_matches = self.matches if self.matches else 'n/a'
            has_tokens = self.corpus.has_tokens()
            self.n_tokens = sum(map(len, self.corpus.tokens)) \
                if has_tokens else 'n/a'
            self.n_types = len(self.corpus.dictionary) \
                if has_tokens else 'n/a'
        else:
            self.n_matching = ''
            self.n_matches = ''
            self.n_tokens = ''
            self.n_types = ''

    def commit(self):
        """Send selected, unselected and annotated corpora to the outputs."""
        matched = unmatched = annotated_corpus = None
        corpus = self.corpus
        if corpus is not None:
            # it returns a set of selected documents which are in view
            selected_docs = self.get_selected_documents_from_view()
            titles = corpus.titles
            matched_mask = [
                i for i, t in enumerate(titles) if t in selected_docs
            ]
            unmatched_mask = [
                i for i, t in enumerate(titles) if t not in selected_docs
            ]

            matched = corpus[matched_mask] if len(matched_mask) else None
            unmatched = corpus[unmatched_mask] if len(unmatched_mask) else None
            annotated_corpus = create_annotated_table(corpus, matched_mask)
        self.Outputs.matching_docs.send(matched)
        self.Outputs.other_docs.send(unmatched)
        self.Outputs.corpus.send(annotated_corpus)

    def send_report(self):
        """Add the query and match statistics to the report."""
        self.report_items((
            ("Query", self.regexp_filter),
            ("Matching documents", self.n_matching),
            ("Matches", self.n_matches)
        ))

    def showEvent(self, event):
        """Adjust the splitter when the widget is shown."""
        super().showEvent(event)
        self.update_splitter()

    def update_splitter(self):
        """
        Update splitter that document list on the left never take more
        than 1/3 of the space. It is only set on showEvent. If user
        later changes sizes it stays as it is.
        """
        w1, w2 = self.splitter.sizes()
        ws = w1 + w2
        if w2 < 2/3 * ws:
            # QSplitter.setSizes expects integer sizes; true division would
            # produce floats, so use integer arithmetic that sums to ws.
            left = ws // 3
            self.splitter.setSizes([left, ws - left])
class OWPredictions(OWWidget):
    """Widget that shows predictions of input models next to the input data.

    Predictions are displayed in a table synchronized with the data table,
    and are sent on the output together with evaluation results.
    """

    name = "Predictions"
    icon = "icons/Predictions.svg"
    priority = 200
    description = "Display the predictions of models for an input dataset."
    keywords = []

    class Inputs:
        data = Input("Data", Orange.data.Table)
        predictors = Input("Predictors", Model, multiple=True)

    class Outputs:
        predictions = Output("Predictions", Orange.data.Table)
        evaluation_results = Output("Evaluation Results",
                                    Orange.evaluation.Results,
                                    dynamic=False)

    class Warning(OWWidget.Warning):
        empty_data = Msg("Empty dataset")

    class Error(OWWidget.Error):
        predictor_failed = \
            Msg("One or more predictors failed (see more...)\n{}")
        predictors_target_mismatch = \
            Msg("Predictors do not have the same target.")
        data_target_mismatch = \
            Msg("Data does not have the same target as predictors.")

    settingsHandler = settings.ClassValuesContextHandler()
    #: Display the full input dataset or only the target variable columns (if
    #: available)
    show_attrs = settings.Setting(True)
    #: Show predicted values (for discrete target variable)
    show_predictions = settings.Setting(True)
    #: Show predictions probabilities (for discrete target variable)
    show_probabilities = settings.Setting(True)
    #: List of selected class value indices in the "Show probabilities" list
    selected_classes = settings.ContextSetting([])
    #: Draw colored distribution bars
    draw_dist = settings.Setting(True)

    output_attrs = settings.Setting(True)
    output_predictions = settings.Setting(True)
    output_probabilities = settings.Setting(True)

    def __init__(self):
        """Build the control area and the two synchronized table views."""
        super().__init__()

        #: Input data table
        self.data = None  # type: Optional[Orange.data.Table]
        #: A dict mapping input ids to PredictorSlot
        self.predictors = OrderedDict()  # type: Dict[object, PredictorSlot]
        #: A class variable (prediction target)
        self.class_var = None  # type: Optional[Orange.data.Variable]
        #: List of (discrete) class variable's values
        self.class_values = []  # type: List[str]

        box = gui.vBox(self.controlArea, "Info")
        self.infolabel = gui.widgetLabel(
            box, "No data on input.\nPredictors: 0\nTask: N/A")
        self.infolabel.setMinimumWidth(150)
        gui.button(box, self, "Restore Original Order",
                   callback=self._reset_order,
                   tooltip="Show rows in the original order")

        self.classification_options = box = gui.vBox(
            self.controlArea, "Show", spacing=-1, addSpace=False)

        gui.checkBox(box, self, "show_predictions", "Predicted class",
                     callback=self._update_prediction_delegate)
        b = gui.checkBox(box, self, "show_probabilities",
                         "Predicted probabilities for:",
                         callback=self._update_prediction_delegate)
        ibox = gui.indentedBox(box, sep=gui.checkButtonOffsetHint(b),
                               addSpace=False)
        gui.listBox(ibox, self, "selected_classes", "class_values",
                    callback=self._update_prediction_delegate,
                    selectionMode=QListWidget.MultiSelection,
                    addSpace=False)
        gui.checkBox(box, self, "draw_dist", "Draw distribution bars",
                     callback=self._update_prediction_delegate)

        box = gui.vBox(self.controlArea, "Data View")
        gui.checkBox(box, self, "show_attrs", "Show full dataset",
                     callback=self._update_column_visibility)

        box = gui.vBox(self.controlArea, "Output", spacing=-1)
        # The "Original data" check box is not referenced again, so it is
        # not stored on self (it used to be bound to checkbox_class and was
        # immediately overwritten by the "Predictions" check box).
        gui.checkBox(box, self, "output_attrs", "Original data",
                     callback=self.commit)
        self.checkbox_class = gui.checkBox(
            box, self, "output_predictions", "Predictions",
            callback=self.commit)
        self.checkbox_prob = gui.checkBox(
            box, self, "output_probabilities", "Probabilities",
            callback=self.commit)

        gui.rubber(self.controlArea)

        self.splitter = QSplitter(
            orientation=Qt.Horizontal,
            childrenCollapsible=False,
            handleWidth=2,
        )
        self.dataview = TableView(
            verticalScrollBarPolicy=Qt.ScrollBarAlwaysOn,
            horizontalScrollBarPolicy=Qt.ScrollBarAlwaysOn,
            horizontalScrollMode=QTableView.ScrollPerPixel,
            selectionMode=QTableView.NoSelection,
            focusPolicy=Qt.StrongFocus)
        self.predictionsview = TableView(
            verticalScrollBarPolicy=Qt.ScrollBarAlwaysOff,
            horizontalScrollBarPolicy=Qt.ScrollBarAlwaysOn,
            horizontalScrollMode=QTableView.ScrollPerPixel,
            selectionMode=QTableView.NoSelection,
            focusPolicy=Qt.StrongFocus,
            sortingEnabled=True,
        )

        self.predictionsview.setItemDelegate(PredictionsItemDelegate())
        self.dataview.verticalHeader().hide()

        # Keep the vertical scroll position of both views in sync.
        dsbar = self.dataview.verticalScrollBar()
        psbar = self.predictionsview.verticalScrollBar()

        psbar.valueChanged.connect(dsbar.setValue)
        dsbar.valueChanged.connect(psbar.setValue)

        self.dataview.verticalHeader().setDefaultSectionSize(22)
        self.predictionsview.verticalHeader().setDefaultSectionSize(22)
        # Mirror row-height changes of the data view in the predictions view.
        self.dataview.verticalHeader().sectionResized.connect(
            lambda index, _, size:
            self.predictionsview.verticalHeader().resizeSection(index, size))

        self.splitter.addWidget(self.predictionsview)
        self.splitter.addWidget(self.dataview)

        self.mainArea.layout().addWidget(self.splitter)

    @Inputs.data
    @check_sql_input
    def set_data(self, data):
        """Set the input dataset"""
        if data is not None and not len(data):
            # An empty table is treated as no data, with a warning.
            data = None
            self.Warning.empty_data()
        else:
            self.Warning.empty_data.clear()

        self.data = data
        if data is None:
            self.dataview.setModel(None)
            self.predictionsview.setModel(None)
            self.predictionsview.setItemDelegate(PredictionsItemDelegate())
        else:
            # force full reset of the view's HeaderView state
            self.dataview.setModel(None)
            model = TableModel(data, parent=None)
            modelproxy = TableSortProxyModel()
            modelproxy.setSourceModel(model)
            self.dataview.setModel(modelproxy)
            self._update_column_visibility()

        self._invalidate_predictions()

    @Inputs.predictors
    def set_predictor(self, predictor=None, id=None):
        """Add, replace or remove the predictor connected on input ``id``."""
        if id in self.predictors:
            if predictor is not None:
                self.predictors[id] = self.predictors[id]._replace(
                    predictor=predictor, name=predictor.name, results=None)
            else:
                del self.predictors[id]
        elif predictor is not None:
            self.predictors[id] = \
                PredictorSlot(predictor, predictor.name, None)

    def set_class_var(self):
        """Determine the common target of the predictors and the data.

        ``self.class_var`` is set only when all predictors share one target
        and it does not conflict with the data's class variable; otherwise it
        is None and the matching error is shown.
        """
        pred_classes = set(pred.predictor.domain.class_var
                           for pred in self.predictors.values())
        self.Error.predictors_target_mismatch.clear()
        self.Error.data_target_mismatch.clear()
        self.class_var = None
        if len(pred_classes) > 1:
            self.Error.predictors_target_mismatch()
        if len(pred_classes) == 1:
            self.class_var = pred_classes.pop()
            if self.data is not None and \
                    self.data.domain.class_var is not None and \
                    self.class_var != self.data.domain.class_var:
                self.Error.data_target_mismatch()
                self.class_var = None

        discrete_class = self.class_var is not None \
            and self.class_var.is_discrete
        self.classification_options.setVisible(discrete_class)

        self.closeContext()
        if discrete_class:
            self.class_values = list(self.class_var.values)
            self.selected_classes = list(range(len(self.class_values)))
            self.openContext(self.class_var)
        else:
            self.class_values = []
            self.selected_classes = []

    def handleNewSignals(self):
        """Recompute predictions and refresh views and outputs."""
        self.set_class_var()
        if self.data is not None:
            self._call_predictors()
        self._update_predictions_model()
        self._update_prediction_delegate()
        self._set_errors()
        self._update_info()
        self.commit()

    def _call_predictors(self):
        """Run predictors that have no usable results on the current data.

        A slot's ``results`` is either the value returned by ``predict`` or,
        when prediction raised ValueError, an error message string.
        """
        for inputid, pred in self.predictors.items():
            # A string holds the error message of a previous failure; it must
            # be checked before indexing, since numpy.isnan cannot be applied
            # to a character of that message.
            if pred.results is None \
                    or isinstance(pred.results, str) \
                    or numpy.isnan(pred.results[0]).all():
                try:
                    results = self.predict(pred.predictor, self.data)
                except ValueError as err:
                    results = "{}: {}".format(pred.predictor.name, err)
                self.predictors[inputid] = pred._replace(results=results)

    def _set_errors(self):
        """Show the error messages of failed predictors, or clear the error."""
        errors = "\n".join(p.results for p in self.predictors.values()
                           if isinstance(p.results, str))
        if errors:
            self.Error.predictor_failed(errors)
        else:
            self.Error.predictor_failed.clear()

    def _update_info(self):
        """Update the info label and enable/disable the output check boxes."""
        info = []
        if self.data is not None:
            info.append("Data: {} instances.".format(len(self.data)))
        else:
            info.append("Data: N/A")

        n_predictors = len(self.predictors)
        n_valid = len(self._valid_predictors())
        if n_valid != n_predictors:
            info.append("Predictors: {} (+ {} failed)".format(
                n_valid, n_predictors - n_valid))
        else:
            info.append("Predictors: {}".format(n_predictors or "N/A"))

        if self.class_var is None:
            info.append("Task: N/A")
        elif self.class_var.is_discrete:
            info.append("Task: Classification")
            self.checkbox_class.setEnabled(True)
            self.checkbox_prob.setEnabled(True)
        else:
            info.append("Task: Regression")
            self.checkbox_class.setEnabled(False)
            self.checkbox_prob.setEnabled(False)

        self.infolabel.setText("\n".join(info))

    def _invalidate_predictions(self):
        """Discard the stored results of all predictors."""
        for inputid, pred in list(self.predictors.items()):
            self.predictors[inputid] = pred._replace(results=None)

    def _valid_predictors(self):
        """Return the slots that hold results which are not error messages.

        Returns an empty list when there is no data or no common target.
        """
        if self.class_var is not None and self.data is not None:
            return [p for p in self.predictors.values()
                    if p.results is not None
                    and not isinstance(p.results, str)]
        else:
            return []

    def _update_predictions_model(self):
        """Update the prediction view model."""
        if self.data is not None and self.class_var is not None:
            slots = self._valid_predictors()
            results = []
            class_var = self.class_var
            for p in slots:
                values, prob = p.results
                if self.class_var.is_discrete:
                    # if values were added to class_var between building the
                    # model and predicting, add zeros for new class values,
                    # which are always at the end
                    prob = numpy.c_[
                        prob,
                        numpy.zeros((prob.shape[0],
                                     len(class_var.values) - prob.shape[1]))]
                    values = [Value(class_var, v) for v in values]
                results.append((values, prob))
            # Transpose from per-predictor (values, probs) to per-row tuples
            # of (value, prob) pairs, one pair per predictor.
            results = list(zip(*(zip(*res) for res in results)))
            headers = [p.name for p in slots]
            model = PredictionsModel(results, headers)
        else:
            model = None

        predmodel = PredictionsSortProxyModel()
        predmodel.setSourceModel(model)
        predmodel.setDynamicSortFilter(True)
        self.predictionsview.setItemDelegate(PredictionsItemDelegate())
        self.predictionsview.setModel(predmodel)
        hheader = self.predictionsview.horizontalHeader()
        hheader.setSortIndicatorShown(False)
        # SortFilterProxyModel is slow due to large abstraction overhead
        # (every comparison triggers multiple `model.index(...)`,
        # model.rowCount(...), `model.parent`, ... calls)
        hheader.setSectionsClickable(predmodel.rowCount() < 20000)

        predmodel.layoutChanged.connect(self._update_data_sort_order)
        self._update_data_sort_order()
        self.predictionsview.resizeColumnsToContents()

    def _update_column_visibility(self):
        """Update data column visibility."""
        if self.data is not None and self.class_var is not None:
            domain = self.data.domain
            first_attr = len(domain.class_vars) + len(domain.metas)

            for i in range(first_attr, first_attr + len(domain.attributes)):
                self.dataview.setColumnHidden(i, not self.show_attrs)
            if domain.class_var:
                self.dataview.setColumnHidden(0, False)

    def _update_data_sort_order(self):
        """Update data row order to match the current predictions view order"""
        datamodel = self.dataview.model()  # data model proxy
        predmodel = self.predictionsview.model()  # predictions model proxy
        sortindicatorshown = False
        if datamodel is not None:
            assert isinstance(datamodel, TableSortProxyModel)
            n = datamodel.rowCount()
            if predmodel is not None and predmodel.sortColumn() >= 0:
                sortind = numpy.argsort(
                    [predmodel.mapToSource(predmodel.index(i, 0)).row()
                     for i in range(n)])
                # numpy.int was removed in NumPy 1.24; the builtin int is
                # the type it used to alias.
                sortind = numpy.array(sortind, dtype=int)
                sortindicatorshown = True
            else:
                sortind = None

            datamodel.setSortIndices(sortind)

        self.predictionsview.horizontalHeader() \
            .setSortIndicatorShown(sortindicatorshown)

    def _reset_order(self):
        """Reset the row sorting to original input order."""
        datamodel = self.dataview.model()
        predmodel = self.predictionsview.model()
        if datamodel is not None:
            datamodel.sort(-1)
        if predmodel is not None:
            predmodel.sort(-1)
        self.predictionsview.horizontalHeader().setSortIndicatorShown(False)

    def _update_prediction_delegate(self):
        """Update the predicted probability visibility state"""
        if self.class_var is not None:
            delegate = PredictionsItemDelegate()
            if self.class_var.is_continuous:
                self._setup_delegate_continuous(delegate)
            else:
                self._setup_delegate_discrete(delegate)
                proxy = self.predictionsview.model()
                if proxy is not None:
                    proxy.setProbInd(
                        numpy.array(self.selected_classes, dtype=int))
            self.predictionsview.setItemDelegate(delegate)
            self.predictionsview.resizeColumnsToContents()
        self._update_spliter()

    def _setup_delegate_discrete(self, delegate):
        """Set the display format and colors of ``delegate`` for a discrete target."""
        colors = [QtGui.QColor(*rgb) for rgb in self.class_var.colors]
        fmt = []
        if self.show_probabilities:
            fmt.append(" : ".join("{{dist[{}]:.2f}}".format(i)
                                  for i in sorted(self.selected_classes)))
        if self.show_predictions:
            fmt.append("{value!s}")
        delegate.setFormat(" \N{RIGHTWARDS ARROW} ".join(fmt))
        if self.draw_dist and colors is not None:
            delegate.setColors(colors)
        return delegate

    def _setup_delegate_continuous(self, delegate):
        """Set the display format of ``delegate`` for a continuous target."""
        delegate.setFormat("{{value:.{}f}}".format(
            self.class_var.number_of_decimals))

    def _update_spliter(self):
        """Resize the splitter so that the predictions view fits its columns."""
        if self.data is None:
            return

        def width(view):
            h_header = view.horizontalHeader()
            v_header = view.verticalHeader()
            return h_header.length() + v_header.width()

        w = width(self.predictionsview) + 4
        w1, w2 = self.splitter.sizes()
        self.splitter.setSizes([w, w1 + w2 - w])

    def commit(self):
        """Send the predictions and the evaluation results."""
        self._commit_predictions()
        self._commit_evaluation_results()

    def _commit_evaluation_results(self):
        """Send evaluation results for rows with a known target value.

        Sends None when there are no valid predictors or the data has no
        class variable.
        """
        slots = self._valid_predictors()
        if not slots or self.data.domain.class_var is None:
            self.Outputs.evaluation_results.send(None)
            return

        class_var = self.class_var
        # Rows with an unknown (NaN) target are left out of the results.
        nanmask = numpy.isnan(self.data.get_column_view(class_var)[0])
        data = self.data[~nanmask]
        N = len(data)
        results = Orange.evaluation.Results(data, store_data=True)
        results.folds = None
        results.row_indices = numpy.arange(N)
        results.actual = data.Y.ravel()
        results.predicted = numpy.vstack(
            tuple(p.results[0][~nanmask] for p in slots))
        if class_var and class_var.is_discrete:
            results.probabilities = numpy.array(
                [p.results[1][~nanmask] for p in slots])
        results.learner_names = [p.name for p in slots]
        self.Outputs.evaluation_results.send(results)

    def _commit_predictions(self):
        """Send the data with prediction columns appended as meta attributes.

        Sends None when there are no valid predictors.
        """
        slots = self._valid_predictors()
        if not slots:
            self.Outputs.predictions.send(None)
            return

        if self.class_var and self.class_var.is_discrete:
            newmetas, newcolumns = self._classification_output_columns()
        else:
            newmetas, newcolumns = self._regression_output_columns()

        attrs = list(self.data.domain.attributes) if self.output_attrs else []
        metas = list(self.data.domain.metas) + newmetas
        domain = \
            Orange.data.Domain(attrs, self.data.domain.class_var, metas=metas)
        predictions = self.data.transform(domain)
        if newcolumns:
            newcolumns = numpy.hstack(
                [numpy.atleast_2d(cols) for cols in newcolumns])
            # The new meta variables were appended last, so their values go
            # into the trailing meta columns.
            predictions.metas[:, -newcolumns.shape[1]:] = newcolumns
        self.Outputs.predictions.send(predictions)

    def _classification_output_columns(self):
        """Return (new meta variables, their columns) for a discrete target.

        Which columns are included depends on the ``output_predictions`` and
        ``output_probabilities`` settings.
        """
        newmetas = []
        newcolumns = []
        slots = self._valid_predictors()
        if self.output_predictions:
            newmetas += [DiscreteVariable(name=p.name,
                                          values=self.class_values)
                         for p in slots]
            newcolumns += [p.results[0].reshape((-1, 1)) for p in slots]

        if self.output_probabilities:
            newmetas += [ContinuousVariable(name="%s (%s)" % (p.name, value))
                         for p in slots for value in self.class_values]
            newcolumns += [p.results[1] for p in slots]
        return newmetas, newcolumns

    def _regression_output_columns(self):
        """Return (new meta variables, their columns) for a continuous target."""
        slots = self._valid_predictors()
        newmetas = [ContinuousVariable(name=p.name) for p in slots]
        newcolumns = [p.results[0].reshape((-1, 1)) for p in slots]
        return newmetas, newcolumns

    def send_report(self):
        """Report the info text and the table of data with predictions."""
        def merge_data_with_predictions():
            data_model = self.dataview.model()
            predictions_model = self.predictionsview.model()

            # use ItemDelegate to style prediction values
            style = lambda x: self.predictionsview.itemDelegate().displayText(
                x, QLocale())

            # iterate only over visible columns of data's QTableView
            iter_data_cols = list(
                filter(lambda x: not self.dataview.isColumnHidden(x),
                       range(data_model.columnCount())))

            # print header
            yield [''] + \
                  [predictions_model.headerData(col, Qt.Horizontal,
                                                Qt.DisplayRole)
                   for col in range(predictions_model.columnCount())] + \
                  [data_model.headerData(col, Qt.Horizontal, Qt.DisplayRole)
                   for col in iter_data_cols]

            # print data & predictions
            for i in range(data_model.rowCount()):
                yield [data_model.headerData(i, Qt.Vertical,
                                             Qt.DisplayRole)] + \
                      [style(predictions_model.data(
                          predictions_model.index(i, j)))
                       for j in range(predictions_model.columnCount())] + \
                      [data_model.data(data_model.index(i, j))
                       for j in iter_data_cols]

        if self.data is not None and self.class_var is not None:
            text = self.infolabel.text().replace('\n', '<br>')
            if self.show_probabilities and self.selected_classes:
                text += '<br>Showing probabilities for: '
                text += ', '.join(
                    [self.class_values[i] for i in self.selected_classes])
            self.report_paragraph('Info', text)
            self.report_table("Data & Predictions",
                              merge_data_with_predictions(),
                              header_rows=1, header_columns=1)

    @classmethod
    def predict(cls, predictor, data):
        """Predict ``data`` with ``predictor`` according to its target type.

        Returns None when the predictor's domain has no class variable.
        """
        class_var = predictor.domain.class_var
        if class_var:
            if class_var.is_discrete:
                return cls.predict_discrete(predictor, data)
            elif class_var.is_continuous:
                return cls.predict_continuous(predictor, data)

    @staticmethod
    def predict_discrete(predictor, data):
        """Return the result of calling ``predictor`` with Model.ValueProbs."""
        return predictor(data, Model.ValueProbs)

    @staticmethod
    def predict_continuous(predictor, data):
        """Return predicted values paired with a list of None placeholders."""
        values = predictor(data, Model.Value)
        return values, [None] * len(data)
class OWPredictions(OWWidget):
    """Widget showing the predictions of input models on an input dataset.

    Predictions are shown in a table next to the data; models whose target
    matches the data's class variable are additionally scored in a score
    table and sent to the "Evaluation Results" output.
    """
    name = "Predictions"
    icon = "icons/Predictions.svg"
    priority = 200
    description = "Display predictions of models for an input dataset."
    keywords = []

    class Inputs:
        data = Input("Data", Orange.data.Table)
        predictors = Input("Predictors", Model, multiple=True)

    class Outputs:
        predictions = Output("Predictions", Orange.data.Table)
        evaluation_results = Output("Evaluation Results", Results)

    class Warning(OWWidget.Warning):
        empty_data = Msg("Empty dataset")
        wrong_targets = Msg(
            "Some model(s) predict a different target (see more ...)\n{}")

    class Error(OWWidget.Error):
        predictor_failed = Msg("Some predictor(s) failed (see more ...)\n{}")
        scorer_failed = Msg("Some scorer(s) failed (see more ...)\n{}")

    settingsHandler = settings.ClassValuesContextHandler()
    score_table = settings.SettingProvider(ScoreTable)

    #: List of selected class value indices in the `class_values` list
    selected_classes = settings.ContextSetting([])

    def __init__(self):
        super().__init__()

        self.data = None  # type: Optional[Orange.data.Table]
        self.predictors = {}  # type: Dict[object, PredictorSlot]
        self.class_values = []  # type: List[str]
        # Keeps the per-column delegates alive (see
        # `_update_prediction_delegate`).
        self._delegates = []

        gui.listBox(self.controlArea, self, "selected_classes", "class_values",
                    box="Show probabilities for",
                    callback=self._update_prediction_delegate,
                    selectionMode=QListWidget.MultiSelection,
                    addSpace=False,
                    sizePolicy=(QSizePolicy.Preferred, QSizePolicy.Preferred))
        gui.rubber(self.controlArea)
        gui.button(self.controlArea, self, "Restore Original Order",
                   callback=self._reset_order,
                   tooltip="Show rows in the original order")

        table_opts = dict(horizontalScrollBarPolicy=Qt.ScrollBarAlwaysOn,
                          horizontalScrollMode=QTableView.ScrollPerPixel,
                          selectionMode=QTableView.NoSelection,
                          focusPolicy=Qt.StrongFocus)
        self.dataview = TableView(
            verticalScrollBarPolicy=Qt.ScrollBarAlwaysOn, **table_opts)
        self.predictionsview = TableView(
            sortingEnabled=True,
            verticalScrollBarPolicy=Qt.ScrollBarAlwaysOff,
            **table_opts)
        self.dataview.verticalHeader().hide()

        # Keep the vertical scroll position of both views in sync.
        dsbar = self.dataview.verticalScrollBar()
        psbar = self.predictionsview.verticalScrollBar()
        psbar.valueChanged.connect(dsbar.setValue)
        dsbar.valueChanged.connect(psbar.setValue)

        self.dataview.verticalHeader().setDefaultSectionSize(22)
        self.predictionsview.verticalHeader().setDefaultSectionSize(22)
        # Mirror row-height changes of the data view in the predictions view.
        self.dataview.verticalHeader().sectionResized.connect(
            lambda index, _, size:
            self.predictionsview.verticalHeader().resizeSection(index, size))

        self.splitter = QSplitter(
            orientation=Qt.Horizontal, childrenCollapsible=False,
            handleWidth=2)
        self.splitter.addWidget(self.predictionsview)
        self.splitter.addWidget(self.dataview)

        self.score_table = ScoreTable(self)
        self.vsplitter = gui.vBox(self.mainArea)
        self.vsplitter.layout().addWidget(self.splitter)
        self.vsplitter.layout().addWidget(self.score_table.view)

    @Inputs.data
    @check_sql_input
    def set_data(self, data):
        """Set the input dataset and invalidate all stored predictions."""
        self.Warning.empty_data(shown=data is not None and not data)
        self.data = data
        if not data:
            self.dataview.setModel(None)
            self.predictionsview.setModel(None)
        else:
            # force full reset of the view's HeaderView state
            self.dataview.setModel(None)
            model = TableModel(data, parent=None)
            modelproxy = TableSortProxyModel()
            modelproxy.setSourceModel(model)
            self.dataview.setModel(modelproxy)

        self._invalidate_predictions()

    @property
    def class_var(self):
        """Class variable of the input data (falsy when there is no data)."""
        return self.data and self.data.domain.class_var

    # pylint: disable=redefined-builtin
    @Inputs.predictors
    def set_predictor(self, predictor=None, id=None):
        """Add, replace or remove (when `predictor` is None) the model `id`."""
        if id in self.predictors:
            if predictor is not None:
                self.predictors[id] = self.predictors[id]._replace(
                    predictor=predictor, name=predictor.name, results=None)
            else:
                del self.predictors[id]
        elif predictor is not None:
            self.predictors[id] = PredictorSlot(predictor, predictor.name, None)

    def _set_class_values(self):
        """Collect class values of all discrete models and preselect those
        that also appear in the data's class variable."""
        class_values = []
        for slot in self.predictors.values():
            class_var = slot.predictor.domain.class_var
            if class_var and class_var.is_discrete:
                for value in class_var.values:
                    if value not in class_values:
                        class_values.append(value)

        if self.class_var and self.class_var.is_discrete:
            values = self.class_var.values
            # Stable sort: values known to the data come first.
            self.class_values = sorted(
                class_values, key=lambda val: val not in values)
            # Indices must refer to the *sorted* list, because that is the
            # list `selected_classes` indexes into everywhere else.
            self.selected_classes = [
                i for i, name in enumerate(self.class_values)
                if name in values]
        else:
            self.class_values = class_values  # This assignment updates listview
            self.selected_classes = []

    def handleNewSignals(self):
        """Recompute predictions, scores and views and send the outputs."""
        self._set_class_values()
        self._call_predictors()
        self._update_scores()
        self._update_predictions_model()
        self._update_prediction_delegate()
        self._set_errors()
        self._update_info()
        self.commit()

    def _call_predictors(self):
        """Run every model that has no stored `Results` on the input data.

        A failure is stored in the slot as an error string. Predictions of
        models whose target equals the data's class variable are also stored
        as `predicted`/`probabilities`; for other models these stay None.
        """
        if not self.data:
            return
        if self.class_var:
            domain = self.data.domain
            classless_data = self.data.transform(
                Domain(domain.attributes, None, domain.metas))
        else:
            classless_data = self.data

        for inputid, slot in self.predictors.items():
            if isinstance(slot.results, Results):
                continue

            predictor = slot.predictor
            try:
                if predictor.domain.class_var.is_discrete:
                    pred, prob = predictor(classless_data, Model.ValueProbs)
                else:
                    pred = predictor(classless_data, Model.Value)
                    prob = numpy.zeros((len(pred), 0))
            except (ValueError, DomainTransformationError) as err:
                self.predictors[inputid] = \
                    slot._replace(results=f"{predictor.name}: {err}")
                continue

            results = Results()
            results.data = self.data
            results.domain = self.data.domain
            results.row_indices = numpy.arange(len(self.data))
            results.folds = (Ellipsis, )
            results.actual = self.data.Y
            results.unmapped_probabilities = prob
            results.unmapped_predicted = pred
            results.probabilities = results.predicted = None
            self.predictors[inputid] = slot._replace(results=results)

            target = predictor.domain.class_var
            if target != self.class_var:
                continue

            # Equal but not identical variables: map the model's values back
            # onto the data's class variable.
            if target is not self.class_var and target.is_discrete:
                backmappers, n_values = predictor.get_backmappers(self.data)
                prob = predictor.backmap_probs(prob, n_values, backmappers)
                pred = predictor.backmap_value(
                    pred, prob, n_values, backmappers)
            results.predicted = pred.reshape((1, len(self.data)))
            results.probabilities = prob.reshape((1, ) + prob.shape)

    def _update_scores(self):
        """Refill the score table with one row per scorable model."""
        model = self.score_table.model
        model.clear()
        scorers = usable_scorers(self.class_var) if self.class_var else []
        self.score_table.update_header(scorers)
        errors = []
        for slot in self.predictors.values():
            results = slot.results
            if not isinstance(results, Results) or results.predicted is None:
                continue
            row = [QStandardItem(learner_name(slot.predictor)),
                   QStandardItem("N/A"), QStandardItem("N/A")]
            for scorer in scorers:
                item = QStandardItem()
                try:
                    score = scorer_caller(scorer, results)()[0]
                    item.setText(f"{score:.3f}")
                except Exception as exc:  # pylint: disable=broad-except
                    item.setToolTip(str(exc))
                    if scorer.name in self.score_table.shown_scores:
                        errors.append(str(exc))
                row.append(item)
            self.score_table.model.appendRow(row)

        view = self.score_table.view
        if model.rowCount():
            view.setVisible(True)
            view.ensurePolished()
            view.setFixedHeight(
                5 + view.horizontalHeader().height() +
                view.verticalHeader().sectionSize(0) * model.rowCount())
        else:
            view.setVisible(False)

        self.Error.scorer_failed("\n".join(errors), shown=bool(errors))

    def _set_errors(self):
        """Show or clear the failed-predictor error and wrong-target warning."""
        # Not all predictors are run every time, so errors can't be collected
        # in _call_predictors
        errors = "\n".join(
            f"- {p.predictor.name}: {p.results}"
            for p in self.predictors.values()
            if isinstance(p.results, str) and p.results)
        self.Error.predictor_failed(errors, shown=bool(errors))

        if self.class_var:
            inv_targets = "\n".join(
                f"- {pred.name} predicts '{pred.domain.class_var.name}'"
                for pred in (p.predictor for p in self.predictors.values()
                             if isinstance(p.results, Results)
                             and p.results.probabilities is None))
            self.Warning.wrong_targets(inv_targets, shown=bool(inv_targets))
        else:
            self.Warning.wrong_targets.clear()

    def _get_details(self):
        """Return a multi-line description of the data and model inputs."""
        n_predictors = len(self.predictors)
        n_valid = len(self._non_errored_predictors())
        details = f"{len(self.data)} instances" if self.data else "No data"
        details += f"\n{n_predictors} models" if n_predictors \
            else "\nNo models"
        if n_valid != n_predictors:
            details += f" ({n_predictors - n_valid} failed)"
        return details

    def _update_info(self):
        """Update the input summary shown in the widget's status bar."""
        if not self.data and not self.predictors:
            self.info.set_input_summary(self.info.NoInput)
            return

        summary = str(len(self.data)) if self.data else "0"
        self.info.set_input_summary(summary, self._get_details())

    def _invalidate_predictions(self):
        """Drop the stored results of all models."""
        for inputid, pred in list(self.predictors.items()):
            self.predictors[inputid] = pred._replace(results=None)

    def _non_errored_predictors(self):
        """Return the slots whose results are a `Results` instance."""
        return [p for p in self.predictors.values()
                if isinstance(p.results, Results)]

    def _update_predictions_model(self):
        """Rebuild the predictions view model from the stored results."""
        results = []
        headers = []
        for p in self._non_errored_predictors():
            values = p.results.unmapped_predicted
            target = p.predictor.domain.class_var
            if target.is_discrete:
                prob = p.results.unmapped_probabilities
                values = [Value(target, v) for v in values]
            else:
                prob = numpy.zeros((len(values), 0))
            results.append((values, prob))
            headers.append(p.predictor.name)

        if results:
            results = list(zip(*(zip(*res) for res in results)))
            model = PredictionsModel(results, headers)
        else:
            model = None

        predmodel = PredictionsSortProxyModel()
        predmodel.setSourceModel(model)
        predmodel.setDynamicSortFilter(True)
        self.predictionsview.setModel(predmodel)
        hheader = self.predictionsview.horizontalHeader()
        hheader.setSortIndicatorShown(False)
        # SortFilterProxyModel is slow due to large abstraction overhead
        # (every comparison triggers multiple `model.index(...)`,
        # model.rowCount(...), `model.parent`, ... calls)
        hheader.setSectionsClickable(predmodel.rowCount() < 20000)

        predmodel.layoutChanged.connect(self._update_data_sort_order)
        self._update_data_sort_order()
        self.predictionsview.resizeColumnsToContents()

    def _update_data_sort_order(self):
        """Reorder the data view's rows to follow the predictions view."""
        datamodel = self.dataview.model()  # data model proxy
        predmodel = self.predictionsview.model()  # predictions model proxy
        sortindicatorshown = False
        if datamodel is not None:
            assert isinstance(datamodel, TableSortProxyModel)
            n = datamodel.rowCount()
            if predmodel is not None and predmodel.sortColumn() >= 0:
                sortind = numpy.argsort(
                    [predmodel.mapToSource(predmodel.index(i, 0)).row()
                     for i in range(n)])
                # `numpy.int` was removed in NumPy 1.24; the builtin `int`
                # is the dtype it used to alias.
                sortind = numpy.array(sortind, int)
                sortindicatorshown = True
            else:
                sortind = None

            datamodel.setSortIndices(sortind)

        self.predictionsview.horizontalHeader() \
            .setSortIndicatorShown(sortindicatorshown)

    def _reset_order(self):
        """Reset the row sorting of both views to the input order."""
        datamodel = self.dataview.model()
        predmodel = self.predictionsview.model()
        if datamodel is not None:
            datamodel.sort(-1)
        if predmodel is not None:
            predmodel.sort(-1)
        self.predictionsview.horizontalHeader().setSortIndicatorShown(False)

    def _update_prediction_delegate(self):
        """Install a delegate per model column showing the selected classes."""
        selected = {self.class_values[i] for i in self.selected_classes}
        self._delegates.clear()
        for col, slot in enumerate(self.predictors.values()):
            target = slot.predictor.domain.class_var
            if target.is_continuous:
                shown_probs = ()
            else:
                shown_probs = [i for i, name in enumerate(target.values)
                               if name in selected]
            delegate = PredictionsItemDelegate(target, shown_probs)
            # QAbstractItemView does not take ownership of delegates, so we
            # must keep a reference to them ourselves.
            self._delegates.append(delegate)
            self.predictionsview.setItemDelegateForColumn(col, delegate)
            self.predictionsview.setColumnHidden(col, False)

        self.predictionsview.resizeColumnsToContents()
        self._update_spliter()

    def _update_spliter(self):
        """Resize the splitter so the predictions view fits its columns."""
        if not self.data:
            return

        def width(view):
            h_header = view.horizontalHeader()
            v_header = view.verticalHeader()
            return h_header.length() + v_header.width()

        w = width(self.predictionsview) + 4
        w1, w2 = self.splitter.sizes()
        self.splitter.setSizes([w, w1 + w2 - w])

    def commit(self):
        """Send both outputs."""
        self._commit_predictions()
        self._commit_evaluation_results()

    def _commit_evaluation_results(self):
        """Send `Results` for the models that predict the data's target.

        Rows with an unknown (NaN) class value are left out.
        """
        slots = [p for p in self._non_errored_predictors()
                 if p.results.predicted is not None]
        if not slots:
            self.Outputs.evaluation_results.send(None)
            return

        nanmask = numpy.isnan(self.data.get_column_view(self.class_var)[0])
        data = self.data[~nanmask]
        results = Results(data, store_data=True)
        results.folds = None
        results.row_indices = numpy.arange(len(data))
        results.actual = data.Y.ravel()
        results.predicted = numpy.vstack(
            tuple(p.results.predicted[0][~nanmask] for p in slots))
        if self.class_var and self.class_var.is_discrete:
            results.probabilities = numpy.array(
                [p.results.probabilities[0][~nanmask] for p in slots])
        results.learner_names = [p.name for p in slots]
        self.Outputs.evaluation_results.send(results)

    def _commit_predictions(self):
        """Send the input data with predictions appended as meta columns."""
        if not self.data:
            self.Outputs.predictions.send(None)
            return

        newmetas = []
        newcolumns = []
        for slot in self._non_errored_predictors():
            if slot.predictor.domain.class_var.is_discrete:
                self._add_classification_out_columns(
                    slot, newmetas, newcolumns)
            else:
                self._add_regression_out_columns(slot, newmetas, newcolumns)

        attrs = list(self.data.domain.attributes)
        metas = list(self.data.domain.metas) + newmetas
        domain = Orange.data.Domain(attrs, self.class_var, metas=metas)
        predictions = self.data.transform(domain)
        if newcolumns:
            newcolumns = numpy.hstack(
                [numpy.atleast_2d(cols) for cols in newcolumns])
            # The new metas were appended last, so they are the last columns.
            predictions.metas[:, -newcolumns.shape[1]:] = newcolumns
        self.Outputs.predictions.send(predictions)

    @staticmethod
    def _add_classification_out_columns(slot, newmetas, newcolumns):
        """Append the value and per-class probability columns of `slot`."""
        # Mapped or unmapped predictions?!
        # Or provide a checkbox so the user decides?
        pred = slot.predictor
        name = pred.name
        values = pred.domain.class_var.values
        newmetas.append(DiscreteVariable(name=name, values=values))
        newcolumns.append(slot.results.unmapped_predicted.reshape(-1, 1))
        newmetas += [ContinuousVariable(name=f"{name} ({value})")
                     for value in values]
        newcolumns.append(slot.results.unmapped_probabilities)

    @staticmethod
    def _add_regression_out_columns(slot, newmetas, newcolumns):
        """Append the predicted-value column of `slot`."""
        newmetas.append(ContinuousVariable(name=slot.predictor.name))
        newcolumns.append(slot.results.unmapped_predicted.reshape((-1, 1)))

    def send_report(self):
        """Add the input details and the predictions table to the report."""
        def merge_data_with_predictions():
            data_model = self.dataview.model()
            predictions_model = self.predictionsview.model()

            # use ItemDelegate to style prediction values
            style = lambda x: self.predictionsview.itemDelegate().displayText(
                x, QLocale())

            # iterate only over visible columns of data's QTableView
            iter_data_cols = list(
                filter(lambda x: not self.dataview.isColumnHidden(x),
                       range(data_model.columnCount())))

            # print header
            yield [''] + \
                  [predictions_model.headerData(col, Qt.Horizontal,
                                                Qt.DisplayRole)
                   for col in range(predictions_model.columnCount())] + \
                  [data_model.headerData(col, Qt.Horizontal, Qt.DisplayRole)
                   for col in iter_data_cols]

            # print data & predictions
            for i in range(data_model.rowCount()):
                yield [data_model.headerData(i, Qt.Vertical,
                                             Qt.DisplayRole)] + \
                      [style(predictions_model.data(
                          predictions_model.index(i, j)))
                       for j in range(predictions_model.columnCount())] + \
                      [data_model.data(data_model.index(i, j))
                       for j in iter_data_cols]

        if self.data:
            # This widget has no info label; build the text from the same
            # details that are shown in the input summary.
            text = self._get_details().replace('\n', '<br>')
            if self.selected_classes:
                text += '<br>Showing probabilities for: '
                text += ', '.join(
                    [self.class_values[i] for i in self.selected_classes])
            self.report_paragraph('Info', text)
            self.report_table("Data & Predictions",
                              merge_data_with_predictions(),
                              header_rows=1, header_columns=1)
class OWPredictions(OWWidget):
    """Widget showing the predictions of input models on an input dataset.

    All models must share one target variable, which must also match the
    data's class variable when the data has one; otherwise an error is
    shown and no predictions are displayed.
    """
    name = "Predictions"
    icon = "icons/Predictions.svg"
    priority = 200
    description = "Display the predictions of models for an input dataset."
    keywords = []

    class Inputs:
        data = Input("Data", Orange.data.Table)
        predictors = Input("Predictors", Model, multiple=True)

    class Outputs:
        predictions = Output("Predictions", Orange.data.Table)
        evaluation_results = Output("Evaluation Results",
                                    Orange.evaluation.Results,
                                    dynamic=False)

    class Warning(OWWidget.Warning):
        empty_data = Msg("Empty dataset")

    class Error(OWWidget.Error):
        predictor_failed = \
            Msg("One or more predictors failed (see more...)\n{}")
        predictors_target_mismatch = \
            Msg("Predictors do not have the same target.")
        data_target_mismatch = \
            Msg("Data does not have the same target as predictors.")

    settingsHandler = settings.ClassValuesContextHandler()
    #: Display the full input dataset or only the target variable columns (if
    #: available)
    show_attrs = settings.Setting(True)
    #: Show predicted values (for discrete target variable)
    show_predictions = settings.Setting(True)
    #: Show predictions probabilities (for discrete target variable)
    show_probabilities = settings.Setting(True)
    #: List of selected class value indices in the "Show probabilities" list
    selected_classes = settings.ContextSetting([])
    #: Draw colored distribution bars
    draw_dist = settings.Setting(True)

    output_attrs = settings.Setting(True)
    output_predictions = settings.Setting(True)
    output_probabilities = settings.Setting(True)

    def __init__(self):
        super().__init__()

        #: Input data table
        self.data = None  # type: Optional[Orange.data.Table]
        #: A dict mapping input ids to PredictorSlot
        self.predictors = OrderedDict()  # type: Dict[object, PredictorSlot]
        #: A class variable (prediction target)
        self.class_var = None  # type: Optional[Orange.data.Variable]
        #: List of (discrete) class variable's values
        self.class_values = []  # type: List[str]

        box = gui.vBox(self.controlArea, "Info")
        self.infolabel = gui.widgetLabel(
            box, "No data on input.\nPredictors: 0\nTask: N/A")
        self.infolabel.setMinimumWidth(150)
        gui.button(box, self, "Restore Original Order",
                   callback=self._reset_order,
                   tooltip="Show rows in the original order")

        self.classification_options = box = gui.vBox(
            self.controlArea, "Show", spacing=-1, addSpace=False)

        gui.checkBox(box, self, "show_predictions", "Predicted class",
                     callback=self._update_prediction_delegate)
        b = gui.checkBox(box, self, "show_probabilities",
                         "Predicted probabilities for:",
                         callback=self._update_prediction_delegate)
        ibox = gui.indentedBox(box, sep=gui.checkButtonOffsetHint(b),
                               addSpace=False)
        gui.listBox(ibox, self, "selected_classes", "class_values",
                    callback=self._update_prediction_delegate,
                    selectionMode=QListWidget.MultiSelection,
                    addSpace=False)
        gui.checkBox(box, self, "draw_dist", "Draw distribution bars",
                     callback=self._update_prediction_delegate)

        box = gui.vBox(self.controlArea, "Data View")
        gui.checkBox(box, self, "show_attrs", "Show full dataset",
                     callback=self._update_column_visibility)

        box = gui.vBox(self.controlArea, "Output", spacing=-1)
        # The "Original data" checkbox is never referenced again, so it is
        # not stored (it used to be bound to `checkbox_class` and then
        # immediately overwritten by the "Predictions" checkbox).
        gui.checkBox(box, self, "output_attrs", "Original data",
                     callback=self.commit)
        self.checkbox_class = gui.checkBox(
            box, self, "output_predictions", "Predictions",
            callback=self.commit)
        self.checkbox_prob = gui.checkBox(
            box, self, "output_probabilities", "Probabilities",
            callback=self.commit)

        gui.rubber(self.controlArea)

        self.splitter = QSplitter(
            orientation=Qt.Horizontal,
            childrenCollapsible=False,
            handleWidth=2,
        )
        self.dataview = TableView(
            verticalScrollBarPolicy=Qt.ScrollBarAlwaysOn,
            horizontalScrollBarPolicy=Qt.ScrollBarAlwaysOn,
            horizontalScrollMode=QTableView.ScrollPerPixel,
            selectionMode=QTableView.NoSelection,
            focusPolicy=Qt.StrongFocus
        )
        self.predictionsview = TableView(
            verticalScrollBarPolicy=Qt.ScrollBarAlwaysOff,
            horizontalScrollBarPolicy=Qt.ScrollBarAlwaysOn,
            horizontalScrollMode=QTableView.ScrollPerPixel,
            selectionMode=QTableView.NoSelection,
            focusPolicy=Qt.StrongFocus,
            sortingEnabled=True,
        )
        self.predictionsview.setItemDelegate(PredictionsItemDelegate())
        self.dataview.verticalHeader().hide()

        # Keep the vertical scroll position of both views in sync.
        dsbar = self.dataview.verticalScrollBar()
        psbar = self.predictionsview.verticalScrollBar()
        psbar.valueChanged.connect(dsbar.setValue)
        dsbar.valueChanged.connect(psbar.setValue)

        self.dataview.verticalHeader().setDefaultSectionSize(22)
        self.predictionsview.verticalHeader().setDefaultSectionSize(22)
        # Mirror row-height changes of the data view in the predictions view.
        self.dataview.verticalHeader().sectionResized.connect(
            lambda index, _, size:
            self.predictionsview.verticalHeader().resizeSection(index, size)
        )

        self.splitter.addWidget(self.predictionsview)
        self.splitter.addWidget(self.dataview)

        self.mainArea.layout().addWidget(self.splitter)

    @Inputs.data
    @check_sql_input
    def set_data(self, data):
        """Set the input dataset"""
        if data is not None and not len(data):
            # An empty table is treated as no data, with a warning.
            data = None
            self.Warning.empty_data()
        else:
            self.Warning.empty_data.clear()

        self.data = data
        if data is None:
            self.dataview.setModel(None)
            self.predictionsview.setModel(None)
            self.predictionsview.setItemDelegate(PredictionsItemDelegate())
        else:
            # force full reset of the view's HeaderView state
            self.dataview.setModel(None)
            model = TableModel(data, parent=None)
            modelproxy = TableSortProxyModel()
            modelproxy.setSourceModel(model)
            self.dataview.setModel(modelproxy)
            self._update_column_visibility()

        self._invalidate_predictions()

    @Inputs.predictors
    def set_predictor(self, predictor=None, id=None):
        """Add, replace or remove (when `predictor` is None) the model `id`."""
        if id in self.predictors:
            if predictor is not None:
                self.predictors[id] = self.predictors[id]._replace(
                    predictor=predictor, name=predictor.name, results=None)
            else:
                del self.predictors[id]
        elif predictor is not None:
            self.predictors[id] = \
                PredictorSlot(predictor, predictor.name, None)

    def set_class_var(self):
        """Determine the common target of the models and reset class values.

        `class_var` is left as None when the models disagree on the target or
        when it differs from the class variable of the data.
        """
        pred_classes = set(pred.predictor.domain.class_var
                           for pred in self.predictors.values())
        self.Error.predictors_target_mismatch.clear()
        self.Error.data_target_mismatch.clear()
        self.class_var = None
        if len(pred_classes) > 1:
            self.Error.predictors_target_mismatch()
        if len(pred_classes) == 1:
            self.class_var = pred_classes.pop()
            if self.data is not None and \
                    self.data.domain.class_var is not None and \
                    self.class_var != self.data.domain.class_var:
                self.Error.data_target_mismatch()
                self.class_var = None

        discrete_class = self.class_var is not None \
            and self.class_var.is_discrete
        self.classification_options.setVisible(discrete_class)
        self.closeContext()
        if discrete_class:
            self.class_values = list(self.class_var.values)
            self.selected_classes = list(range(len(self.class_values)))
            self.openContext(self.class_var)
        else:
            self.class_values = []
            self.selected_classes = []

    def handleNewSignals(self):
        """Recompute predictions and views and send the outputs."""
        self.set_class_var()
        if self.data is not None:
            self._call_predictors()
        self._update_predictions_model()
        self._update_prediction_delegate()
        self._set_errors()
        self._update_info()
        self.commit()

    def _call_predictors(self):
        """Run the models whose results are missing, failed or all-NaN.

        A failure is stored in the slot as an error string.
        """
        for inputid, pred in self.predictors.items():
            if pred.results is None \
                    or isinstance(pred.results, str) \
                    or numpy.isnan(pred.results[0]).all():
                try:
                    results = self.predict(pred.predictor, self.data)
                except (ValueError, DomainTransformationError) as err:
                    results = "{}: {}".format(pred.predictor.name, err)
                self.predictors[inputid] = pred._replace(results=results)

    def _set_errors(self):
        """Show the collected predictor error messages, or clear the error."""
        errors = "\n".join(p.results for p in self.predictors.values()
                           if isinstance(p.results, str))
        if errors:
            self.Error.predictor_failed(errors)
        else:
            self.Error.predictor_failed.clear()

    def _update_info(self):
        """Update the info label and enable the output options that apply."""
        info = []
        if self.data is not None:
            info.append("Data: {} instances.".format(len(self.data)))
        else:
            info.append("Data: N/A")

        n_predictors = len(self.predictors)
        n_valid = len(self._valid_predictors())
        if n_valid != n_predictors:
            info.append("Predictors: {} (+ {} failed)".format(
                n_valid, n_predictors - n_valid))
        else:
            info.append("Predictors: {}".format(n_predictors or "N/A"))

        if self.class_var is None:
            info.append("Task: N/A")
        elif self.class_var.is_discrete:
            info.append("Task: Classification")
            self.checkbox_class.setEnabled(True)
            self.checkbox_prob.setEnabled(True)
        else:
            info.append("Task: Regression")
            self.checkbox_class.setEnabled(False)
            self.checkbox_prob.setEnabled(False)

        self.infolabel.setText("\n".join(info))

    def _invalidate_predictions(self):
        """Drop the stored results of all models."""
        for inputid, pred in list(self.predictors.items()):
            self.predictors[inputid] = pred._replace(results=None)

    def _valid_predictors(self):
        """Return slots holding actual results (neither None nor an error).

        The list is empty when there is no data or no common target.
        """
        if self.class_var is not None and \
                self.data is not None:
            return [p for p in self.predictors.values()
                    if p.results is not None
                    and not isinstance(p.results, str)]
        else:
            return []

    def _update_predictions_model(self):
        """Update the prediction view model."""
        if self.data is not None and self.class_var is not None:
            slots = self._valid_predictors()
            results = []
            class_var = self.class_var
            for p in slots:
                values, prob = p.results
                if self.class_var.is_discrete:
                    # if values were added to class_var between building the
                    # model and predicting, add zeros for new class values,
                    # which are always at the end
                    prob = numpy.c_[
                        prob,
                        numpy.zeros((prob.shape[0],
                                     len(class_var.values) - prob.shape[1]))]
                    values = [Value(class_var, v) for v in values]
                results.append((values, prob))
            results = list(zip(*(zip(*res) for res in results)))
            headers = [p.name for p in slots]
            model = PredictionsModel(results, headers)
        else:
            model = None

        predmodel = PredictionsSortProxyModel()
        predmodel.setSourceModel(model)
        predmodel.setDynamicSortFilter(True)
        self.predictionsview.setItemDelegate(PredictionsItemDelegate())
        self.predictionsview.setModel(predmodel)
        hheader = self.predictionsview.horizontalHeader()
        hheader.setSortIndicatorShown(False)
        # SortFilterProxyModel is slow due to large abstraction overhead
        # (every comparison triggers multiple `model.index(...)`,
        # model.rowCount(...), `model.parent`, ... calls)
        hheader.setSectionsClickable(predmodel.rowCount() < 20000)

        predmodel.layoutChanged.connect(self._update_data_sort_order)
        self._update_data_sort_order()
        self.predictionsview.resizeColumnsToContents()

    def _update_column_visibility(self):
        """Update data column visibility."""
        if self.data is not None and self.class_var is not None:
            domain = self.data.domain
            # Attribute columns come after the class and meta columns.
            first_attr = len(domain.class_vars) + len(domain.metas)

            for i in range(first_attr, first_attr + len(domain.attributes)):
                self.dataview.setColumnHidden(i, not self.show_attrs)
            if domain.class_var:
                self.dataview.setColumnHidden(0, False)

    def _update_data_sort_order(self):
        """Update data row order to match the current predictions view order"""
        datamodel = self.dataview.model()  # data model proxy
        predmodel = self.predictionsview.model()  # predictions model proxy
        sortindicatorshown = False
        if datamodel is not None:
            assert isinstance(datamodel, TableSortProxyModel)
            n = datamodel.rowCount()
            if predmodel is not None and predmodel.sortColumn() >= 0:
                sortind = numpy.argsort(
                    [predmodel.mapToSource(predmodel.index(i, 0)).row()
                     for i in range(n)])
                # `numpy.int` was removed in NumPy 1.24; the builtin `int`
                # is the dtype it used to alias.
                sortind = numpy.array(sortind, int)
                sortindicatorshown = True
            else:
                sortind = None

            datamodel.setSortIndices(sortind)

        self.predictionsview.horizontalHeader() \
            .setSortIndicatorShown(sortindicatorshown)

    def _reset_order(self):
        """Reset the row sorting to original input order."""
        datamodel = self.dataview.model()
        predmodel = self.predictionsview.model()
        if datamodel is not None:
            datamodel.sort(-1)
        if predmodel is not None:
            predmodel.sort(-1)
        self.predictionsview.horizontalHeader().setSortIndicatorShown(False)

    def _update_prediction_delegate(self):
        """Update the predicted probability visibility state"""
        if self.class_var is not None:
            delegate = PredictionsItemDelegate()
            if self.class_var.is_continuous:
                self._setup_delegate_continuous(delegate)
            else:
                self._setup_delegate_discrete(delegate)
                proxy = self.predictionsview.model()
                if proxy is not None:
                    proxy.setProbInd(
                        numpy.array(self.selected_classes, dtype=int))
            self.predictionsview.setItemDelegate(delegate)
            self.predictionsview.resizeColumnsToContents()
        self._update_spliter()

    def _setup_delegate_discrete(self, delegate):
        """Set the display format and colors of `delegate`; return it."""
        colors = [QtGui.QColor(*rgb) for rgb in self.class_var.colors]
        fmt = []
        if self.show_probabilities:
            fmt.append(" : ".join("{{dist[{}]:.2f}}".format(i)
                                  for i in sorted(self.selected_classes)))
        if self.show_predictions:
            fmt.append("{value!s}")
        delegate.setFormat(" \N{RIGHTWARDS ARROW} ".join(fmt))
        if self.draw_dist:
            delegate.setColors(colors)
        return delegate

    def _setup_delegate_continuous(self, delegate):
        """Make `delegate` format values like the class variable does."""
        # `format_str[1:]` drops the first character of the variable's
        # format string before embedding it in a `{value:...}` field.
        delegate.setFormat(
            "{{value:{}}}".format(self.class_var.format_str[1:]))

    def _update_spliter(self):
        """Resize the splitter so the predictions view fits its columns."""
        if self.data is None:
            return

        def width(view):
            h_header = view.horizontalHeader()
            v_header = view.verticalHeader()
            return h_header.length() + v_header.width()

        w = width(self.predictionsview) + 4
        w1, w2 = self.splitter.sizes()
        self.splitter.setSizes([w, w1 + w2 - w])

    def commit(self):
        """Send both outputs."""
        self._commit_predictions()
        self._commit_evaluation_results()

    def _commit_evaluation_results(self):
        """Send evaluation results for the valid models.

        Nothing is sent when the data has no class variable; rows with an
        unknown (NaN) class value are left out.
        """
        slots = self._valid_predictors()
        if not slots or self.data.domain.class_var is None:
            self.Outputs.evaluation_results.send(None)
            return

        class_var = self.class_var
        nanmask = numpy.isnan(self.data.get_column_view(class_var)[0])
        data = self.data[~nanmask]
        N = len(data)
        results = Orange.evaluation.Results(data, store_data=True)
        results.folds = None
        results.row_indices = numpy.arange(N)
        results.actual = data.Y.ravel()
        results.predicted = numpy.vstack(
            tuple(p.results[0][~nanmask] for p in slots))
        if class_var and class_var.is_discrete:
            results.probabilities = numpy.array(
                [p.results[1][~nanmask] for p in slots])
        results.learner_names = [p.name for p in slots]
        self.Outputs.evaluation_results.send(results)

    def _commit_predictions(self):
        """Send the input data with predictions appended as meta columns."""
        slots = self._valid_predictors()
        if not slots:
            self.Outputs.predictions.send(None)
            return

        if self.class_var and self.class_var.is_discrete:
            newmetas, newcolumns = self._classification_output_columns()
        else:
            newmetas, newcolumns = self._regression_output_columns()

        attrs = list(self.data.domain.attributes) if self.output_attrs else []
        metas = list(self.data.domain.metas) + newmetas
        domain = \
            Orange.data.Domain(attrs, self.data.domain.class_var, metas=metas)
        predictions = self.data.transform(domain)
        if newcolumns:
            newcolumns = numpy.hstack(
                [numpy.atleast_2d(cols) for cols in newcolumns])
            # The new metas were appended last, so they are the last columns.
            predictions.metas[:, -newcolumns.shape[1]:] = newcolumns
        self.Outputs.predictions.send(predictions)

    def _classification_output_columns(self):
        """Return (variables, columns) for the enabled classification
        outputs: predicted values and/or class probabilities."""
        newmetas = []
        newcolumns = []
        slots = self._valid_predictors()
        if self.output_predictions:
            newmetas += [DiscreteVariable(name=p.name,
                                          values=self.class_values)
                         for p in slots]
            newcolumns += [p.results[0].reshape((-1, 1)) for p in slots]

        if self.output_probabilities:
            newmetas += [ContinuousVariable(name="%s (%s)" % (p.name, value))
                         for p in slots for value in self.class_values]
            newcolumns += [p.results[1] for p in slots]
        return newmetas, newcolumns

    def _regression_output_columns(self):
        """Return (variables, columns) with one value column per model."""
        slots = self._valid_predictors()
        newmetas = [ContinuousVariable(name=p.name) for p in slots]
        newcolumns = [p.results[0].reshape((-1, 1)) for p in slots]
        return newmetas, newcolumns

    def send_report(self):
        """Add the info text and the predictions table to the report."""
        def merge_data_with_predictions():
            data_model = self.dataview.model()
            predictions_model = self.predictionsview.model()

            # use ItemDelegate to style prediction values
            style = lambda x: self.predictionsview.itemDelegate().displayText(
                x, QLocale())

            # iterate only over visible columns of data's QTableView
            iter_data_cols = list(
                filter(lambda x: not self.dataview.isColumnHidden(x),
                       range(data_model.columnCount())))

            # print header
            yield [''] + \
                  [predictions_model.headerData(col, Qt.Horizontal,
                                                Qt.DisplayRole)
                   for col in range(predictions_model.columnCount())] + \
                  [data_model.headerData(col, Qt.Horizontal, Qt.DisplayRole)
                   for col in iter_data_cols]

            # print data & predictions
            for i in range(data_model.rowCount()):
                yield [data_model.headerData(i, Qt.Vertical,
                                             Qt.DisplayRole)] + \
                      [style(predictions_model.data(
                          predictions_model.index(i, j)))
                       for j in range(predictions_model.columnCount())] + \
                      [data_model.data(data_model.index(i, j))
                       for j in iter_data_cols]

        if self.data is not None and self.class_var is not None:
            text = self.infolabel.text().replace('\n', '<br>')
            if self.show_probabilities and self.selected_classes:
                text += '<br>Showing probabilities for: '
                text += ', '.join(
                    [self.class_values[i] for i in self.selected_classes])
            self.report_paragraph('Info', text)
            self.report_table("Data & Predictions",
                              merge_data_with_predictions(),
                              header_rows=1, header_columns=1)

    @classmethod
    def predict(cls, predictor, data):
        """Return `(values, probabilities)` of `predictor` on `data`.

        Returns None when the model has no class variable or it is neither
        discrete nor continuous.
        """
        class_var = predictor.domain.class_var
        if class_var:
            if class_var.is_discrete:
                return cls.predict_discrete(predictor, data)
            elif class_var.is_continuous:
                return cls.predict_continuous(predictor, data)

    @staticmethod
    def predict_discrete(predictor, data):
        """Return what `predictor` gives when called with `ValueProbs`."""
        return predictor(data, Model.ValueProbs)

    @staticmethod
    def predict_continuous(predictor, data):
        """Return predicted values and a list of None placeholders, one per
        data row, in place of probabilities."""
        values = predictor(data, Model.Value)
        return values, [None] * len(data)