class OWSilhouettePlot(widget.OWWidget):
    """
    Widget displaying a silhouette plot for a chosen cluster label.

    A distance matrix is computed from the input table with the selected
    metric, per-instance silhouette scores are computed with
    ``sklearn.metrics.silhouette_samples`` and drawn with a
    ``SilhouettePlot`` item. The selected rows are sent on "Selected Data"
    and the annotated table on the annotated data output.
    """
    name = "Silhouette Plot"
    description = "Visually assess cluster quality and " \
                  "the degree of cluster membership."

    icon = "icons/SilhouettePlot.svg"
    priority = 300
    keywords = []

    class Inputs:
        data = Input("Data", Orange.data.Table)

    class Outputs:
        selected_data = Output("Selected Data", Orange.data.Table,
                               default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME,
                                Orange.data.Table)

    replaces = [
        "orangecontrib.prototypes.widgets.owsilhouetteplot.OWSilhouettePlot",
        "Orange.widgets.unsupervised.owsilhouetteplot.OWSilhouettePlot"
    ]

    settingsHandler = settings.PerfectDomainContextHandler()

    #: Distance metric index
    distance_idx = settings.Setting(0)
    #: Group/cluster variable index
    cluster_var_idx = settings.ContextSetting(0)
    #: Annotation variable index
    annotation_var_idx = settings.ContextSetting(0)
    #: Group the (displayed) silhouettes by cluster
    group_by_cluster = settings.Setting(True)
    #: A fixed size for an instance bar
    bar_size = settings.Setting(3)
    #: Add silhouette scores to output data
    add_scores = settings.Setting(False)
    auto_commit = settings.Setting(True)

    Distances = [("Euclidean", Orange.distance.Euclidean),
                 ("Manhattan", Orange.distance.Manhattan)]

    graph_name = "scene"
    buttons_area_orientation = Qt.Vertical

    class Error(widget.OWWidget.Error):
        need_two_clusters = Msg("Need at least two non-empty clusters")
        singleton_clusters_all = Msg("All clusters are singletons")
        memory_error = Msg("Not enough memory")
        value_error = Msg("Distances could not be computed: '{}'")

    class Warning(widget.OWWidget.Warning):
        missing_cluster_assignment = Msg(
            "{} instance{s} omitted (missing cluster assignment)")

    def __init__(self):
        super().__init__()
        #: The input data
        self.data = None         # type: Optional[Orange.data.Table]
        #: Distance matrix computed from data
        self._matrix = None      # type: Optional[Orange.misc.DistMatrix]
        #: An bool mask (size == len(data)) indicating missing group/cluster
        #: assignments
        self._mask = None        # type: Optional[np.ndarray]
        #: An array of cluster/group labels for instances with valid group
        #: assignment
        self._labels = None      # type: Optional[np.ndarray]
        #: An array of silhouette scores for instances with valid group
        #: assignment
        self._silhouette = None  # type: Optional[np.ndarray]
        self._silplot = None     # type: Optional[SilhouettePlot]

        gui.comboBox(
            self.controlArea, self, "distance_idx", box="Distance",
            items=[name for name, _ in OWSilhouettePlot.Distances],
            orientation=Qt.Horizontal, callback=self._invalidate_distances)

        box = gui.vBox(self.controlArea, "Cluster Label")
        self.cluster_var_cb = gui.comboBox(
            box, self, "cluster_var_idx", contentsLength=14, addSpace=4,
            callback=self._invalidate_scores)
        gui.checkBox(
            box, self, "group_by_cluster", "Group by cluster",
            callback=self._replot)
        self.cluster_var_model = itemmodels.VariableListModel(parent=self)
        self.cluster_var_cb.setModel(self.cluster_var_model)

        box = gui.vBox(self.controlArea, "Bars")
        gui.widgetLabel(box, "Bar width:")
        gui.hSlider(
            box, self, "bar_size", minValue=1, maxValue=10, step=1,
            callback=self._update_bar_size, addSpace=6)
        gui.widgetLabel(box, "Annotations:")
        self.annotation_cb = gui.comboBox(
            box, self, "annotation_var_idx", contentsLength=14,
            callback=self._update_annotations)
        self.annotation_var_model = itemmodels.VariableListModel(parent=self)
        self.annotation_var_model[:] = ["None"]
        self.annotation_cb.setModel(self.annotation_var_model)
        ibox = gui.indentedBox(box, 5)
        self.ann_hidden_warning = warning = gui.widgetLabel(
            ibox, "(increase the width to show)")
        ibox.setFixedWidth(ibox.sizeHint().width())
        warning.setVisible(False)

        gui.rubber(self.controlArea)

        gui.separator(self.buttonsArea)
        box = gui.vBox(self.buttonsArea, "Output")
        # Thunk the call to commit to call conditional commit
        gui.checkBox(box, self, "add_scores", "Add silhouette scores",
                     callback=lambda: self.commit())
        gui.auto_commit(
            box, self, "auto_commit", "Commit",
            auto_label="Auto commit", box=False)
        # Ensure that the controlArea is not narrower than buttonsArea
        self.controlArea.layout().addWidget(self.buttonsArea)

        self.scene = QGraphicsScene()
        self.view = QGraphicsView(self.scene)
        self.view.setRenderHint(QPainter.Antialiasing, True)
        self.view.setAlignment(Qt.AlignTop | Qt.AlignLeft)
        self.mainArea.layout().addWidget(self.view)

    def sizeHint(self):
        """Return the control area's size hint expanded to at least 600x720."""
        sh = self.controlArea.sizeHint()
        return sh.expandedTo(QSize(600, 720))

    @Inputs.data
    @check_sql_input
    def set_data(self, data):
        """
        Set the input dataset.

        Input without any discrete variable having at least two values is
        rejected (treated as no input) and an error message is shown.
        """
        self.closeContext()
        self.clear()
        error_msg = ""
        warning_msg = ""
        candidatevars = []
        if data is not None:
            candidatevars = [
                v for v in data.domain.variables + data.domain.metas
                if v.is_discrete and len(v.values) >= 2
            ]
            if not candidatevars:
                error_msg = "Input does not have any suitable labels."
                data = None

        self.data = data
        if data is not None:
            self.cluster_var_model[:] = candidatevars
            # Prefer the class variable as the default cluster label.
            if data.domain.class_var in candidatevars:
                self.cluster_var_idx = \
                    candidatevars.index(data.domain.class_var)
            else:
                self.cluster_var_idx = 0

            annotvars = [var for var in data.domain.metas if var.is_string]
            self.annotation_var_model[:] = ["None"] + annotvars
            # Index 0 is the "None" entry.
            self.annotation_var_idx = 1 if len(annotvars) else 0
            self.openContext(Orange.data.Domain(candidatevars))

        self.error(error_msg)
        self.warning(warning_msg)

    def handleNewSignals(self):
        """Recompute and redraw for the new input, then commit."""
        if self.data is not None:
            self._update()
            self._replot()

        self.unconditional_commit()

    def clear(self):
        """
        Clear the widget state.
        """
        self.data = None
        self._matrix = None
        self._mask = None
        self._silhouette = None
        self._labels = None
        self.cluster_var_model[:] = []
        self.annotation_var_model[:] = ["None"]
        self._clear_scene()
        self.Error.clear()
        self.Warning.clear()

    def _clear_scene(self):
        # Clear the graphics scene and associated objects
        self.scene.clear()
        self.scene.setSceneRect(QRectF())
        self._silplot = None

    def _invalidate_distances(self):
        # Invalidate the computed distance matrix and recompute the silhouette.
        self._matrix = None
        self._invalidate_scores()

    def _invalidate_scores(self):
        # Invalidate and recompute the current silhouette scores.
        self._labels = self._silhouette = self._mask = None
        self._update()
        self._replot()
        if self.data is not None:
            self.commit()

    def _update(self):
        # Update/recompute the distances/scores as required
        self._clear_messages()

        if self.data is None or not len(self.data):
            self._reset_all()
            return

        if self._matrix is None and self.data is not None:
            _, metric = self.Distances[self.distance_idx]
            try:
                self._matrix = np.asarray(metric(self.data))
            except MemoryError:
                self.Error.memory_error()
                return
            except ValueError as err:
                self.Error.value_error(str(err))
                return

        self._update_labels()

    def _reset_all(self):
        # Drop every computed result and empty the scene.
        self._mask = None
        self._silhouette = None
        self._labels = None
        self._matrix = None
        self._clear_scene()

    def _clear_messages(self):
        # Clear the messages that `_update` / `_update_labels` may (re)raise.
        self.Error.clear()
        self.Warning.missing_cluster_assignment.clear()

    def _update_labels(self):
        # Recompute the labels, the missing-label mask and the silhouette
        # scores for the currently selected cluster variable.
        labelvar = self.cluster_var_model[self.cluster_var_idx]
        labels, _ = self.data.get_column_view(labelvar)
        labels = np.asarray(labels, dtype=float)
        mask = np.isnan(labels)
        # Drop the missing entries *before* the integer cast: NaN has no
        # integer representation and casting it is an invalid operation.
        labels = labels[~mask].astype(int)

        labels_unq = np.unique(labels)

        if len(labels_unq) < 2:
            self.Error.need_two_clusters()
            labels = silhouette = mask = None
        elif len(labels_unq) == len(labels):
            self.Error.singleton_clusters_all()
            labels = silhouette = mask = None
        else:
            # Score only the rows/columns of instances with a known label.
            silhouette = sklearn.metrics.silhouette_samples(
                self._matrix[~mask, :][:, ~mask], labels,
                metric="precomputed")

        self._mask = mask
        self._labels = labels
        self._silhouette = silhouette

        if labels is not None:
            count_missing = np.count_nonzero(mask)
            if count_missing:
                self.Warning.missing_cluster_assignment(
                    count_missing, s="s" if count_missing > 1 else "")

    def _set_bar_height(self):
        # Apply `bar_size` to the plot; row names are only shown for bars of
        # size 5 or more, otherwise the "increase the width" hint is shown
        # when an annotation variable is selected.
        visible = self.bar_size >= 5
        self._silplot.setBarHeight(self.bar_size)
        self._silplot.setRowNamesVisible(visible)
        self.ann_hidden_warning.setVisible(
            not visible and self.annotation_var_idx > 0)

    def _replot(self):
        # Clear and replot/initialize the scene
        self._clear_scene()
        if self._silhouette is not None and self._labels is not None:
            var = self.cluster_var_model[self.cluster_var_idx]
            self._silplot = silplot = SilhouettePlot()
            self._set_bar_height()

            if self.group_by_cluster:
                silplot.setScores(
                    self._silhouette, self._labels, var.values, var.colors)
            else:
                # A single unnamed group holding all instances.
                silplot.setScores(
                    self._silhouette,
                    np.zeros(len(self._silhouette), dtype=int),
                    [""], np.array([[63, 207, 207]]))

            self.scene.addItem(silplot)
            self._update_annotations()
            silplot.selectionChanged.connect(self.commit)
            silplot.layout().activate()
            self._update_scene_rect()
            silplot.geometryChanged.connect(self._update_scene_rect)

    def _update_bar_size(self):
        # Slider callback; nothing to resize when no plot is shown.
        if self._silplot is not None:
            self._set_bar_height()

    def _update_annotations(self):
        # Update the row names shown in the plot from the selected
        # annotation variable (index 0 is the "None" entry).
        if 0 < self.annotation_var_idx < len(self.annotation_var_model):
            annot_var = self.annotation_var_model[self.annotation_var_idx]
        else:
            annot_var = None
        self.ann_hidden_warning.setVisible(
            self.bar_size < 5 and annot_var is not None)

        if self._silplot is not None:
            if annot_var is not None:
                column, _ = self.data.get_column_view(annot_var)
                if self._mask is not None:
                    assert column.shape == self._mask.shape
                    # Keep only the instances that are actually plotted.
                    column = column[~self._mask]
                self._silplot.setRowNames(
                    [annot_var.str_val(value) for value in column])
            else:
                self._silplot.setRowNames(None)

    def _update_scene_rect(self):
        # Make the scene rect follow the plot item's geometry.
        self.scene.setSceneRect(self._silplot.geometry())

    def commit(self):
        """
        Commit/send the current selection to the output.

        When "Add silhouette scores" is enabled a "Silhouette (<label>)"
        meta column is appended to the output domain. It is only written
        when scores are available.
        """
        selected = indices = data = None
        if self.data is not None:
            selectedmask = np.full(len(self.data), False, dtype=bool)
            if self._silplot is not None:
                indices = self._silplot.selection()
                assert (np.diff(indices) > 0).all(), "strictly increasing"
                if self._mask is not None:
                    # Map plot indices back to row indices of the input.
                    indices = np.flatnonzero(~self._mask)[indices]
                selectedmask[indices] = True

            if self._mask is not None:
                # Expand the scores to the full input length; instances
                # without a cluster assignment get NaN.
                scores = np.full(shape=selectedmask.shape,
                                 fill_value=np.nan)
                scores[~self._mask] = self._silhouette
            else:
                scores = self._silhouette

            silhouette_var = None
            if self.add_scores:
                var = self.cluster_var_model[self.cluster_var_idx]
                silhouette_var = Orange.data.ContinuousVariable(
                    "Silhouette ({})".format(escape(var.name)))
                domain = Orange.data.Domain(
                    self.data.domain.attributes,
                    self.data.domain.class_vars,
                    self.data.domain.metas + (silhouette_var, ))
                data = self.data.transform(domain)
            else:
                domain = self.data.domain
                data = self.data

            if np.count_nonzero(selectedmask):
                selected = self.data.from_table(
                    domain, self.data, np.flatnonzero(selectedmask))

            # `scores` is None when the silhouette could not be computed
            # (see the Error messages); there is then nothing to write and
            # the new column is left as produced by `transform`.
            if self.add_scores and scores is not None:
                if selected is not None:
                    selected[:, silhouette_var] = np.c_[scores[selectedmask]]
                data[:, silhouette_var] = np.c_[scores]

        self.Outputs.selected_data.send(selected)
        self.Outputs.annotated_data.send(
            create_annotated_table(data, indices))

    def send_report(self):
        """Add the plot and a descriptive caption to the report."""
        if not len(self.cluster_var_model):
            return

        self.report_plot()
        caption = "Silhouette plot ({} distance), clustered by '{}'".format(
            self.Distances[self.distance_idx][0],
            self.cluster_var_model[self.cluster_var_idx])
        # `_silplot` is None whenever the scores could not be computed, even
        # though an annotation variable may be selected.
        if self.annotation_var_idx and self._silplot is not None \
                and self._silplot.rowNamesVisible():
            caption += ", annotated with '{}'".format(
                self.annotation_var_model[self.annotation_var_idx])
        self.report_caption(caption)

    def onDeleteWidget(self):
        """Release the held data and scene items when the widget is removed."""
        self.clear()
        super().onDeleteWidget()
class OWColor(widget.OWWidget):
    """
    Widget for editing the names, values and colors of variables.

    Discrete and continuous variables of the input are replaced by proxies
    (``make_proxy``) which are edited through two table views; the input
    data transformed to the proxied domain is sent to the output.
    """
    name = "Color"
    description = "Set color legend for variables."
    icon = "icons/Colors.svg"

    class Inputs:
        data = Input("Data", Orange.data.Table)

    class Outputs:
        data = Output("Data", Orange.data.Table)

    settingsHandler = settings.PerfectDomainContextHandler(
        match_values=settings.PerfectDomainContextHandler.MATCH_VALUES_ALL)
    disc_data = settings.ContextSetting([])
    cont_data = settings.ContextSetting([])
    color_settings = settings.Setting(None)
    selected_schema_index = settings.Setting(0)
    auto_apply = settings.Setting(True)

    want_main_area = False

    def __init__(self):
        super().__init__()
        self.data = None
        self.domain = None
        self.orig_domain = None
        #: Proxies of discrete variables, in order of creation
        self.disc_colors = []
        #: Proxies of continuous variables, in order of creation
        self.cont_colors = []

        # Table of discrete variables
        disc_box = gui.hBox(self.controlArea, "Discrete Variables")
        self.disc_model = DiscColorTableModel()
        self.disc_view = DiscreteTable(self.disc_model)
        self.disc_view.horizontalHeader().setSectionResizeMode(
            QHeaderView.ResizeToContents)
        self.disc_model.dataChanged.connect(self._on_data_changed)
        disc_box.layout().addWidget(self.disc_view)

        # Table of numeric variables
        cont_box = gui.hBox(self.controlArea, "Numeric Variables")
        self.cont_model = ContColorTableModel()
        self.cont_view = ContinuousTable(self, self.cont_model)
        self.cont_view.setColumnWidth(1, 256)
        self.cont_model.dataChanged.connect(self._on_data_changed)
        cont_box.layout().addWidget(self.cont_view)

        # Apply button row, with the report button placed in front of it
        apply_box = gui.auto_commit(
            self.controlArea, self, "auto_apply", "Apply",
            orientation=Qt.Horizontal,
            checkbox_label="Apply automatically")
        apply_layout = apply_box.layout()
        apply_layout.insertSpacing(0, 20)
        apply_layout.insertWidget(0, self.report_button)

    def _create_proxies(self, variables):
        """
        Return `variables` with discrete/continuous ones replaced by proxies.

        Each proxy is also appended to `disc_colors` or `cont_colors`;
        variables of other types are passed through unchanged.
        """
        proxied = []
        for variable in variables:
            if not (variable.is_discrete or variable.is_continuous):
                proxied.append(variable)
                continue
            proxy = variable.make_proxy()
            if proxy.is_discrete:
                # Give the proxy its own copy of the values
                proxy.values = proxy.values[:]
                self.disc_colors.append(proxy)
            else:
                self.cont_colors.append(proxy)
            proxied.append(proxy)
        return proxied

    @Inputs.data
    def set_data(self, data):
        """Handle data input signal"""
        self.closeContext()
        self.disc_colors = []
        self.cont_colors = []
        if data is not None:
            original = data.domain
            self.orig_domain = original
            # Proxies are created in the order attributes, class variables,
            # metas; the color lists are filled in that same order.
            attributes = self._create_proxies(original.attributes)
            class_vars = self._create_proxies(original.class_vars)
            metas = self._create_proxies(original.metas)
            proxy_domain = Orange.data.Domain(attributes, class_vars, metas)
            self.openContext(data)
            self.data = data.transform(proxy_domain)
        else:
            self.data = None
            self.domain = None
        self.disc_model.set_data(self.disc_colors)
        self.cont_model.set_data(self.cont_colors)
        self.disc_view.resizeColumnsToContents()
        self.cont_view.resizeColumnsToContents()
        self.commit()

    def storeSpecificSettings(self):
        """Save names, values and explicitly set colors into the context."""
        # Store the colors that were changed -- but not others
        context = self.current_context
        stored_disc = []
        for var in self.disc_colors:
            colors = "colors" in var.attributes and var.colors
            stored_disc.append((var.name, var.values, colors))
        context.disc_data = stored_disc

        stored_cont = []
        for var in self.cont_colors:
            colors = "colors" in var.attributes and var.colors
            stored_cont.append((var.name, colors))
        context.cont_data = stored_cont

    def retrieveSpecificSettings(self):
        """Apply names, values and colors stored in the context to proxies."""
        context = self.current_context

        stored_disc = getattr(context, "disc_data", ())
        for var, stored in zip(self.disc_colors, stored_disc):
            name, values, colors = stored
            var.name = name
            var.values = values[:]
            # False marks "no colors were stored" (see storeSpecificSettings)
            if colors is not False:
                var.colors = colors

        stored_cont = getattr(context, "cont_data", ())
        for var, stored in zip(self.cont_colors, stored_cont):
            name, colors = stored
            var.name = name
            if colors is not False:
                var.colors = colors

    def _on_data_changed(self, *args):
        # Any edit in either table model triggers a commit.
        self.commit()

    def commit(self):
        """Send the (transformed) data to the output."""
        self.Outputs.data.send(self.data)

    def send_report(self):
        """Send report"""
        def _report_variables(variables, orig_variables):
            # Build the table rows describing one part of the domain.
            from Orange.canvas.report import colored_square as square

            def was(new, old):
                if new == old:
                    return new
                return "{} (was: {})".format(new, old)

            # definition of td element for continuous gradient
            # with support for pre-standard css (needed at least for Qt 4.8)
            widest = max(
                (len(var.values) for var in variables if var.is_discrete),
                default=1)
            prefixes = ("-webkit-", "-o-", "-moz-", "")
            gradient_tpl = (
                "background: {}linear-gradient("
                "left, rgb({{}}, {{}}, {{}}), {{}}rgb({{}}, {{}}, {{}}));")
            cell_open = (
                '<td colspan="{}">'
                '<span class="legend-square" style="width: 100px; '
            ).format(widest)
            gradients = " ".join(
                gradient_tpl.format(prefix) for prefix in prefixes)
            cont_tpl = cell_open + gradients + '"></span></td>'

            row_tpl = '<tr style="height: 2em">\n' \
                      ' <th style="text-align: right">{}</th>{}\n</tr>\n'
            lines = []
            for var, ovar in zip(variables, orig_variables):
                if var.is_discrete:
                    cells = []
                    pairs = zip(var.values, ovar.values)
                    for i, (value, ovalue) in enumerate(pairs):
                        cells.append("<td>{} {}</td>".format(
                            square(*var.colors[i]), was(value, ovalue)))
                    values = " \n".join(cells)
                elif var.is_continuous:
                    col = var.colors
                    colors = col[0][:3] + ("black, " * col[2], ) + col[1][:3]
                    # The same seven values fill every prefixed gradient
                    values = cont_tpl.format(*colors * len(prefixes))
                else:
                    continue
                lines.append(row_tpl.format(was(var.name, ovar.name), values))
            return "".join(lines)

        if not self.data:
            return
        domain = self.data.domain
        orig_domain = self.orig_domain
        outcome_title = "Outcome" + "s" * (len(domain.class_vars) > 1)
        parts = (
            ("Features", domain.attributes, orig_domain.attributes),
            (outcome_title, domain.class_vars, orig_domain.class_vars),
            ("Meta attributes", domain.metas, orig_domain.metas),
        )
        chunks = []
        for title, new_vars, old_vars in parts:
            body = _report_variables(new_vars, old_vars)
            # Sections without any reportable variable are left out
            if body:
                chunks.append("<tr><th>{}</th></tr>{}".format(title, body))
        table = "".join(chunks)
        if table:
            self.report_raw("<table>{}</table>".format(table))
class OWSilhouettePlot(widget.OWWidget):
    """
    Widget displaying a silhouette plot for a chosen cluster label.

    Distances are computed from the preprocessed input table with the
    selected metric, per-instance silhouette scores are computed with
    ``sklearn.metrics.silhouette_samples`` and drawn with a
    ``SilhouettePlot`` item. Selected rows are sent on "Selected Data",
    the remaining rows on "Other Data".
    """
    name = "Silhouette Plot"
    description = "Silhouette Plot"

    icon = "icons/Silhouette.svg"

    inputs = [("Data", Orange.data.Table, "set_data")]
    outputs = [("Selected Data", Orange.data.Table, widget.Default),
               ("Other Data", Orange.data.Table)]

    replaces = [
        "orangecontrib.prototypes.widgets.owsilhouetteplot.OWSilhouettePlot"
    ]

    settingsHandler = settings.PerfectDomainContextHandler()

    #: Distance metric index
    distance_idx = settings.Setting(0)
    #: Group/cluster variable index
    cluster_var_idx = settings.ContextSetting(0)
    #: Annotation variable index
    annotation_var_idx = settings.ContextSetting(0)
    #: Group the silhouettes by cluster
    group_by_cluster = settings.Setting(True)
    #: A fixed size for an instance bar
    bar_size = settings.Setting(3)
    #: Add silhouette scores to output data
    add_scores = settings.Setting(False)
    auto_commit = settings.Setting(False)

    Distances = [("Euclidean", Orange.distance.Euclidean),
                 ("Manhattan", Orange.distance.Manhattan)]

    def __init__(self):
        super().__init__()

        #: The input data
        self.data = None
        #: The input data after `Orange.distance._preprocess`
        self._effective_data = None
        #: Distance matrix computed from `_effective_data`
        self._matrix = None
        #: Silhouette scores, or None when they could not be computed
        self._silhouette = None
        #: Integer cluster labels matching `_silhouette`
        self._labels = None
        #: The plot item currently in the scene (if any)
        self._silplot = None

        box = gui.vBox(
            self.controlArea, "Settings",
        )
        gui.comboBox(box, self, "distance_idx", label="Distance",
                     items=[name for name, _ in OWSilhouettePlot.Distances],
                     callback=self._invalidate_distances)
        self.cluster_var_cb = gui.comboBox(
            box, self, "cluster_var_idx", label="Cluster",
            callback=self._invalidate_scores)
        self.cluster_var_model = itemmodels.VariableListModel(parent=self)
        self.cluster_var_cb.setModel(self.cluster_var_model)

        gui.spin(box, self, "bar_size", minv=1, maxv=10, label="Bar Size",
                 callback=self._update_bar_size)

        gui.checkBox(box, self, "group_by_cluster", "Group by cluster",
                     callback=self._replot)

        self.annotation_cb = gui.comboBox(
            box, self, "annotation_var_idx", label="Annotations",
            callback=self._update_annotations)
        self.annotation_var_model = itemmodels.VariableListModel(parent=self)
        self.annotation_var_model[:] = ["None"]
        self.annotation_cb.setModel(self.annotation_var_model)

        gui.rubber(self.controlArea)

        box = gui.vBox(self.controlArea, "Output")
        # Toggling the option changes the output domain, so the outputs have
        # to be refreshed. `self.commit` is looked up at call time, i.e.
        # after `gui.auto_commit` below has run (presumably so that the
        # conditional commit is used -- confirm against `gui.auto_commit`).
        gui.checkBox(
            box, self, "add_scores", "Add silhouette scores",
            callback=lambda: self.commit()
        )
        gui.auto_commit(
            box, self, "auto_commit", "Commit",
            auto_label="Auto commit", box=False)

        self.scene = QtGui.QGraphicsScene()
        self.view = QtGui.QGraphicsView(self.scene)
        self.view.setRenderHint(QtGui.QPainter.Antialiasing, True)
        self.view.setAlignment(Qt.AlignTop | Qt.AlignLeft)
        self.mainArea.layout().addWidget(self.view)

    def sizeHint(self):
        """Return the control area's size hint expanded to at least 600x720."""
        sh = self.controlArea.sizeHint()
        return sh.expandedTo(QtCore.QSize(600, 720))

    @check_sql_input
    def set_data(self, data):
        """
        Set the input data set.

        Input is rejected (treated as no input, with an error message) when
        it has no discrete variable with at least two values or no
        continuous attribute.
        """
        self.closeContext()
        self.clear()
        error_msg = ""
        warning_msg = ""
        candidatevars = []
        if data is not None:
            candidatevars = [
                v for v in data.domain.variables + data.domain.metas
                if v.is_discrete and len(v.values) >= 2
            ]
            if not candidatevars:
                error_msg = "Input does not have any suitable cluster labels."
                data = None

        if data is not None:
            ncont = sum(v.is_continuous for v in data.domain.attributes)
            ndiscrete = len(data.domain.attributes) - ncont
            if ncont == 0:
                data = None
                error_msg = "No continuous columns"
            elif ncont < len(data.domain.attributes):
                warning_msg = "{0} discrete columns will not be used for " \
                              "distance computation".format(ndiscrete)

        self.data = data
        if data is not None:
            self.cluster_var_model[:] = candidatevars
            # Prefer the class variable as the default cluster label.
            if data.domain.class_var in candidatevars:
                self.cluster_var_idx = candidatevars.index(
                    data.domain.class_var)
            else:
                self.cluster_var_idx = 0

            annotvars = [var for var in data.domain.metas if var.is_string]
            self.annotation_var_model[:] = ["None"] + annotvars
            # Index 0 is the "None" entry.
            self.annotation_var_idx = 1 if len(annotvars) else 0
            self._effective_data = Orange.distance._preprocess(data)
            self.openContext(Orange.data.Domain(candidatevars))

        self.error(0, error_msg)
        self.warning(0, warning_msg)

    def handleNewSignals(self):
        """Recompute and redraw for the new input, then commit."""
        if self._effective_data is not None:
            self._update()
            self._replot()

        self.unconditional_commit()

    def clear(self):
        """
        Clear the widget state.
        """
        self.data = None
        self._effective_data = None
        self._matrix = None
        self._silhouette = None
        self._labels = None
        self.cluster_var_model[:] = []
        self.annotation_var_model[:] = ["None"]
        self._clear_scene()

    def _clear_scene(self):
        # Clear the graphics scene and associated objects
        self.scene.clear()
        self.scene.setSceneRect(QRectF())
        self._silplot = None

    def _invalidate_distances(self):
        # Invalidate the computed distance matrix and recompute the silhouette.
        self._matrix = None
        self._invalidate_scores()

    def _invalidate_scores(self):
        # Invalidate and recompute the current silhouette scores.
        self._labels = self._silhouette = None
        self._update()
        self._replot()
        if self.data is not None:
            self.commit()

    def _update(self):
        # Update/recompute the distances/scores as required
        if self.data is None:
            self._silhouette = None
            self._labels = None
            self._matrix = None
            self._clear_scene()
            return

        if self._matrix is None and self._effective_data is not None:
            _, metric = self.Distances[self.distance_idx]
            self._matrix = numpy.asarray(metric(self._effective_data))

        labelvar = self.cluster_var_model[self.cluster_var_idx]
        labels, _ = self.data.get_column_view(labelvar)
        labels = labels.astype(int)
        _, counts = numpy.unique(labels, return_counts=True)
        if numpy.count_nonzero(counts) >= 2:
            self.error(1, "")
            silhouette = sklearn.metrics.silhouette_samples(
                self._matrix, labels, metric="precomputed")
        else:
            self.error(1, "Need at least 2 clusters with non zero counts")
            labels = silhouette = None

        self._labels = labels
        self._silhouette = silhouette

    def _replot(self):
        # Clear and replot/initialize the scene
        self._clear_scene()
        if self._silhouette is not None and self._labels is not None:
            var = self.cluster_var_model[self.cluster_var_idx]
            silplot = SilhouettePlot()
            silplot.setBarHeight(self.bar_size)
            # Row names are only shown for bars of size 5 or more.
            silplot.setRowNamesVisible(self.bar_size >= 5)

            if self.group_by_cluster:
                silplot.setScores(self._silhouette, self._labels, var.values)
            else:
                # A single unnamed group holding all instances.
                silplot.setScores(
                    self._silhouette,
                    numpy.zeros(len(self._silhouette), dtype=int),
                    [""])

            self.scene.addItem(silplot)
            self._silplot = silplot
            self._update_annotations()

            silplot.resize(silplot.effectiveSizeHint(Qt.PreferredSize))
            silplot.selectionChanged.connect(self.commit)
            self.scene.setSceneRect(
                QRectF(QtCore.QPointF(0, 0),
                       self._silplot.effectiveSizeHint(Qt.PreferredSize)))

    def _update_bar_size(self):
        # Spin box callback; nothing to resize when no plot is shown.
        if self._silplot is not None:
            self._silplot.setBarHeight(self.bar_size)
            self._silplot.setRowNamesVisible(self.bar_size >= 5)
            self.scene.setSceneRect(
                QRectF(QtCore.QPointF(0, 0),
                       self._silplot.effectiveSizeHint(Qt.PreferredSize)))

    def _update_annotations(self):
        # Update the row names shown in the plot from the selected
        # annotation variable (index 0 is the "None" entry).
        if 0 < self.annotation_var_idx < len(self.annotation_var_model):
            annot_var = self.annotation_var_model[self.annotation_var_idx]
        else:
            annot_var = None

        if self._silplot is not None:
            if annot_var is not None:
                column, _ = self.data.get_column_view(annot_var)
                self._silplot.setRowNames(
                    [annot_var.str_val(value) for value in column])
            else:
                self._silplot.setRowNames(None)

    def commit(self):
        """
        Commit/send the current selection to the output.

        When "Add silhouette scores" is enabled a "Silhouette (<label>)"
        meta column is appended to the output domain. It is only written
        when scores are available.
        """
        selected = other = None
        if self.data is not None:
            selectedmask = numpy.full(len(self.data), False, dtype=bool)
            if self._silplot is not None:
                indices = self._silplot.selection()
                selectedmask[indices] = True
            scores = self._silhouette
            silhouette_var = None
            if self.add_scores:
                var = self.cluster_var_model[self.cluster_var_idx]
                silhouette_var = Orange.data.ContinuousVariable(
                    "Silhouette ({})".format(escape(var.name)))
                domain = Orange.data.Domain(
                    self.data.domain.attributes,
                    self.data.domain.class_vars,
                    self.data.domain.metas + (silhouette_var, ))
            else:
                domain = self.data.domain

            if numpy.count_nonzero(selectedmask):
                selected = self.data.from_table(
                    domain, self.data, numpy.flatnonzero(selectedmask))

            if numpy.count_nonzero(~selectedmask):
                other = self.data.from_table(
                    domain, self.data, numpy.flatnonzero(~selectedmask))

            # `scores` is None when fewer than two clusters are present.
            # `other` then holds every row, and indexing None would raise;
            # the new column is left as produced by `from_table`.
            if self.add_scores and scores is not None:
                if selected is not None:
                    selected[:, silhouette_var] = \
                        numpy.c_[scores[selectedmask]]
                if other is not None:
                    other[:, silhouette_var] = \
                        numpy.c_[scores[~selectedmask]]

        self.send("Selected Data", selected)
        self.send("Other Data", other)

    def onDeleteWidget(self):
        """Release the held data and scene items when the widget is removed."""
        self.clear()
        super().onDeleteWidget()
class OWSilhouettePlot(widget.OWWidget): name = "Silhouette Plot" description = "Visually assess cluster quality and " \ "the degree of cluster membership." icon = "icons/SilhouettePlot.svg" priority = 300 keywords = [] class Inputs: data = Input("Data", (Orange.data.Table, Orange.misc.DistMatrix)) class Outputs: selected_data = Output("Selected Data", Orange.data.Table, default=True) annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Orange.data.Table) replaces = [ "orangecontrib.prototypes.widgets.owsilhouetteplot.OWSilhouettePlot", "Orange.widgets.unsupervised.owsilhouetteplot.OWSilhouettePlot" ] settingsHandler = settings.PerfectDomainContextHandler() #: Distance metric index distance_idx = settings.Setting(0) #: Group/cluster variable index cluster_var_idx = settings.ContextSetting(0) #: Annotation variable index annotation_var_idx = settings.ContextSetting(0) #: Group the (displayed) silhouettes by cluster group_by_cluster = settings.Setting(True) #: A fixed size for an instance bar bar_size = settings.Setting(3) #: Add silhouette scores to output data auto_commit = settings.Setting(True) pending_selection = settings.Setting(None, schema_only=True) Distances = [("Euclidean", Orange.distance.Euclidean), ("Manhattan", Orange.distance.Manhattan), ("Cosine", Orange.distance.Cosine)] graph_name = "scene" class Error(widget.OWWidget.Error): need_two_clusters = Msg("Need at least two non-empty clusters") singleton_clusters_all = Msg("All clusters are singletons") memory_error = Msg("Not enough memory") value_error = Msg("Distances could not be computed: '{}'") input_validation_error = Msg("{}") class Warning(widget.OWWidget.Warning): missing_cluster_assignment = Msg( "{} instance{s} omitted (missing cluster assignment)") nan_distances = Msg("{} instance{s} omitted (undefined distances)") ignoring_categorical = Msg("Ignoring categorical features") def __init__(self): super().__init__() #: The input data self.data = None # type: Optional[Orange.data.Table] #: The 
input distance matrix (if present) self.distances = None # type: Optional[Orange.misc.DistMatrix] #: The effective distance matrix (is self.distances or computed from #: self.data depending on input) self._matrix = None # type: Optional[Orange.misc.DistMatrix] #: An bool mask (size == len(data)) indicating missing group/cluster #: assignments self._mask = None # type: Optional[np.ndarray] #: An array of cluster/group labels for instances with valid group #: assignment self._labels = None # type: Optional[np.ndarray] #: An array of silhouette scores for instances with valid group #: assignment self._silhouette = None # type: Optional[np.ndarray] self._silplot = None # type: Optional[SilhouettePlot] controllayout = self.controlArea.layout() assert isinstance(controllayout, QVBoxLayout) self._distances_gui_box = distbox = gui.widgetBox( None, "Distance" ) self._distances_gui_cb = gui.comboBox( distbox, self, "distance_idx", items=[name for name, _ in OWSilhouettePlot.Distances], orientation=Qt.Horizontal, callback=self._invalidate_distances) controllayout.addWidget(distbox) box = gui.vBox(self.controlArea, "Cluster Label") self.cluster_var_cb = gui.comboBox( box, self, "cluster_var_idx", contentsLength=14, searchable=True, callback=self._invalidate_scores ) gui.checkBox( box, self, "group_by_cluster", "Group by cluster", callback=self._replot) self.cluster_var_model = itemmodels.VariableListModel(parent=self) self.cluster_var_cb.setModel(self.cluster_var_model) box = gui.vBox(self.controlArea, "Bars") gui.widgetLabel(box, "Bar width:") gui.hSlider( box, self, "bar_size", minValue=1, maxValue=10, step=1, callback=self._update_bar_size) gui.widgetLabel(box, "Annotations:") self.annotation_cb = gui.comboBox( box, self, "annotation_var_idx", contentsLength=14, callback=self._update_annotations) self.annotation_var_model = itemmodels.VariableListModel(parent=self) self.annotation_var_model[:] = ["None"] self.annotation_cb.setModel(self.annotation_var_model) ibox = 
gui.indentedBox(box, 5) self.ann_hidden_warning = warning = gui.widgetLabel( ibox, "(increase the width to show)") ibox.setFixedWidth(ibox.sizeHint().width()) warning.setVisible(False) gui.rubber(self.controlArea) gui.auto_send(self.buttonsArea, self, "auto_commit") self.scene = GraphicsScene(self) self.view = StickyGraphicsView(self.scene) self.view.setRenderHint(QPainter.Antialiasing, True) self.view.setAlignment(Qt.AlignTop | Qt.AlignLeft) self.mainArea.layout().addWidget(self.view) self.settingsAboutToBePacked.connect(self.pack_settings) def sizeHint(self): sh = self.controlArea.sizeHint() return sh.expandedTo(QSize(600, 720)) def pack_settings(self): if self.data and self._silplot is not None: self.pending_selection = list(self._silplot.selection()) else: self.pending_selection = None @Inputs.data @check_sql_input def set_data(self, data: Union[Table, DistMatrix, None]): """ Set the input dataset or distance matrix. """ self.closeContext() self.clear() try: if isinstance(data, Orange.misc.DistMatrix): self._set_distances(data) elif isinstance(data, Orange.data.Table): self._set_table(data) else: self.distances = None self.data = None except InputValidationError as err: self.Error.input_validation_error(err.message) self.distances = None self.data = None def _set_table(self, data: Table): self._setup_control_models(data.domain) self.data = data self.distances = None def _set_distances(self, distances: DistMatrix): if isinstance(distances.row_items, Orange.data.Table) and \ distances.axis == 1: data = distances.row_items else: raise ValidationError("Input matrix does not have associated data") if data is not None: self._setup_control_models(data.domain) self.distances = distances self.data = data def handleNewSignals(self): if not self._is_empty(): self._update() self._replot() if self.pending_selection is not None and self._silplot is not None: # If selection contains indices that are too large, the data # file must had been modified, so we ignore selection if 
max(self.pending_selection, default=-1) < len(self.data): self._silplot.setSelection(np.array(self.pending_selection)) self.pending_selection = None # Disable/enable the Distances GUI controls if applicable self._distances_gui_box.setEnabled(self.distances is None) self.commit.now() def _setup_control_models(self, domain: Domain): groupvars = [ v for v in domain.variables + domain.metas if v.is_discrete and len(v.values) >= 2] if not groupvars: raise NoGroupVariable() self.cluster_var_model[:] = groupvars if domain.class_var in groupvars: self.cluster_var_idx = groupvars.index(domain.class_var) else: self.cluster_var_idx = 0 annotvars = [var for var in domain.variables + domain.metas if var.is_string or var.is_discrete] self.annotation_var_model[:] = ["None"] + annotvars self.annotation_var_idx = 1 if annotvars else 0 self.openContext(Orange.data.Domain(groupvars)) def _is_empty(self) -> bool: # Is empty (does not have any input). return (self.data is None or len(self.data) == 0) \ and self.distances is None def clear(self): """ Clear the widget state. """ self.data = None self.distances = None self._matrix = None self._mask = None self._silhouette = None self._labels = None self.cluster_var_model[:] = [] self.annotation_var_model[:] = ["None"] self._clear_scene() self.Error.clear() self.Warning.clear() def _clear_scene(self): # Clear the graphics scene and associated objects self.scene.clear() self.scene.setSceneRect(QRectF()) self.view.setSceneRect(QRectF()) self.view.setHeaderSceneRect(QRectF()) self.view.setFooterSceneRect(QRectF()) self._silplot = None def _invalidate_distances(self): # Invalidate the computed distance matrix and recompute the silhouette. self._matrix = None self._invalidate_scores() def _invalidate_scores(self): # Invalidate and recompute the current silhouette scores. 
self._labels = self._silhouette = self._mask = None self._update() self._replot() if self.data is not None: self.commit.deferred() def _ensure_matrix(self): # ensure self._matrix is computed if necessary if self._is_empty(): return if self._matrix is None: if self.distances is not None: self._matrix = np.asarray(self.distances) elif self.data is not None: data = self.data _, metric = self.Distances[self.distance_idx] if not metric.supports_discrete and any( a.is_discrete for a in data.domain.attributes): self.Warning.ignoring_categorical() data = Orange.distance.remove_discrete_features(data) try: self._matrix = np.asarray(metric(data)) except MemoryError: self.Error.memory_error() return except ValueError as err: self.Error.value_error(str(err)) return else: assert False, "invalid state" def _update(self): # Update/recompute the effective distances and scores as required. self._clear_messages() if self._is_empty(): self._reset_all() return self._ensure_matrix() if self._matrix is None: return labelvar = self.cluster_var_model[self.cluster_var_idx] labels, _ = self.data.get_column_view(labelvar) labels = np.asarray(labels, dtype=float) cluster_mask = np.isnan(labels) dist_mask = np.isnan(self._matrix).all(axis=0) mask = cluster_mask | dist_mask labels = labels.astype(int) labels = labels[~mask] labels_unq = np.unique(labels) if len(labels_unq) < 2: self.Error.need_two_clusters() labels = silhouette = mask = None elif len(labels_unq) == len(labels): self.Error.singleton_clusters_all() labels = silhouette = mask = None else: silhouette = sklearn.metrics.silhouette_samples( self._matrix[~mask, :][:, ~mask], labels, metric="precomputed") self._mask = mask self._labels = labels self._silhouette = silhouette if mask is not None: count_missing = np.count_nonzero(cluster_mask) if count_missing: self.Warning.missing_cluster_assignment( count_missing, s="s" if count_missing > 1 else "") count_nandist = np.count_nonzero(dist_mask) if count_nandist: self.Warning.nan_distances( 
count_nandist, s="s" if count_nandist > 1 else "") def _reset_all(self): self._mask = None self._silhouette = None self._labels = None self._matrix = None self._clear_scene() def _clear_messages(self): self.Error.clear() self.Warning.clear() def _set_bar_height(self): visible = self.bar_size >= 5 self._silplot.setBarHeight(self.bar_size) self._silplot.setRowNamesVisible(visible) self.ann_hidden_warning.setVisible( not visible and self.annotation_var_idx > 0) def _replot(self): # Clear and replot/initialize the scene self._clear_scene() if self._silhouette is not None and self._labels is not None: var = self.cluster_var_model[self.cluster_var_idx] self._silplot = silplot = SilhouettePlot() self._set_bar_height() if self.group_by_cluster: silplot.setScores(self._silhouette, self._labels, var.values, var.colors) else: silplot.setScores( self._silhouette, np.zeros(len(self._silhouette), dtype=int), [""], np.array([[63, 207, 207]]) ) self.scene.addItem(silplot) self._update_annotations() silplot.selectionChanged.connect(self.commit.deferred) silplot.layout().activate() self._update_scene_rect() silplot.geometryChanged.connect(self._update_scene_rect) def _update_bar_size(self): if self._silplot is not None: self._set_bar_height() def _update_annotations(self): if 0 < self.annotation_var_idx < len(self.annotation_var_model): annot_var = self.annotation_var_model[self.annotation_var_idx] else: annot_var = None self.ann_hidden_warning.setVisible( self.bar_size < 5 and annot_var is not None) if self._silplot is not None: if annot_var is not None: column, _ = self.data.get_column_view(annot_var) if self._mask is not None: assert column.shape == self._mask.shape # pylint: disable=invalid-unary-operand-type column = column[~self._mask] self._silplot.setRowNames( [annot_var.str_val(value) for value in column]) else: self._silplot.setRowNames(None) def _update_scene_rect(self): geom = self._silplot.geometry() self.scene.setSceneRect(geom) self.view.setSceneRect(geom) header = 
self._silplot.topScaleItem() footer = self._silplot.bottomScaleItem() def extend_horizontal(rect): # type: (QRectF) -> QRectF rect = QRectF(rect) rect.setLeft(geom.left()) rect.setRight(geom.right()) return rect margin = 3 if header is not None: self.view.setHeaderSceneRect( extend_horizontal(header.geometry().adjusted(0, 0, 0, margin))) if footer is not None: self.view.setFooterSceneRect( extend_horizontal(footer.geometry().adjusted(0, -margin, 0, 0))) @gui.deferred def commit(self): """ Commit/send the current selection to the output. """ selected = indices = data = None if self.data is not None: selectedmask = np.full(len(self.data), False, dtype=bool) if self._silplot is not None: indices = self._silplot.selection() assert (np.diff(indices) > 0).all(), "strictly increasing" if self._mask is not None: # pylint: disable=invalid-unary-operand-type indices = np.flatnonzero(~self._mask)[indices] selectedmask[indices] = True if self._mask is not None: scores = np.full(shape=selectedmask.shape, fill_value=np.nan) # pylint: disable=invalid-unary-operand-type scores[~self._mask] = self._silhouette else: scores = self._silhouette var = self.cluster_var_model[self.cluster_var_idx] domain = self.data.domain proposed = "Silhouette ({})".format(escape(var.name)) names = [var.name for var in itertools.chain(domain.attributes, domain.class_vars, domain.metas)] unique = get_unique_names(names, proposed) silhouette_var = Orange.data.ContinuousVariable(unique) domain = Orange.data.Domain( domain.attributes, domain.class_vars, domain.metas + (silhouette_var, )) if np.count_nonzero(selectedmask): selected = self.data.from_table( domain, self.data, np.flatnonzero(selectedmask)) if selected is not None: with selected.unlocked(selected.metas): selected[:, silhouette_var] = np.c_[scores[selectedmask]] data = self.data.transform(domain) with data.unlocked(data.metas): data[:, silhouette_var] = np.c_[scores] self.Outputs.selected_data.send(selected) 
self.Outputs.annotated_data.send(create_annotated_table(data, indices)) def send_report(self): if not len(self.cluster_var_model): return self.report_plot() caption = "Silhouette plot ({} distance), clustered by '{}'".format( self.Distances[self.distance_idx][0], self.cluster_var_model[self.cluster_var_idx]) if self.annotation_var_idx and self._silplot.rowNamesVisible(): caption += ", annotated with '{}'".format( self.annotation_var_model[self.annotation_var_idx]) self.report_caption(caption) def onDeleteWidget(self): self.clear() super().onDeleteWidget()
class OWDistanceMap(widget.OWWidget):
    """
    Widget that renders an input distance matrix as a colored map.

    The rows/columns can optionally be reordered by hierarchical clustering
    (with or without leaf ordering); the matching dendrograms and row/column
    annotations are laid out around the map. The selected rows/columns are
    sent to the outputs.
    """
    name = "Distance Map"
    description = "Visualize a distance matrix."
    icon = "icons/DistanceMap.svg"
    priority = 1200
    keywords = []

    class Inputs:
        distances = Input("Distances", Orange.misc.DistMatrix)

    class Outputs:
        selected_data = Output("Selected Data", Orange.data.Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Orange.data.Table)
        features = Output("Features", widget.AttributeList, dynamic=False)

    settingsHandler = settings.PerfectDomainContextHandler()

    #: type of ordering to apply to matrix rows/columns
    NoOrdering, Clustering, OrderedClustering = 0, 1, 2

    sorting = settings.Setting(NoOrdering)

    palette_name = settings.Setting(colorpalettes.DefaultContinuousPaletteName)
    color_gamma = settings.Setting(0.0)
    color_low = settings.Setting(0.0)
    color_high = settings.Setting(1.0)

    annotation_idx = settings.ContextSetting(0)
    pending_selection = settings.Setting(None, schema_only=True)

    autocommit = settings.Setting(True)

    graph_name = "grid_widget"

    # Disable clustering for inputs bigger than this
    _MaxClustering = 25000
    # Disable cluster leaf ordering for inputs bigger than this
    _MaxOrderedClustering = 2000

    def __init__(self):
        super().__init__()

        #: The input distance matrix (None when there is no valid input)
        self.matrix = None
        #: Items associated with the matrix rows/columns (see set_items)
        self.items = None
        #: Cached clustering trees (plain and with ordered leaves)
        self._tree = None
        self._ordered_tree = None
        #: The matrix reordered according to the current `sorting`
        self._sorted_matrix = None
        #: Permutation applied to obtain _sorted_matrix (None: no reordering)
        self._sort_indices = None
        #: Selected indices, expressed in the order of the input matrix
        self._selection = None
        #: Current annotation labels, in the order of the input matrix
        self._labels = None

        self.sorting_cb = gui.comboBox(
            self.controlArea, self, "sorting", box="Element Sorting",
            items=["None", "Clustering", "Clustering with ordered leaves"],
            callback=self._invalidate_ordering)

        box = gui.vBox(self.controlArea, "Colors")
        self.color_box = gui.palette_combo_box(self.palette_name)
        self.color_box.currentIndexChanged.connect(self._update_color)
        box.layout().addWidget(self.color_box)

        form = QFormLayout(
            formAlignment=Qt.AlignLeft,
            labelAlignment=Qt.AlignLeft,
            fieldGrowthPolicy=QFormLayout.AllNonFixedFieldsGrow
        )
        # NOTE: no "Gamma" slider is created here, so the `color_gamma`
        # setting has no GUI control in this widget.
        form.addRow(
            "Low:",
            gui.hSlider(box, self, "color_low", minValue=0.0, maxValue=1.0,
                        step=0.05, ticks=True, intOnly=False,
                        createLabel=False, callback=self._update_color)
        )
        form.addRow(
            "High:",
            gui.hSlider(box, self, "color_high", minValue=0.0, maxValue=1.0,
                        step=0.05, ticks=True, intOnly=False,
                        createLabel=False, callback=self._update_color)
        )
        box.layout().addLayout(form)

        self.annot_combo = gui.comboBox(
            self.controlArea, self, "annotation_idx", box="Annotations",
            callback=self._invalidate_annotations, contentsLength=12)
        self.annot_combo.setModel(itemmodels.VariableListModel())
        self.annot_combo.model()[:] = ["None", "Enumeration"]
        self.controlArea.layout().addStretch()

        gui.auto_send(self.controlArea, self, "autocommit")

        self.view = pg.GraphicsView(background="w")
        self.mainArea.layout().addWidget(self.view)

        # 3 x 3 grid: dendrograms on the top/left, the map in the middle
        # and the text annotations on the right/bottom.
        self.grid_widget = pg.GraphicsWidget()
        self.grid = QGraphicsGridLayout()
        self.grid_widget.setLayout(self.grid)

        self.viewbox = pg.ViewBox(enableMouse=False, enableMenu=False)
        self.viewbox.setAcceptedMouseButtons(Qt.NoButton)
        self.viewbox.setAcceptHoverEvents(False)
        self.grid.addItem(self.viewbox, 1, 1)

        self.left_dendrogram = DendrogramWidget(
            self.grid_widget, orientation=DendrogramWidget.Left,
            selectionMode=DendrogramWidget.NoSelection,
            hoverHighlightEnabled=False
        )
        self.left_dendrogram.setAcceptedMouseButtons(Qt.NoButton)
        self.left_dendrogram.setAcceptHoverEvents(False)

        self.top_dendrogram = DendrogramWidget(
            self.grid_widget, orientation=DendrogramWidget.Top,
            selectionMode=DendrogramWidget.NoSelection,
            hoverHighlightEnabled=False
        )
        self.top_dendrogram.setAcceptedMouseButtons(Qt.NoButton)
        self.top_dendrogram.setAcceptHoverEvents(False)

        self.grid.addItem(self.left_dendrogram, 1, 0)
        self.grid.addItem(self.top_dendrogram, 0, 1)

        self.right_labels = TextList(
            alignment=Qt.AlignLeft | Qt.AlignVCenter,
            sizePolicy=QSizePolicy(QSizePolicy.Fixed, QSizePolicy.Expanding)
        )
        self.bottom_labels = TextList(
            orientation=Qt.Horizontal,
            alignment=Qt.AlignRight | Qt.AlignVCenter,
            sizePolicy=QSizePolicy(QSizePolicy.Expanding, QSizePolicy.Fixed)
        )

        self.grid.addItem(self.right_labels, 1, 2)
        self.grid.addItem(self.bottom_labels, 2, 1)

        self.view.setCentralItem(self.grid_widget)

        self.left_dendrogram.hide()
        self.top_dendrogram.hide()
        self.right_labels.hide()
        self.bottom_labels.hide()

        self.matrix_item = None
        self.dendrogram = None

        self.grid_widget.scene().installEventFilter(self)

        self.settingsAboutToBePacked.connect(self.pack_settings)

    def pack_settings(self):
        """Store the current map selection into `pending_selection`."""
        if self.matrix_item is not None:
            self.pending_selection = self.matrix_item.selections()
        else:
            self.pending_selection = None

    @Inputs.distances
    def set_distances(self, matrix):
        """
        Set the input distance matrix.

        A matrix with fewer than two rows is rejected with an error and
        treated as no input. Sorting options that exceed the size limits
        (`_MaxClustering`, `_MaxOrderedClustering`) are disabled in the
        combo box and the current `sorting` is downgraded if necessary.
        """
        self.closeContext()
        self.clear()
        self.error()
        if matrix is not None:
            N, _ = matrix.shape
            if N < 2:
                self.error("Empty distance matrix.")
                matrix = None

        self.matrix = matrix
        if matrix is not None:
            self.set_items(matrix.row_items, matrix.axis)
            N, _ = matrix.shape
        else:
            self.set_items(None)
            N = 0

        model = self.sorting_cb.model()
        msg = None

        # "Clustering with ordered leaves" entry
        item = model.item(2)
        if N > OWDistanceMap._MaxOrderedClustering:
            item.setFlags(item.flags() & ~Qt.ItemIsEnabled)
            if self.sorting == OWDistanceMap.OrderedClustering:
                self.sorting = OWDistanceMap.Clustering
                msg = "Cluster ordering was disabled due to the input " \
                      "matrix being too big"
        else:
            item.setFlags(item.flags() | Qt.ItemIsEnabled)

        # "Clustering" entry
        item = model.item(1)
        if N > OWDistanceMap._MaxClustering:
            item.setFlags(item.flags() & ~Qt.ItemIsEnabled)
            if self.sorting == OWDistanceMap.Clustering:
                self.sorting = OWDistanceMap.NoOrdering
                msg = "Clustering was disabled due to the input " \
                      "matrix being too big"
        else:
            item.setFlags(item.flags() | Qt.ItemIsEnabled)

        self.information(msg)

    def set_items(self, items, axis=1):
        """
        Set the items associated with the matrix and refill the annotation
        combo box with the choices applicable to them.
        """
        self.items = items
        model = self.annot_combo.model()
        if items is None:
            model[:] = ["None", "Enumeration"]
        elif not axis:
            model[:] = ["None", "Enumeration", "Attribute names"]
        elif isinstance(items, Orange.data.Table):
            annot_vars = list(filter_visible(items.domain.variables)) + \
                         list(items.domain.metas)
            model[:] = ["None", "Enumeration"] + annot_vars
            self.annotation_idx = 0
            self.openContext(items.domain)
        elif isinstance(items, list) and \
                all(isinstance(item, Orange.data.Variable) for item in items):
            model[:] = ["None", "Enumeration", "Name"]
        else:
            model[:] = ["None", "Enumeration"]
        # Keep the (possibly restored) index within the new model
        self.annotation_idx = min(self.annotation_idx, len(model) - 1)

    def clear(self):
        """Clear the widget state and the displayed plot."""
        self.matrix = None
        self.cluster = None
        self._tree = None
        self._ordered_tree = None
        self._sorted_matrix = None
        # Do not keep a permutation computed for the previous matrix
        self._sort_indices = None
        self._selection = []
        self._clear_plot()

    def handleNewSignals(self):
        if self.matrix is not None:
            self._update_ordering()
            self._setup_scene()
            self._update_labels()
            if self.pending_selection is not None:
                self.matrix_item.set_selections(self.pending_selection)
                self.pending_selection = None
        self.unconditional_commit()

    def _clear_plot(self):
        # Remove the map item, the dendrograms and the labels from display
        def remove(item):
            item.setParentItem(None)
            item.scene().removeItem(item)

        if self.matrix_item is not None:
            self.matrix_item.selectionChanged.disconnect(
                self._invalidate_selection)
            remove(self.matrix_item)
            self.matrix_item = None

        self._set_displayed_dendrogram(None)
        self._set_labels(None)

    def _cluster_tree(self):
        # Return the (cached) clustering of the input matrix
        if self._tree is None:
            self._tree = hierarchical.dist_matrix_clustering(self.matrix)
        return self._tree

    def _ordered_cluster_tree(self):
        # Return the (cached) clustering with ordered leaves
        if self._ordered_tree is None:
            tree = self._cluster_tree()
            self._ordered_tree = \
                hierarchical.optimal_leaf_ordering(tree, self.matrix)
        return self._ordered_tree

    def _tree_for_sorting(self):
        # Return the clustering tree matching the current `sorting`, or
        # None when no reordering applies (including unexpected values).
        if self.sorting == OWDistanceMap.Clustering:
            return self._cluster_tree()
        if self.sorting == OWDistanceMap.OrderedClustering:
            return self._ordered_cluster_tree()
        return None

    def _setup_scene(self):
        self._clear_plot()
        self.matrix_item = DistanceMapItem(self._sorted_matrix)
        # Scale the y axis to compensate for pg.ViewBox's y axis invert
        self.matrix_item.setTransform(QTransform.fromScale(1, -1), )
        self.viewbox.addItem(self.matrix_item)
        # Set fixed view box range.
        h, w = self._sorted_matrix.shape
        self.viewbox.setRange(QRectF(0, -h, w, h), padding=0)

        self.matrix_item.selectionChanged.connect(self._invalidate_selection)

        self._set_displayed_dendrogram(self._tree_for_sorting())
        self._update_color()

    def _set_displayed_dendrogram(self, root):
        self.left_dendrogram.set_root(root)
        self.top_dendrogram.set_root(root)
        self.left_dendrogram.setVisible(root is not None)
        self.top_dendrogram.setVisible(root is not None)

        constraint = 0 if root is None else -1  # 150
        self.left_dendrogram.setMaximumWidth(constraint)
        self.top_dendrogram.setMaximumHeight(constraint)

    def _invalidate_ordering(self):
        self._sorted_matrix = None
        if self.matrix is not None:
            self._update_ordering()
            self._setup_scene()
            self._update_labels()
            self._invalidate_selection()

    def _update_ordering(self):
        # Recompute _sorted_matrix/_sort_indices for the current `sorting`
        tree = self._tree_for_sorting()
        if tree is None:
            self._sorted_matrix = self.matrix
            self._sort_indices = None
            return

        leaves = hierarchical.leaves(tree)
        indices = numpy.array([leaf.value.index for leaf in leaves])
        X = self.matrix
        self._sorted_matrix = X[indices[:, numpy.newaxis],
                                indices[numpy.newaxis, :]]
        self._sort_indices = indices

    def _invalidate_annotations(self):
        if self.matrix is not None:
            self._update_labels()

    def _update_labels(self):
        # Build the annotation labels for the selected annotation entry.
        # `labels` stays None (no annotations) when the entry does not
        # apply to the current items.
        labels = None
        if self.annotation_idx == 0:  # None
            pass
        elif self.annotation_idx == 1:  # Enumeration
            labels = [str(i + 1) for i in range(self.matrix.shape[0])]
        elif self.annot_combo.model()[self.annotation_idx] == "Attribute names":
            attr = self.matrix.row_items.domain.attributes
            labels = [str(attr[i]) for i in range(self.matrix.shape[0])]
        elif self.annotation_idx == 2 and \
                isinstance(self.items, widget.AttributeList):
            labels = [v.name for v in self.items]
        elif isinstance(self.items, Orange.data.Table):
            var = self.annot_combo.model()[self.annotation_idx]
            column, _ = self.items.get_column_view(var)
            labels = [var.str_val(value) for value in column]

        self._set_labels(labels)

    def _set_labels(self, labels):
        self._labels = labels

        # Show the labels in the same order as the displayed matrix
        if labels and self._sort_indices is not None:
            labels = [labels[i] for i in self._sort_indices]

        for textlist in [self.right_labels, self.bottom_labels]:
            textlist.setItems(labels or [])
            textlist.setVisible(bool(labels))

        constraint = -1 if labels else 0
        self.right_labels.setMaximumWidth(constraint)
        self.bottom_labels.setMaximumHeight(constraint)

    def _update_color(self):
        palette = self.color_box.currentData()
        self.palette_name = palette.name
        if self.matrix_item:
            colors = palette.lookup_table(self.color_low, self.color_high)
            self.matrix_item.setLookupTable(colors)

    def _invalidate_selection(self):
        # Translate the selection on the displayed map into indices of the
        # input matrix and commit it.
        if self.matrix_item is None:
            return
        ranges = self.matrix_item.selections()
        # Flatten twice: selections -> ranges -> individual indices
        ranges = reduce(iadd, ranges, [])
        indices = reduce(iadd, ranges, [])
        if self._sort_indices is not None:
            sortind = self._sort_indices
            indices = [sortind[i] for i in indices]
        self._selection = list(sorted(set(indices)))
        self.commit()

    def commit(self):
        """Send the current selection to the outputs."""
        datasubset = None
        featuresubset = None

        if not self._selection:
            pass
        elif isinstance(self.items, Orange.data.Table):
            indices = self._selection
            if self.matrix.axis == 1:
                datasubset = self.items.from_table_rows(self.items, indices)
            elif self.matrix.axis == 0:
                domain = Orange.data.Domain(
                    [self.items.domain[i] for i in indices],
                    self.items.domain.class_vars,
                    self.items.domain.metas)
                datasubset = self.items.transform(domain)
        elif isinstance(self.items, widget.AttributeList):
            subset = [self.items[i] for i in self._selection]
            featuresubset = widget.AttributeList(subset)

        self.Outputs.selected_data.send(datasubset)
        self.Outputs.annotated_data.send(
            create_annotated_table(self.items, self._selection))
        self.Outputs.features.send(featuresubset)

    def onDeleteWidget(self):
        super().onDeleteWidget()
        self.clear()

    def send_report(self):
        annot = self.annot_combo.currentText()
        # The first two entries ("None", "Enumeration") are not variable
        # names, so they are reported in lower case
        if self.annotation_idx <= 1:
            annot = annot.lower()
        self.report_items((
            ("Sorting", self.sorting_cb.currentText().lower()),
            ("Annotations", annot)
        ))
        if self.matrix is not None:
            self.report_plot()
class OWLinePlot(OWWidget):
    """
    Widget that plots every data instance as a line over the continuous
    features, optionally grouped by a discrete variable, together with the
    per-group mean curve and error bars.
    """
    name = "Line Plot"
    description = "Visualization of data profiles (e.g., time series)."
    icon = "icons/LinePlot.svg"
    priority = 1030

    class Inputs:
        data = Input("Data", Table, default=True)
        data_subset = Input("Data Subset", Table)

    class Outputs:
        selected_data = Output("Selected Data", Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)

    settingsHandler = settings.PerfectDomainContextHandler()

    group_var = settings.Setting("")  #: Group by group_var's values
    selected_classes = settings.Setting([])  #: List of selected class indices
    display_index = settings.Setting(LinePlotDisplay.INSTANCES)
    display_quartiles = settings.Setting(False)
    auto_commit = settings.Setting(True)
    selection = settings.ContextSetting([])

    class Information(OWWidget.Information):
        not_enough_attrs = Msg("Need at least one continuous feature.")

    def __init__(self, parent=None):
        super().__init__(parent)

        #: Values of the current group variable (labels of group_listbox)
        self.classes = []

        self.data = None
        self.data_subset = None
        #: Ids shared by `data` and `data_subset`
        self.subset_selection = []
        self.group_variables = []
        #: Continuous attributes shown on the x axis
        self.graph_variables = []
        #: One namespace per group (or None for an empty group)
        self.__groups = None

        # Setup GUI
        infobox = gui.widgetBox(self.controlArea, "Info")
        self.infoLabel = gui.widgetLabel(infobox, "No data on input.")
        displaybox = gui.widgetBox(self.controlArea, "Display")
        radiobox = gui.radioButtons(displaybox, self, "display_index",
                                    callback=self.__update_visibility)
        gui.appendRadioButton(radiobox, "Line plot")
        gui.appendRadioButton(radiobox, "Mean")
        gui.appendRadioButton(radiobox, "Line plot with mean")

        showbox = gui.widgetBox(self.controlArea, "Show")
        gui.checkBox(showbox, self, "display_quartiles", "Error bars",
                     callback=self.__update_visibility)

        group_box = gui.widgetBox(self.controlArea, "Group by")
        self.cb_attr = gui.comboBox(group_box, self, "group_var",
                                    sendSelectedValue=True,
                                    callback=self.update_group_var)
        self.group_listbox = gui.listBox(
            group_box, self, "selected_classes", "classes",
            selectionMode=QListWidget.MultiSelection,
            callback=self.__on_class_selection_changed)
        self.group_listbox.setVisible(False)

        self.gui = OWPlotGUI(self)
        self.box_zoom_select(self.controlArea)

        gui.rubber(self.controlArea)
        gui.auto_commit(self.controlArea, self, "auto_commit",
                        "Send Selection", "Send Automatically")

        self.graph = LinePlotGraph(self)
        self.graph.getPlotItem().buttonsHidden = True
        self.graph.setRenderHint(QPainter.Antialiasing, True)
        self.mainArea.layout().addWidget(self.graph)

    def box_zoom_select(self, parent):
        """Create the "Zoom/Select" toolbar in `parent` and return its box."""
        g = self.gui
        box_zoom_select = gui.vBox(parent, "Zoom/Select")
        zoom_select_toolbar = g.zoom_select_toolbar(
            box_zoom_select, nomargin=True,
            buttons=[
                g.StateButtonsBegin, g.SimpleSelect, g.Pan, g.Zoom,
                g.StateButtonsEnd, g.ZoomReset
            ])
        buttons = zoom_select_toolbar.buttons
        buttons[g.SimpleSelect].clicked.connect(self.select_button_clicked)
        buttons[g.Pan].clicked.connect(self.pan_button_clicked)
        buttons[g.Zoom].clicked.connect(self.zoom_button_clicked)
        buttons[g.ZoomReset].clicked.connect(self.reset_button_clicked)
        return box_zoom_select

    def select_button_clicked(self):
        self.graph.state = SELECT
        self.graph.getViewBox().setMouseMode(self.graph.getViewBox().RectMode)

    def pan_button_clicked(self):
        self.graph.state = PANNING
        self.graph.getViewBox().setMouseMode(self.graph.getViewBox().PanMode)

    def zoom_button_clicked(self):
        self.graph.state = ZOOMING
        self.graph.getViewBox().setMouseMode(self.graph.getViewBox().RectMode)

    def reset_button_clicked(self):
        self.graph.getViewBox().autoRange()

    def selection_changed(self):
        """Copy the graph's selection into `selection` and commit it."""
        self.selection = list(self.graph.selection)
        self.commit()

    def sizeHint(self):
        return QSize(800, 600)

    def clear(self):
        """
        Clear/reset the widget state.
        """
        self.cb_attr.clear()
        self.group_listbox.clear()
        self.data = None
        self.__groups = None
        self.graph.reset()
        self.infoLabel.setText("No data on input.")

    @Inputs.data
    def set_data(self, data):
        """
        Set the input profile dataset.
        """
        self.closeContext()
        self.clear()
        self.clear_messages()

        self.data = data
        if data is not None:
            n_instances = len(data)
            n_attrs = len(data.domain.attributes)
            self.infoLabel.setText("%i instances on input\n%i attributes" % (
                n_instances, n_attrs))

            self.graph_variables = [
                var for var in data.domain.attributes if var.is_continuous
            ]
            if len(self.graph_variables) < 1:
                self.Information.not_enough_attrs()
            else:
                groupvars = [
                    var for var in data.domain.variables + data.domain.metas
                    if var.is_discrete
                ]
                if len(groupvars) > 0:
                    self.cb_attr.addItems([str(var) for var in groupvars])
                    self.group_var = str(groupvars[0])
                    self.group_variables = groupvars
                    self.update_group_var()
                else:
                    # There is nothing to group by: drop class state left
                    # over from stored settings or from earlier data, so
                    # that _setup_plot takes its ungrouped branch instead
                    # of looking up a stale `group_var` in this domain.
                    self.classes = []
                    self.selected_classes = []
                    self._setup_plot()

        self.selection = []
        self.openContext(data)
        self.select_data_instances()
        self.commit()

    @Inputs.data_subset
    def set_data_subset(self, subset):
        """
        Set the supplementary input subset dataset.
        """
        self.data_subset = subset

    def handleNewSignals(self):
        if len(self.subset_selection) and self.data is not None:
            self.graph.deselect_subset(self.subset_selection)
        self.subset_selection = []
        if self.data is not None and self.data_subset is not None:
            intersection = set(self.data.ids).intersection(
                set(self.data_subset.ids))
            self.subset_selection = intersection
            self.graph.select_subset(self.subset_selection)

    def select_data_instances(self):
        """Apply the stored `selection` to the graph."""
        if self.data is None or not len(self.data) or not len(self.selection):
            return
        # A selection that does not fit the current data is discarded
        if max(self.selection) >= len(self.data):
            self.selection = []
        self.graph.select(self.selection)

    def _plot_curve(self, X, color, data, indices):
        """
        Add the profiles of `data`, their mean curve and error bars to the
        graph.

        Returns a tuple `(profile items, mean values, mean curve item,
        error bar item)`.
        """
        # Work on a copy: the alpha change below must not leak into the
        # caller's color object.
        color = QColor(color)
        dark_pen = QPen(color.darker(110), 4)
        dark_pen.setCosmetic(True)

        color.setAlpha(120)
        light_pen = QPen(color, 1)
        light_pen.setCosmetic(True)

        items = []
        for index, instance in zip(indices, data):
            item = LinePlotItem(index, instance.id, X, instance.x, light_pen)
            item.sigClicked.connect(self.graph.select_by_click)
            items.append(item)
            self.graph.add_line_plot_item(item)

        mean = np.nanmean(data.X, axis=0)
        meancurve = pg.PlotDataItem(x=X, y=mean, pen=dark_pen, symbol="o",
                                    pxMode=True, symbolSize=5, antialias=True)
        self.graph.addItem(meancurve)

        # Error bars span the first to the third quartile around the mean
        q1, _, q3 = np.nanpercentile(data.X, [25, 50, 75], axis=0)
        errorbar = pg.ErrorBarItem(x=X, y=mean,
                                   bottom=np.clip(mean - q1, 0, mean - q1),
                                   top=np.clip(q3 - mean, 0, q3 - mean),
                                   beam=0.01)
        self.graph.addItem(errorbar)
        return items, mean, meancurve, errorbar

    def _setup_plot(self):
        """Setup the plot with new curve data."""
        assert self.data is not None
        self.graph.reset()

        data, domain = self.data, self.data.domain
        self.graph.getAxis('bottom').setTicks([[
            (i + 1, str(a)) for i, a in enumerate(self.graph_variables)
        ]])

        X = np.arange(1, len(self.graph_variables) + 1)
        groups = []

        if not self.selected_classes:
            # Ungrouped: a single group holding all the instances
            group_data = data[:, self.graph_variables]
            items, _, meancurve, errorbar = self._plot_curve(
                X, QColor(Qt.darkGray), group_data,
                list(range(len(self.data))))
            groups.append(
                namespace(data=group_data, profiles=items, mean=meancurve,
                          boxplot=errorbar))
        else:
            var = domain[self.group_var]
            class_col_data, _ = data.get_column_view(var)
            group_indices = [
                np.flatnonzero(class_col_data == i)
                for i in range(len(self.classes))
            ]

            for i, indices in enumerate(group_indices):
                if len(indices) == 0:
                    # Keep a placeholder so that positions match `classes`
                    groups.append(None)
                    continue
                if self.classes:
                    color = self.class_colors[i]
                else:
                    color = QColor(Qt.darkGray)
                group_data = data[indices, self.graph_variables]
                items, _, meancurve, errorbar = self._plot_curve(
                    X, color, group_data, indices)
                groups.append(
                    namespace(data=group_data, indices=indices,
                              profiles=items, mean=meancurve,
                              boxplot=errorbar))

        self.__groups = groups
        self.__update_visibility()

    def __update_visibility(self):
        # Show/hide the profiles, mean curves and error bars according to
        # `display_index` and `display_quartiles`
        if self.__groups is None:
            return

        show_profiles = self.display_index in (
            LinePlotDisplay.INSTANCES, LinePlotDisplay.INSTANCES_WITH_MEAN)
        show_mean = self.display_index in (
            LinePlotDisplay.MEAN, LinePlotDisplay.INSTANCES_WITH_MEAN)

        for group in self.__groups:
            if group is None:
                continue
            if show_profiles:
                self.graph.add_items()
            for item in group.profiles:
                item.setVisible(show_profiles)
            group.mean.setVisible(show_mean)
            group.boxplot.setVisible(self.display_quartiles)

    def __on_class_selection_changed(self):
        self.__update_visibility()
        self.graph.deselect_all()

    def update_group_var(self):
        """Rebuild the class list, colors and plot for `group_var`."""
        class_vals = self.data.domain[self.group_var].values
        self.classes = list(class_vals)
        self.class_colors = \
            colorpalette.ColorPaletteGenerator(len(class_vals))
        self.selected_classes = list(range(len(class_vals)))
        for i in range(len(class_vals)):
            item = self.group_listbox.item(i)
            item.setIcon(colorpalette.ColorPixmap(self.class_colors[i]))

        self._setup_plot()
        self.__on_class_selection_changed()

    def commit(self):
        """Send the selected and the annotated data to the outputs."""
        selected = self.data[self.selection] \
            if self.data is not None and len(self.selection) > 0 else None
        annotated = create_annotated_table(self.data, self.selection)
        self.Outputs.selected_data.send(selected)
        self.Outputs.annotated_data.send(annotated)
class OWColor(widget.OWWidget):
    """
    Widget for editing the names, values and colors of the variables of the
    input data.

    The editable descriptions are kept in `disc_descs` (discrete variables)
    and `cont_descs` (numeric variables). They can be saved to and loaded
    from a JSON file.
    """
    name = "Color"
    description = "Set color legend for variables."
    icon = "icons/Colors.svg"

    class Inputs:
        data = Input("Data", Orange.data.Table)

    class Outputs:
        data = Output("Data", Orange.data.Table)

    settingsHandler = settings.PerfectDomainContextHandler(
        match_values=settings.PerfectDomainContextHandler.MATCH_VALUES_ALL)
    disc_descs = settings.ContextSetting([])
    cont_descs = settings.ContextSetting([])
    selected_schema_index = settings.Setting(0)
    auto_apply = settings.Setting(True)

    settings_version = 2

    want_main_area = False

    def __init__(self):
        super().__init__()
        self.data = None
        self.orig_domain = self.domain = None

        box = gui.hBox(self.controlArea, "Discrete Variables")
        self.disc_model = DiscColorTableModel()
        self.disc_view = DiscreteTable(self.disc_model)
        self.disc_model.dataChanged.connect(self._on_data_changed)
        box.layout().addWidget(self.disc_view)

        box = gui.hBox(self.controlArea, "Numeric Variables")
        self.cont_model = ContColorTableModel()
        self.cont_view = ContinuousTable(self.cont_model)
        self.cont_model.dataChanged.connect(self._on_data_changed)
        box.layout().addWidget(self.cont_view)

        box = gui.auto_apply(self.controlArea, self, "auto_apply")
        box.button.setFixedWidth(180)
        # The buttons are created without a parent and inserted in front
        # of the apply button
        save = gui.button(None, self, "Save", callback=self.save)
        load = gui.button(None, self, "Load", callback=self.load)
        reset = gui.button(None, self, "Reset", callback=self.reset)
        box.layout().insertWidget(0, save)
        box.layout().insertWidget(0, load)
        box.layout().insertWidget(2, reset)
        box.layout().insertStretch(3)

        self.info.set_input_summary(self.info.NoInput)
        self.info.set_output_summary(self.info.NoOutput)

    @staticmethod
    def sizeHint():  # pragma: no cover
        return QSize(500, 570)

    @Inputs.data
    def set_data(self, data):
        """
        Set the input data and build fresh descriptions of its discrete and
        continuous variables.
        """
        self.closeContext()
        self.disc_descs = []
        self.cont_descs = []
        if data is None:
            self.data = self.domain = None
            self.info.set_input_summary(self.info.NoInput)
        else:
            self.data = data
            self.info.set_input_summary(len(data), format_summary_details(data))
            for var in chain(data.domain.variables, data.domain.metas):
                if var.is_discrete:
                    self.disc_descs.append(DiscAttrDesc(var))
                elif var.is_continuous:
                    self.cont_descs.append(ContAttrDesc(var))

        self.disc_model.set_data(self.disc_descs)
        self.cont_model.set_data(self.cont_descs)
        self.openContext(data)
        self.disc_view.resizeColumnsToContents()
        self.cont_view.resizeColumnsToContents()
        self.unconditional_commit()

    def _on_data_changed(self):
        self.commit()

    def reset(self):
        """Reset both table models and commit."""
        self.disc_model.reset()
        self.cont_model.reset()
        self.commit()

    def save(self):
        """Ask for a file name and save the variable definitions to it."""
        fname, _ = QFileDialog.getSaveFileName(
            self, "File name", self._start_dir(),
            "Variable definitions (*.colors)")
        if not fname:
            return
        QSettings().setValue("colorwidget/last-location",
                             os.path.split(fname)[0])
        self._save_var_defs(fname)

    def _save_var_defs(self, fname):
        # Write the descriptions as JSON with the top-level keys
        # "categorical" and "numeric"; variables whose `to_dict()` is empty
        # are left out.
        with open(fname, "w") as f:
            json.dump(
                {vartype: {
                    var.name: var_data
                    for var, var_data in (
                        (desc.var, desc.to_dict()) for desc in repo)
                    if var_data}
                 for vartype, repo in (("categorical", self.disc_descs),
                                       ("numeric", self.cont_descs))
                },
                f,
                indent=4)

    def load(self):
        """
        Ask for a file name and load the variable definitions from it.

        A file that cannot be opened, decoded or parsed is reported in a
        message box; the current definitions are then left unchanged.
        """
        fname, _ = QFileDialog.getOpenFileName(
            self, "File name", self._start_dir(),
            "Variable definitions (*.colors)")
        if not fname:
            return

        try:
            f = open(fname)
        except IOError:
            QMessageBox.critical(self, "File error", "File cannot be opened.")
            return

        try:
            # Close the file as soon as it is read, also when parsing fails
            with f:
                js = json.load(f)  #: dict
            self._parse_var_defs(js)
        except (json.JSONDecodeError, UnicodeDecodeError, InvalidFileFormat):
            QMessageBox.critical(self, "File error", "Invalid file format.")

    def _parse_var_defs(self, js):
        """
        Replace the current descriptions with those defined in `js`.

        Raises `InvalidFileFormat` if `js` does not have the expected
        structure.
        """
        if not isinstance(js, dict) or set(js) != {"categorical", "numeric"}:
            raise InvalidFileFormat
        try:
            renames = {
                var_name: desc["rename"]
                for repo in js.values()
                for var_name, desc in repo.items()
                if "rename" in desc
            }
        # js is an object coming from json file that can be manipulated by
        # the user, so there are too many things that can go wrong.
        # Catch all exceptions, therefore.
        except Exception as exc:
            raise InvalidFileFormat from exc
        if not all(isinstance(val, str)
                   for val in chain(renames, renames.values())):
            raise InvalidFileFormat
        renamed_vars = {
            renames.get(desc.var.name, desc.var.name)
            for desc in chain(self.disc_descs, self.cont_descs)
        }
        if len(renamed_vars) != len(self.disc_descs) + len(self.cont_descs):
            QMessageBox.warning(
                self,
                "Duplicated variable names",
                "Variables will not be renamed due to duplicated names.")
            for repo in js.values():
                for desc in repo.values():
                    desc.pop("rename", None)

        # First, construct all descriptions; assign later, after we know
        # there won't be exceptions due to invalid file format
        both_descs = []
        warnings = []
        for old_desc, repo, desc_type in (
                (self.disc_descs, "categorical", DiscAttrDesc),
                (self.cont_descs, "numeric", ContAttrDesc)):
            var_by_name = {desc.var.name: desc.var for desc in old_desc}
            new_descs = {}
            for var_name, var_data in js[repo].items():
                var = var_by_name.get(var_name)
                if var is None:
                    # Definitions of variables not in the data are skipped
                    continue
                # This can throw InvalidFileFormat
                new_descs[var_name], warn = desc_type.from_dict(var, var_data)
                warnings += warn
            both_descs.append(new_descs)

        self.disc_descs = [both_descs[0].get(desc.var.name, desc)
                           for desc in self.disc_descs]
        self.cont_descs = [both_descs[1].get(desc.var.name, desc)
                           for desc in self.cont_descs]
        if warnings:
            QMessageBox.warning(
                self, "Invalid definitions", "\n".join(warnings))

        self.disc_model.set_data(self.disc_descs)
        self.cont_model.set_data(self.cont_descs)
        self.unconditional_commit()

    def _start_dir(self):
        # Directory in which the file dialogs start: the workflow's base
        # directory, else the last used location, else the home directory
        return self.workflowEnv().get("basedir") \
               or QSettings().value("colorwidget/last-location") \
               or os.path.expanduser(f"~{os.sep}")

    def commit(self):
        """Send the data transformed into a domain of modified variables."""
        def make(variables):
            # Replace each described variable with the one created from its
            # description; others are kept as they are
            new_vars = []
            for var in variables:
                source = disc_dict if var.is_discrete else cont_dict
                desc = source.get(var.name)
                new_vars.append(desc.create_variable() if desc else var)
            return new_vars

        if self.data is None:
            self.Outputs.data.send(None)
            self.info.set_output_summary(self.info.NoOutput)
            return

        disc_dict = {desc.var.name: desc for desc in self.disc_descs}
        cont_dict = {desc.var.name: desc for desc in self.cont_descs}

        dom = self.data.domain
        new_domain = Orange.data.Domain(
            make(dom.attributes), make(dom.class_vars), make(dom.metas))
        new_data = self.data.transform(new_domain)
        self.info.set_output_summary(len(new_data),
                                     format_summary_details(new_data))
        self.Outputs.data.send(new_data)

    def send_report(self):
        """Send report"""
        def _report_variables(variables):
            # Return the HTML table rows describing `variables`
            def was(n, o):
                return n if n == o else f"{n} (was: {o})"

            max_values = max(
                (len(var.values) for var in variables if var.is_discrete),
                default=1)

            rows = ""
            disc_dict = {k.var.name: k for k in self.disc_descs}
            cont_dict = {k.var.name: k for k in self.cont_descs}
            for var in variables:
                if var.is_discrete:
                    desc = disc_dict[var.name]
                    value_cols = "    \n".join(
                        f"<td>{square(*color)} {was(value, old_value)}</td>"
                        for color, value, old_value in
                        zip(desc.colors, desc.values, var.values))
                elif var.is_continuous:
                    desc = cont_dict[var.name]
                    pal = colorpalettes.ContinuousPalettes[desc.palette_name]
                    value_cols = f'<td colspan="{max_values}">' \
                                 f'{pal.friendly_name}</td>'
                else:
                    continue
                names = was(desc.name, desc.var.name)
                rows += '<tr style="height: 2em">\n' \
                        f'  <th style="text-align: right">{names}</th>' \
                        f'  {value_cols}\n' \
                        '</tr>\n'
            return rows

        if not self.data:
            return
        dom = self.data.domain
        sections = (
            (name, _report_variables(variables))
            for name, variables in (
                ("Features", dom.attributes),
                ("Outcome" + "s" * (len(dom.class_vars) > 1), dom.class_vars),
                ("Meta attributes", dom.metas)))
        table = "".join(f"<tr><th>{name}</th></tr>{rows}"
                        for name, rows in sections if rows)
        if table:
            self.report_raw(f"<table>{table}</table>")

    @classmethod
    def migrate_context(cls, _, version):
        # Contexts without a version or older than version 2 are rejected
        if not version or version < 2:
            raise IncompatibleContext
class OWSilhouettePlot(widget.OWWidget):
    """
    Widget that draws a silhouette plot for the instances of a data table.

    Distances are computed with the selected entry of `Distances`, the
    silhouette scores with `sklearn.metrics.silhouette_samples` using one of
    the table's discrete variables as the cluster label. The selected
    instances and the annotated table are sent to the outputs.
    """
    name = "Silhouette Plot"
    description = "Visually assess cluster quality and " \
                  "the degree of cluster membership."

    icon = "icons/SilhouettePlot.svg"
    priority = 300

    inputs = [("Data", Orange.data.Table, "set_data")]
    outputs = [("Selected Data", Orange.data.Table, widget.Default),
               (ANNOTATED_DATA_SIGNAL_NAME, Orange.data.Table)]

    replaces = [
        "orangecontrib.prototypes.widgets.owsilhouetteplot.OWSilhouettePlot",
        "Orange.widgets.unsupervised.owsilhouetteplot.OWSilhouettePlot"
    ]

    settingsHandler = settings.PerfectDomainContextHandler()

    #: Distance metric index
    distance_idx = settings.Setting(0)
    #: Group/cluster variable index
    cluster_var_idx = settings.ContextSetting(0)
    #: Annotation variable index
    annotation_var_idx = settings.ContextSetting(0)
    #: Group the silhouettes by cluster
    group_by_cluster = settings.Setting(True)
    #: A fixed size for an instance bar
    bar_size = settings.Setting(3)
    #: Add silhouette scores to output data
    add_scores = settings.Setting(False)
    auto_commit = settings.Setting(False)

    #: (display name, distance constructor) pairs offered in the combo box
    Distances = [("Euclidean", Orange.distance.Euclidean),
                 ("Manhattan", Orange.distance.Manhattan)]

    graph_name = "scene"
    buttons_area_orientation = Qt.Vertical

    class Error(widget.OWWidget.Error):
        need_two_clusters = Msg("Need at least two non-empty clusters")

    def __init__(self):
        super().__init__()

        #: The input data (None when there is no usable input)
        self.data = None
        #: The result of `Orange.distance._preprocess` on the input data
        self._effective_data = None
        #: Distance matrix computed from `_effective_data`
        self._matrix = None
        #: Silhouette scores (None when they could not be computed)
        self._silhouette = None
        #: Integer cluster labels matching `_silhouette`
        self._labels = None
        #: The SilhouettePlot item currently shown in the scene
        self._silplot = None

        gui.comboBox(self.controlArea, self, "distance_idx", box="Distance",
                     items=[name for name, _ in OWSilhouettePlot.Distances],
                     orientation=Qt.Horizontal,
                     callback=self._invalidate_distances)

        box = gui.vBox(self.controlArea, "Cluster Label")
        self.cluster_var_cb = gui.comboBox(box, self, "cluster_var_idx",
                                           addSpace=4,
                                           callback=self._invalidate_scores)
        gui.checkBox(box, self, "group_by_cluster", "Group by cluster",
                     callback=self._replot)
        self.cluster_var_model = itemmodels.VariableListModel(parent=self)
        self.cluster_var_cb.setModel(self.cluster_var_model)

        box = gui.vBox(self.controlArea, "Bars")
        gui.widgetLabel(box, "Bar width:")
        gui.hSlider(box, self, "bar_size", minValue=1, maxValue=10, step=1,
                    callback=self._update_bar_size, addSpace=6)
        gui.widgetLabel(box, "Annotations:")
        self.annotation_cb = gui.comboBox(box, self, "annotation_var_idx",
                                          callback=self._update_annotations)
        self.annotation_var_model = itemmodels.VariableListModel(parent=self)
        # Index 0 is always the "no annotation" placeholder
        self.annotation_var_model[:] = ["None"]
        self.annotation_cb.setModel(self.annotation_var_model)
        ibox = gui.indentedBox(box, 5)
        self.ann_hidden_warning = warning = gui.widgetLabel(
            ibox, "(increase the width to show)")
        # Fix the width while the label is still visible so that hiding
        # it does not change the box's size hint
        ibox.setFixedWidth(ibox.sizeHint().width())
        warning.setVisible(False)

        gui.rubber(self.controlArea)

        gui.separator(self.buttonsArea)
        box = gui.vBox(self.buttonsArea, "Output")
        # Thunk the call to commit to call conditional commit
        gui.checkBox(box, self, "add_scores", "Add silhouette scores",
                     callback=lambda: self.commit())
        gui.auto_commit(box, self, "auto_commit", "Commit",
                        auto_label="Auto commit", box=False)
        # Ensure that the controlArea is not narrower than buttonsArea
        self.controlArea.layout().addWidget(self.buttonsArea)

        self.scene = QGraphicsScene()
        self.view = QGraphicsView(self.scene)
        self.view.setRenderHint(QPainter.Antialiasing, True)
        self.view.setAlignment(Qt.AlignTop | Qt.AlignLeft)
        self.mainArea.layout().addWidget(self.view)

    def sizeHint(self):
        """Return the control area's size hint expanded to at least 600x720."""
        sh = self.controlArea.sizeHint()
        return sh.expandedTo(QSize(600, 720))

    @check_sql_input
    def set_data(self, data):
        """
        Set the input data set.

        The input is discarded (with an error message) when it has no
        discrete variable with at least two values, or no continuous
        attribute. A warning is shown when some attributes are not
        continuous.
        """
        self.closeContext()
        self.clear()
        error_msg = ""
        warning_msg = ""
        candidatevars = []
        if data is not None:
            candidatevars = [
                v for v in data.domain.variables + data.domain.metas
                if v.is_discrete and len(v.values) >= 2
            ]
            if not candidatevars:
                error_msg = "Input does not have any suitable cluster labels."
                data = None

        if data is not None:
            ncont = sum(v.is_continuous for v in data.domain.attributes)
            ndiscrete = len(data.domain.attributes) - ncont
            if ncont == 0:
                data = None
                error_msg = "No continuous columns"
            elif ncont < len(data.domain.attributes):
                warning_msg = "{0} discrete columns will not be used for " \
                              "distance computation".format(ndiscrete)

        self.data = data
        if data is not None:
            self.cluster_var_model[:] = candidatevars
            # Prefer the class variable as the default cluster label
            if data.domain.class_var in candidatevars:
                self.cluster_var_idx = \
                    candidatevars.index(data.domain.class_var)
            else:
                self.cluster_var_idx = 0

            annotvars = [var for var in data.domain.metas if var.is_string]
            self.annotation_var_model[:] = ["None"] + annotvars
            self.annotation_var_idx = 1 if len(annotvars) else 0
            self._effective_data = Orange.distance._preprocess(data)
            self.openContext(Orange.data.Domain(candidatevars))

        self.error(error_msg)
        self.warning(warning_msg)

    def handleNewSignals(self):
        """Recompute and redraw for the new input, then commit."""
        if self._effective_data is not None:
            self._update()
            self._replot()

        self.unconditional_commit()

    def clear(self):
        """
        Clear the widget state.
        """
        self.data = None
        self._effective_data = None
        self._matrix = None
        self._silhouette = None
        self._labels = None
        self.cluster_var_model[:] = []
        self.annotation_var_model[:] = ["None"]
        self._clear_scene()

    def _clear_scene(self):
        # Clear the graphics scene and associated objects
        self.scene.clear()
        self.scene.setSceneRect(QRectF())
        self._silplot = None

    def _invalidate_distances(self):
        # Invalidate the computed distance matrix and recompute the silhouette.
        self._matrix = None
        self._invalidate_scores()

    def _invalidate_scores(self):
        # Invalidate and recompute the current silhouette scores.
        self._labels = self._silhouette = None
        self._update()
        self._replot()
        if self.data is not None:
            self.commit()

    def _update(self):
        # Update/recompute the distances/scores as required
        if self.data is None:
            self._silhouette = None
            self._labels = None
            self._matrix = None
            self._clear_scene()
            return

        if self._matrix is None and self._effective_data is not None:
            _, metric = self.Distances[self.distance_idx]
            self._matrix = numpy.asarray(metric(self._effective_data))

        labelvar = self.cluster_var_model[self.cluster_var_idx]
        labels, _ = self.data.get_column_view(labelvar)
        labels = labels.astype(int)
        _, counts = numpy.unique(labels, return_counts=True)
        if numpy.count_nonzero(counts) >= 2:
            self.Error.need_two_clusters.clear()
            silhouette = sklearn.metrics.silhouette_samples(
                self._matrix, labels, metric="precomputed")
        else:
            self.Error.need_two_clusters()
            labels = silhouette = None

        self._labels = labels
        self._silhouette = silhouette

    def _set_bar_height(self):
        # Row names are only shown when the bars are at least 5 units high;
        # otherwise the "(increase the width to show)" hint is displayed if
        # an annotation variable is selected.
        visible = self.bar_size >= 5
        self._silplot.setBarHeight(self.bar_size)
        self._silplot.setRowNamesVisible(visible)
        self.ann_hidden_warning.setVisible(
            not visible and self.annotation_var_idx > 0)

    def _replot(self):
        # Clear and replot/initialize the scene
        self._clear_scene()
        if self._silhouette is not None and self._labels is not None:
            var = self.cluster_var_model[self.cluster_var_idx]
            self._silplot = silplot = SilhouettePlot()
            self._set_bar_height()

            if self.group_by_cluster:
                silplot.setScores(self._silhouette, self._labels, var.values)
            else:
                # A single unnamed group containing all the instances
                silplot.setScores(
                    self._silhouette,
                    numpy.zeros(len(self._silhouette), dtype=int),
                    [""]
                )

            self.scene.addItem(silplot)
            self._update_annotations()

            silplot.resize(silplot.effectiveSizeHint(Qt.PreferredSize))
            silplot.selectionChanged.connect(self.commit)

            self.scene.setSceneRect(
                QRectF(QPointF(0, 0),
                       self._silplot.effectiveSizeHint(Qt.PreferredSize)))

    def _update_bar_size(self):
        # Apply the current bar size to the plot and refit the scene rect
        if self._silplot is not None:
            self._set_bar_height()
            self.scene.setSceneRect(
                QRectF(QPointF(0, 0),
                       self._silplot.effectiveSizeHint(Qt.PreferredSize)))

    def _update_annotations(self):
        # Set the plot's row names from the selected annotation variable
        # (index 0 is the "None" placeholder and clears the row names).
        if 0 < self.annotation_var_idx < len(self.annotation_var_model):
            annot_var = self.annotation_var_model[self.annotation_var_idx]
        else:
            annot_var = None
        self.ann_hidden_warning.setVisible(
            self.bar_size < 5 and annot_var is not None)

        if self._silplot is not None:
            if annot_var is not None:
                column, _ = self.data.get_column_view(annot_var)
                self._silplot.setRowNames(
                    [annot_var.str_val(value) for value in column])
            else:
                self._silplot.setRowNames(None)

    def commit(self):
        """
        Commit/send the current selection to the output.

        When `add_scores` is set, a "Silhouette (<cluster variable>)" meta
        column is appended to both output tables.
        """
        selected = indices = data = None
        if self.data is not None:
            selectedmask = numpy.full(len(self.data), False, dtype=bool)
            if self._silplot is not None:
                indices = self._silplot.selection()
                selectedmask[indices] = True
            scores = self._silhouette
            silhouette_var = None
            if self.add_scores:
                var = self.cluster_var_model[self.cluster_var_idx]
                silhouette_var = Orange.data.ContinuousVariable(
                    "Silhouette ({})".format(escape(var.name)))
                domain = Orange.data.Domain(
                    self.data.domain.attributes,
                    self.data.domain.class_vars,
                    self.data.domain.metas + (silhouette_var, ))
                data = self.data.from_table(domain, self.data)
            else:
                domain = self.data.domain
                data = self.data

            if numpy.count_nonzero(selectedmask):
                selected = self.data.from_table(
                    domain, self.data, numpy.flatnonzero(selectedmask))

            if self.add_scores:
                if selected is not None:
                    selected[:, silhouette_var] = numpy.c_[scores[selectedmask]]
                data[:, silhouette_var] = numpy.c_[scores]

        self.send("Selected Data", selected)
        self.send(ANNOTATED_DATA_SIGNAL_NAME,
                  create_annotated_table(data, indices))

    def send_report(self):
        """Add the plot and a descriptive caption to the report."""
        if not len(self.cluster_var_model):
            return

        self.report_plot()
        caption = "Silhouette plot ({} distance), clustered by '{}'".format(
            self.Distances[self.distance_idx][0],
            self.cluster_var_model[self.cluster_var_idx])
        # _silplot is None when no scores could be computed (for instance
        # when fewer than two clusters are populated); there are no row
        # names to mention in that case.
        if self.annotation_var_idx and self._silplot is not None \
                and self._silplot.rowNamesVisible():
            caption += ", annotated with '{}'".format(
                self.annotation_var_model[self.annotation_var_idx])
        self.report_caption(caption)

    def onDeleteWidget(self):
        """Release the held data and scene items before deletion."""
        self.clear()
        super().onDeleteWidget()
class OWColor(widget.OWWidget):
    """
    Widget for editing variable descriptions (names, values, colors).

    Every discrete and continuous variable of the input is wrapped in a
    `DiscAttrDesc` / `ContAttrDesc` descriptor shown in one of two tables.
    The output is the input data transformed to a domain whose variables
    are produced by the descriptors' `create_variable()`.
    """
    name = "Color"
    description = "Set color legend for variables."
    icon = "icons/Colors.svg"

    class Inputs:
        data = Input("Data", Orange.data.Table)

    class Outputs:
        data = Output("Data", Orange.data.Table)

    settingsHandler = settings.PerfectDomainContextHandler(
        match_values=settings.PerfectDomainContextHandler.MATCH_VALUES_ALL)
    #: Descriptors of discrete variables (stored per context)
    disc_descs = settings.ContextSetting([])
    #: Descriptors of continuous variables (stored per context)
    cont_descs = settings.ContextSetting([])
    color_settings = settings.Setting(None)
    selected_schema_index = settings.Setting(0)
    auto_apply = settings.Setting(True)

    settings_version = 2

    want_main_area = False

    def __init__(self):
        super().__init__()
        self.data = None
        self.orig_domain = self.domain = None

        box = gui.hBox(self.controlArea, "Discrete Variables")
        self.disc_model = DiscColorTableModel()
        self.disc_view = DiscreteTable(self.disc_model)
        self.disc_model.dataChanged.connect(self._on_data_changed)
        box.layout().addWidget(self.disc_view)

        box = gui.hBox(self.controlArea, "Numeric Variables")
        self.cont_model = ContColorTableModel()
        self.cont_view = ContinuousTable(self.cont_model)
        self.cont_model.dataChanged.connect(self._on_data_changed)
        box.layout().addWidget(self.cont_view)

        box = gui.auto_apply(self.controlArea, self, "auto_apply")
        box.button.setFixedWidth(180)
        box.layout().insertStretch(0)

    @staticmethod
    def sizeHint():  # pragma: no cover
        return QSize(500, 570)

    @Inputs.data
    def set_data(self, data):
        """Set the input data and rebuild the variable descriptors."""
        self.closeContext()
        self.disc_descs = []
        self.cont_descs = []
        if data is None:
            self.data = self.domain = None
        else:
            self.data = data
            for var in chain(data.domain.variables, data.domain.metas):
                if var.is_discrete:
                    self.disc_descs.append(DiscAttrDesc(var))
                elif var.is_continuous:
                    self.cont_descs.append(ContAttrDesc(var))

        self.disc_model.set_data(self.disc_descs)
        self.cont_model.set_data(self.cont_descs)
        self.openContext(data)
        self.disc_view.resizeColumnsToContents()
        self.cont_view.resizeColumnsToContents()
        self.unconditional_commit()

    def _on_data_changed(self):
        # Any edit in either table model triggers a (conditional) commit
        self.commit()

    def commit(self):
        """Send the data transformed to the domain of edited variables."""
        def make(variables):
            # Replace each variable that has a descriptor with the variable
            # the descriptor creates; keep the others as they are.
            new_vars = []
            for var in variables:
                source = disc_dict if var.is_discrete else cont_dict
                desc = source.get(var.name)
                new_vars.append(desc.create_variable() if desc else var)
            return new_vars

        if self.data is None:
            self.Outputs.data.send(None)
            return

        disc_dict = {desc.var.name: desc for desc in self.disc_descs}
        cont_dict = {desc.var.name: desc for desc in self.cont_descs}

        dom = self.data.domain
        new_domain = Orange.data.Domain(
            make(dom.attributes), make(dom.class_vars), make(dom.metas))
        new_data = self.data.transform(new_domain)
        self.Outputs.data.send(new_data)

    def send_report(self):
        """Send report"""
        def _report_variables(variables):
            # Build the HTML table rows describing the given variables
            from Orange.widgets.report import colored_square as square

            def was(n, o):
                # Show the original name/value only when it was changed
                return n if n == o else f"{n} (was: {o})"

            # Continuous rows span as many columns as the longest list of
            # discrete values
            max_values = max(
                (len(var.values) for var in variables if var.is_discrete),
                default=1)

            rows = ""
            disc_dict = {k.var.name: k for k in self.disc_descs}
            cont_dict = {k.var.name: k for k in self.cont_descs}
            for var in variables:
                if var.is_discrete:
                    desc = disc_dict[var.name]
                    value_cols = " \n".join(
                        f"<td>{square(*color)} {was(value, old_value)}</td>"
                        for color, value, old_value in zip(
                            desc.colors, desc.values, var.values))
                elif var.is_continuous:
                    desc = cont_dict[var.name]
                    pal = colorpalettes.ContinuousPalettes[desc.palette_name]
                    value_cols = f'<td colspan="{max_values}">' \
                                 f'{pal.friendly_name}</td>'
                else:
                    continue
                names = was(desc.name, desc.var.name)
                rows += '<tr style="height: 2em">\n' \
                        f' <th style="text-align: right">{names}</th>' \
                        f' {value_cols}\n' \
                        '</tr>\n'
            return rows

        if not self.data:
            return
        dom = self.data.domain
        sections = (
            (name, _report_variables(variables))
            for name, variables in (
                ("Features", dom.attributes),
                ("Outcome" + "s" * (len(dom.class_vars) > 1), dom.class_vars),
                ("Meta attributes", dom.metas)))
        table = "".join(f"<tr><th>{name}</th></tr>{rows}"
                        for name, rows in sections if rows)
        if table:
            # Must be an f-string: the generated rows are interpolated here
            self.report_raw(f"<table>{table}</table>")

    @classmethod
    def migrate_context(cls, context, version):
        """Reject contexts stored by versions older than 2.

        Raises:
            IncompatibleContext: if `version` is missing or below 2.
        """
        if not version or version < 2:
            raise IncompatibleContext
class OWReshape(widget.OWWidget):
    """
    Widget that reshapes a 'wide' records table to 'long' format.

    One discrete or string column is chosen as the id variable; the
    remaining attributes are passed to `reshape_long` together with a new
    discrete "item" variable and a new continuous "value" variable.
    """
    name = "To Shopping List"
    description = "Reshape from a 'wide' records table to 'long' format."
    icon = "icons/ToShoppingList.svg"

    inputs = [("Data", Orange.data.Table, "set_data")]
    outputs = [("Data", Orange.data.Table)]

    want_main_area = False
    resizing_enabled = False

    settingsHandler = settings.PerfectDomainContextHandler(metas_in_res=True)

    #: Index of the selected id variable within `idvar_model`
    idvar = settings.ContextSetting(0)

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        #: The current input table, or None
        self.data = None
        #: Model listing the candidate id variables
        self.idvar_model = itemmodels.VariableListModel(parent=self)
        #: Names given to the generated item / value variables
        self.item_var_name = "Item"
        self.value_var_name = "Rating"

        info_box = gui.widgetBox(self.controlArea, "Info")
        self.info_text = gui.widgetLabel(info_box, "No data")

        id_box = gui.widgetBox(self.controlArea, "Id var")
        self.var_cb = gui.comboBox(
            id_box, self, "idvar", callback=self._invalidate)
        self.var_cb.setMinimumContentsLength(16)
        self.var_cb.setModel(self.idvar_model)

        gui.lineEdit(
            self.controlArea, self, "item_var_name",
            box="Item name", callback=self._invalidate)
        gui.lineEdit(
            self.controlArea, self, "value_var_name",
            box="Value name", callback=self._invalidate)

    def sizeHint(self):
        """Return the preferred widget size (300x50)."""
        return QSize(300, 50)

    def clear(self):
        """Drop the data, empty the id variable model and clear the error."""
        self.data = None
        self.idvar_model[:] = []
        self.error("")

    def set_data(self, data):
        """
        Set the input table.

        The table is rejected (with an error) when none of its metas or
        variables is a discrete or string variable.
        """
        self.closeContext()
        self.clear()

        candidates = []
        if data is not None:
            id_types = (Orange.data.DiscreteVariable,
                        Orange.data.StringVariable)
            columns = data.domain.metas + data.domain.variables
            candidates = [col for col in columns if isinstance(col, id_types)]
            if not candidates:
                self.error("No suitable id columns.")
                data = None

        self.data = data
        if data is None:
            self.info_text.setText("No data")
        else:
            self.idvar_model[:] = candidates
            self.openContext(data)
            self.info_text.setText("Data with {} instances".format(len(data)))

        self.commit()

    def _invalidate(self):
        # Any change of the controls re-sends the output
        self.commit()

    def commit(self):
        """Reshape the current data and send the result (or None)."""
        table = self.data
        if table is None:
            self.send("Data", None)
            return

        self.error("")
        id_variable = self.idvar_model[self.idvar]
        # Every attribute other than the id variable becomes an item
        item_columns = [
            column for column in table.domain.attributes
            if column is not id_variable
        ]
        labels = [column.name for column in item_columns]
        if len(item_columns) != len(set(labels)):
            self.error("Duplicate column names")
            self.send("Data", None)
            return

        item_var = Orange.data.DiscreteVariable(
            self.item_var_name, values=labels)
        value_var = Orange.data.ContinuousVariable(self.value_var_name)

        result = None
        try:
            result = reshape_long(table, id_variable, item_var, value_var)
        except ValueError as err:
            # Report the failure in the widget and send no data
            self.error(str(err))

        self.send("Data", result)
class OWLinePlot(OWWidget):
    """
    Widget plotting data instances as profiles over continuous attributes.

    Each instance can be drawn as a line, and for each group (value of the
    selected discrete variable, or the whole data set) a mean line with
    quartile-based error bars can be shown.
    """
    name = "Line Plot"
    description = "Visualization of data profiles (e.g., time series)."
    icon = "icons/LinePlot.svg"
    priority = 1030

    class Inputs:
        data = Input("Data", Table, default=True)
        data_subset = Input("Data Subset", Table)

    class Outputs:
        selected_data = Output("Selected Data", Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)

    settingsHandler = settings.PerfectDomainContextHandler()

    #: Discrete variable used for grouping (None for no grouping)
    group_var = settings.ContextSetting(None)
    #: One of the LinePlotDisplay modes
    display_index = settings.Setting(LinePlotDisplay.INSTANCES)
    #: Show the error bars
    display_quartiles = settings.Setting(False)
    auto_commit = settings.Setting(True)
    #: Indices of the selected instances
    selection = settings.ContextSetting([])

    class Information(OWWidget.Information):
        not_enough_attrs = Msg("Need at least one continuous feature.")

    def __init__(self, parent=None):
        super().__init__(parent)
        #: Per-group namespaces with `mean` and `error_bar` plot items
        #: (None entries stand for empty groups)
        self.__groups = None
        #: LinePlotItem for each data instance, or None if not plotted yet
        self.__profiles = None
        self.data = None
        self.data_subset = None
        self.subset_selection = []
        #: Continuous attributes shown on the x axis
        self.graph_variables = []

        # Setup GUI
        infobox = gui.widgetBox(self.controlArea, "Info")
        self.infoLabel = gui.widgetLabel(infobox, "No data on input.")
        displaybox = gui.widgetBox(self.controlArea, "Display")
        radiobox = gui.radioButtons(displaybox, self, "display_index",
                                    callback=self.__update_visibility)
        gui.appendRadioButton(radiobox, "Line plot")
        gui.appendRadioButton(radiobox, "Mean")
        gui.appendRadioButton(radiobox, "Line plot with mean")

        showbox = gui.widgetBox(self.controlArea, "Show")
        gui.checkBox(showbox, self, "display_quartiles", "Error bars",
                     callback=self.__update_visibility)

        self.group_vars = DomainModel(
            placeholder="None", separators=False,
            valid_types=DiscreteVariable)
        self.group_view = gui.listView(
            self.controlArea, self, "group_var", box="Group by",
            model=self.group_vars, callback=self.__group_var_changed)
        self.group_view.setEnabled(False)
        self.group_view.setMinimumSize(QSize(30, 100))
        self.group_view.setSizePolicy(QSizePolicy.Expanding,
                                      QSizePolicy.Ignored)

        self.gui = OWPlotGUI(self)
        self.box_zoom_select(self.controlArea)

        gui.rubber(self.controlArea)
        gui.auto_commit(self.controlArea, self, "auto_commit",
                        "Send Selection", "Send Automatically")

        self.graph = LinePlotGraph(self)
        self.graph.getPlotItem().buttonsHidden = True
        self.graph.setRenderHint(QPainter.Antialiasing, True)
        self.mainArea.layout().addWidget(self.graph)

    def box_zoom_select(self, parent):
        """Create the "Zoom/Select" toolbar box in `parent` and return it."""
        g = self.gui
        box_zoom_select = gui.vBox(parent, "Zoom/Select")
        zoom_select_toolbar = g.zoom_select_toolbar(
            box_zoom_select, nomargin=True,
            buttons=[g.StateButtonsBegin, g.SimpleSelect, g.Pan, g.Zoom,
                     g.StateButtonsEnd, g.ZoomReset]
        )
        buttons = zoom_select_toolbar.buttons
        buttons[g.SimpleSelect].clicked.connect(self.select_button_clicked)
        buttons[g.Pan].clicked.connect(self.pan_button_clicked)
        buttons[g.Zoom].clicked.connect(self.zoom_button_clicked)
        buttons[g.ZoomReset].clicked.connect(self.reset_button_clicked)
        return box_zoom_select

    def select_button_clicked(self):
        """Switch the graph to selection mode."""
        self.graph.state = SELECT
        self.graph.getViewBox().setMouseMode(self.graph.getViewBox().RectMode)

    def pan_button_clicked(self):
        """Switch the graph to panning mode."""
        self.graph.state = PANNING
        self.graph.getViewBox().setMouseMode(self.graph.getViewBox().PanMode)

    def zoom_button_clicked(self):
        """Switch the graph to zooming mode."""
        self.graph.state = ZOOMING
        self.graph.getViewBox().setMouseMode(self.graph.getViewBox().RectMode)

    def reset_button_clicked(self):
        """Reset the view to show the whole plot."""
        self.graph.getViewBox().autoRange()

    def selection_changed(self):
        """Store the graph's selection and commit it."""
        self.selection = list(self.graph.selection)
        self.commit()

    def sizeHint(self):
        """Return the preferred widget size (800x600)."""
        return QSize(800, 600)

    def clear(self):
        """
        Clear/reset the widget state.
        """
        self.__groups = None
        self.__profiles = None
        self.graph_variables = []
        self.graph.reset()
        self.infoLabel.setText("No data on input.")
        self.group_vars.set_domain(None)
        self.group_view.setEnabled(False)

    @Inputs.data
    def set_data(self, data):
        """
        Set the input profile dataset.
        """
        self.closeContext()
        self.clear()
        self.clear_messages()

        self.data = data
        self.selection = []
        if data is not None:
            self.group_vars.set_domain(data.domain)
            self.group_view.setEnabled(len(self.group_vars) > 1)
            self.group_var = data.domain.class_var if \
                data.domain.class_var and data.domain.class_var.is_discrete \
                else None

            self.infoLabel.setText("%i instances on input\n%i attributes" % (
                len(data), len(data.domain.attributes)))
            self.graph_variables = [var for var in data.domain.attributes
                                    if var.is_continuous]
            if len(self.graph_variables) < 1:
                # Nothing to plot; only propagate the (empty) selection
                self.Information.not_enough_attrs()
                self.commit()
                return

        self.openContext(data)
        self._setup_plot()
        self.commit()

    @Inputs.data_subset
    def set_data_subset(self, subset):
        """
        Set the supplementary input subset dataset.
        """
        self.data_subset = subset
        if len(self.subset_selection):
            self.graph.deselect_subset()

    def handleNewSignals(self):
        """Recompute which instances of the data are in the data subset."""
        self.subset_selection = []
        if self.data is not None and self.data_subset is not None and \
                len(self.graph_variables):
            intersection = set(self.data.ids).intersection(
                set(self.data_subset.ids))
            self.subset_selection = intersection
            if self.__profiles is not None:
                self.graph.select_subset(self.subset_selection)

    def _setup_plot(self):
        """Setup the plot with new curve data."""
        if self.data is None:
            return

        self.graph.reset()
        ticks = [[(i + 1, str(a)) for i, a in enumerate(self.graph_variables)]]
        self.graph.getAxis('bottom').setTicks(ticks)
        # Instance profiles are created lazily: only when they are shown
        if self.display_index in (LinePlotDisplay.INSTANCES,
                                  LinePlotDisplay.INSTANCES_WITH_MEAN):
            self._plot_profiles()
        self._plot_groups()
        self.__update_visibility()

    def _plot_profiles(self):
        """Create a line plot item for each data instance."""
        X = np.arange(1, len(self.graph_variables) + 1)
        data = self.data[:, self.graph_variables]
        self.__profiles = []
        for index, inst in enumerate(data):
            color = self.__get_line_color(index)
            profile = LinePlotItem(index, inst.id, X, inst.x, color)
            profile.sigClicked.connect(self.graph.select_by_click)
            self.graph.add_line_plot_item(profile)
            self.__profiles.append(profile)
        self.graph.finished_adding()
        self.__select_data_instances()

    def _plot_groups(self):
        """Replace the mean/error bar items with ones for current groups."""
        if self.__groups is not None:
            for group in self.__groups:
                if group is not None:
                    self.graph.getViewBox().removeItem(group.mean)
                    self.graph.getViewBox().removeItem(group.error_bar)

        self.__groups = []
        X = np.arange(1, len(self.graph_variables) + 1)
        if self.group_var is None:
            self.__plot_mean_with_error(X, self.data[:, self.graph_variables])
        else:
            class_col_data, _ = self.data.get_column_view(self.group_var)
            group_indices = [np.flatnonzero(class_col_data == i)
                             for i in range(len(self.group_var.values))]
            for index, indices in enumerate(group_indices):
                if len(indices) == 0:
                    # Keep a placeholder so that positions in __groups
                    # still match the indices of group values
                    self.__groups.append(None)
                else:
                    group_data = self.data[indices, self.graph_variables]
                    self.__plot_mean_with_error(X, group_data, index)

    def __plot_mean_with_error(self, X, data, index=None):
        # Add the mean curve and the error bars (first to third quartile)
        # for `data` and remember them in __groups.
        pen = QPen(self.__get_line_color(None, index), 4)
        pen.setCosmetic(True)
        mean = np.nanmean(data.X, axis=0)
        mean_curve = pg.PlotDataItem(x=X, y=mean, pen=pen, symbol="o",
                                     symbolSize=5, antialias=True)
        self.graph.addItem(mean_curve)

        q1, _, q3 = np.nanpercentile(data.X, [25, 50, 75], axis=0)
        # Error bar lengths must not be negative
        bottom = np.clip(mean - q1, 0, mean - q1)
        top = np.clip(q3 - mean, 0, q3 - mean)
        error_bar = pg.ErrorBarItem(x=X, y=mean, bottom=bottom,
                                    top=top, beam=0.01)
        self.graph.addItem(error_bar)
        self.__groups.append(namespace(mean=mean_curve, error_bar=error_bar))

    def __update_visibility(self):
        self.__update_visibility_profiles()
        self.__update_visibility_groups()

    def __update_visibility_groups(self):
        show_mean = self.display_index in (LinePlotDisplay.MEAN,
                                           LinePlotDisplay.INSTANCES_WITH_MEAN)
        if self.__groups is not None:
            for group in self.__groups:
                if group is not None:
                    group.mean.setVisible(show_mean)
                    group.error_bar.setVisible(self.display_quartiles)

    def __update_visibility_profiles(self):
        show_inst = self.display_index in (LinePlotDisplay.INSTANCES,
                                           LinePlotDisplay.INSTANCES_WITH_MEAN)
        # The display controls are usable before any data arrives; profiles
        # can only be created when there is something to plot.
        can_plot = self.data is not None and len(self.graph_variables) > 0
        if self.__profiles is None and show_inst and can_plot:
            self._plot_profiles()
            self.graph.select_subset(self.subset_selection)
        if self.__profiles is not None:
            for profile in self.__profiles:
                profile.setVisible(show_inst)

    def __group_var_changed(self):
        if self.data is None or not len(self.graph_variables):
            return
        self.__color_profiles()
        self._plot_groups()
        self.__update_visibility()

    def __color_profiles(self):
        if self.__profiles is not None:
            for profile in self.__profiles:
                profile.setColor(self.__get_line_color(profile.index))

    def __select_data_instances(self):
        # Re-apply the stored selection to the graph; a selection that
        # refers to rows beyond the current data is discarded.
        if self.data is None or not len(self.data) or not len(self.selection):
            return
        if max(self.selection) >= len(self.data):
            self.selection = []
        self.graph.select(self.selection)

    def __get_line_color(self, data_index=None, mean_index=None):
        # Color of an instance line (data_index given) or of a mean line
        # (mean_index given). Mean lines are drawn slightly darker.
        color = QColor(LinePlotColors.DEFAULT_COLOR)
        if self.group_var is not None:
            if data_index is not None:
                value = self.data[data_index][self.group_var]
                if np.isnan(value):
                    # Instances with unknown group keep the default color
                    return color
            index = int(value) if data_index is not None else mean_index
            color = LinePlotColors()(len(self.group_var.values))[index]
        return color.darker(110) if data_index is None else color

    def commit(self):
        """Send the selected instances and the annotated data."""
        selected = self.data[self.selection] \
            if self.data is not None and len(self.selection) > 0 else None
        annotated = create_annotated_table(self.data, self.selection)
        self.Outputs.selected_data.send(selected)
        self.Outputs.annotated_data.send(annotated)
class OWColor(widget.OWWidget):
    """
    Widget for editing the names, values and colors of variables.

    Discrete and continuous variables of the input are replaced by proxies
    (`make_proxy()`), which are shown in two tables; the data with the
    proxied domain is sent to the output.
    """
    name = "Color"
    description = "Set color legend for variables"
    icon = "icons/Colors.svg"

    inputs = [("Data", Orange.data.Table, "set_data")]
    outputs = [("Data", Orange.data.Table)]

    settingsHandler = settings.PerfectDomainContextHandler()
    #: (name, values, colors-or-False) for each discrete variable
    disc_data = settings.ContextSetting([])
    #: (name, colors-or-False) for each continuous variable
    cont_data = settings.ContextSetting([])
    color_settings = settings.Setting(None)
    selected_schema_index = settings.Setting(0)
    auto_apply = settings.Setting(True)

    want_main_area = False

    def __init__(self):
        super().__init__()
        self.data = None
        #: Proxies of the discrete / continuous variables of the input
        self.disc_colors = []
        self.cont_colors = []

        disc_box = gui.widgetBox(
            self.controlArea, "Discrete variables", orientation="horizontal")
        self.disc_model = DiscColorTableModel()
        self.disc_view = DiscreteTable(self.disc_model)
        self.disc_model.dataChanged.connect(self._on_data_changed)
        disc_box.layout().addWidget(self.disc_view)

        cont_box = gui.widgetBox(
            self.controlArea, "Numeric variables", orientation="horizontal")
        self.cont_model = ContColorTableModel()
        self.cont_view = ContinuousTable(self, self.cont_model)
        self.cont_model.dataChanged.connect(self._on_data_changed)
        cont_box.layout().addWidget(self.cont_view)

        commit_box = gui.widgetBox(self.controlArea, orientation="horizontal")
        gui.auto_commit(
            commit_box, self, "auto_apply", "Send data", box=commit_box,
            checkbox_label="Resend data on every change ")
        gui.rubber(commit_box)

    def set_data(self, data):
        """Set the input data and create proxies for its variables."""
        self.closeContext()
        self.disc_colors = []
        self.cont_colors = []

        def proxied(variables):
            # Return the variables with discrete and continuous ones
            # replaced by proxies; the proxies are also collected in
            # disc_colors / cont_colors.
            result = []
            for variable in variables:
                if variable.is_discrete or variable.is_continuous:
                    variable = variable.make_proxy()
                    if variable.is_discrete:
                        # The proxy gets its own copy of the values list
                        variable.values = variable.values[:]
                        self.disc_colors.append(variable)
                    else:
                        self.cont_colors.append(variable)
                result.append(variable)
            return result

        if data is not None:
            original = data.domain
            proxy_domain = Orange.data.Domain(
                proxied(original.attributes),
                proxied(original.class_vars),
                proxied(original.metas))
            # The context is opened after the proxies exist, since
            # retrieveSpecificSettings writes into them
            self.openContext(data)
            self.data = Orange.data.Table(proxy_domain, data)
            self.data.domain = proxy_domain
        else:
            self.data = self.domain = None

        self.disc_model.set_data(self.disc_colors)
        self.cont_model.set_data(self.cont_colors)
        self.disc_view.resizeColumnsToContents()
        self.cont_view.resizeColumnsToContents()
        self.commit()

    def storeSpecificSettings(self):
        # Store the colors that were changed -- but not others
        stored_disc = []
        for proxy in self.disc_colors:
            colors = proxy.colors if "colors" in proxy.attributes else False
            stored_disc.append((proxy.name, proxy.values, colors))
        self.current_context.disc_data = stored_disc

        stored_cont = []
        for proxy in self.cont_colors:
            colors = proxy.colors if "colors" in proxy.attributes else False
            stored_cont.append((proxy.name, colors))
        self.current_context.cont_data = stored_cont

    def retrieveSpecificSettings(self):
        # Apply the stored names, values and colors to the current proxies;
        # False stands for "colors were not set".
        stored_disc = getattr(self.current_context, "disc_data", ())
        for proxy, (name, values, colors) in zip(self.disc_colors,
                                                 stored_disc):
            proxy.name = name
            proxy.values = values[:]
            if colors is not False:
                proxy.colors = colors

        stored_cont = getattr(self.current_context, "cont_data", ())
        for proxy, (name, colors) in zip(self.cont_colors, stored_cont):
            proxy.name = name
            if colors is not False:
                proxy.colors = colors

    def _on_data_changed(self, *args):
        # Any edit in either table model re-sends the data
        self.commit()

    def commit(self):
        """Send the data with the proxied domain to the output."""
        self.send("Data", self.data)
class OWTestLearners(OWWidget):
    """Widget that tests input learners on data and shows their scores.

    Learners are evaluated with the selected resampling scheme; the scores
    are displayed in a table and the merged evaluation results and
    predictions are sent to the outputs.
    """
    name = "Test & Score"
    description = "Cross-validation accuracy estimation."
    icon = "icons/TestLearners1.svg"
    priority = 100

    inputs = [("Learner", Learner, "set_learner", widget.Multiple),
              ("Data", Table, "set_train_data", widget.Default),
              ("Test Data", Table, "set_test_data"),
              ("Preprocessor", Preprocess, "set_preprocessor")]

    outputs = [("Predictions", Table),
               ("Evaluation Results", Results)]

    settings_version = 2
    settingsHandler = settings.PerfectDomainContextHandler(metas_in_res=True)

    #: Resampling/testing types
    KFold, FeatureFold, ShuffleSplit, LeaveOneOut, TestOnTrain, TestOnTest \
        = 0, 1, 2, 3, 4, 5
    #: Numbers of folds
    NFolds = [2, 3, 5, 10, 20]
    #: Number of repetitions
    NRepeats = [2, 3, 5, 10, 20, 50, 100]
    #: Sample sizes
    SampleSizes = [5, 10, 20, 25, 30, 33, 40, 50, 60, 66, 70, 75, 80, 90, 95]

    #: Selected resampling type
    resampling = settings.Setting(0)
    #: Number of folds for K-fold cross validation (index into NFolds)
    n_folds = settings.Setting(3)
    #: Stratified sampling for K-fold
    cv_stratified = settings.Setting(True)
    #: Number of repeats for ShuffleSplit sampling (index into NRepeats)
    n_repeats = settings.Setting(3)
    #: ShuffleSplit sample size (index into SampleSizes)
    sample_size = settings.Setting(9)
    #: Stratified sampling for Random Sampling
    shuffle_stratified = settings.Setting(True)
    # CV where nr. of feature values determines nr. of folds
    fold_feature = settings.ContextSetting(None)
    fold_feature_selected = settings.ContextSetting(False)

    TARGET_AVERAGE = "(Average over classes)"
    class_selection = settings.ContextSetting(TARGET_AVERAGE)

    class Error(OWWidget.Error):
        train_data_empty = Msg("Train data set is empty.")
        test_data_empty = Msg("Test data set is empty.")
        class_required = Msg("Train data input requires a target variable.")
        too_many_classes = Msg("Too many target variables.")
        class_required_test = Msg(
            "Test data input requires a target variable.")
        too_many_folds = Msg("Number of folds exceeds the data size")
        class_inconsistent = Msg("Test and train data sets "
                                 "have different target variables.")

    class Warning(OWWidget.Warning):
        missing_data = \
            Msg("Instances with unknown target values were removed from{}data.")
        test_data_missing = Msg("Missing separate test data input.")
        scores_not_computed = Msg("Some scores could not be computed.")
        test_data_unused = Msg("Test data is present but unused. "
                               "Select 'Test on test data' to use it.")

    class Information(OWWidget.Information):
        data_sampled = Msg("Train data has been sampled")
        test_data_sampled = Msg("Test data has been sampled")

    def __init__(self):
        super().__init__()

        self.data = None
        self.test_data = None
        self.preprocessor = None
        self.train_data_missing_vals = False
        self.test_data_missing_vals = False

        #: An Ordered dictionary with current inputs and their testing results.
        self.learners = OrderedDict()

        sbox = gui.vBox(self.controlArea, "Sampling")
        rbox = gui.radioButtons(
            sbox, self, "resampling", callback=self._param_changed)

        # NOTE: the radio buttons are appended in the order of the
        # resampling constants (KFold, FeatureFold, ShuffleSplit, ...).
        gui.appendRadioButton(rbox, "Cross validation")
        ibox = gui.indentedBox(rbox)
        gui.comboBox(
            ibox, self, "n_folds", label="Number of folds: ",
            items=[str(x) for x in self.NFolds], maximumContentsLength=3,
            orientation=Qt.Horizontal, callback=self.kfold_changed)
        gui.checkBox(
            ibox, self, "cv_stratified", "Stratified",
            callback=self.kfold_changed)

        gui.appendRadioButton(rbox, "Cross validation by feature")
        ibox = gui.indentedBox(rbox)
        self.feature_model = DomainModel(
            order=DomainModel.METAS, valid_types=DiscreteVariable)
        self.features_combo = gui.comboBox(
            ibox, self, "fold_feature", model=self.feature_model,
            orientation=Qt.Horizontal, callback=self.fold_feature_changed)

        gui.appendRadioButton(rbox, "Random sampling")
        ibox = gui.indentedBox(rbox)
        gui.comboBox(
            ibox, self, "n_repeats", label="Repeat train/test: ",
            items=[str(x) for x in self.NRepeats], maximumContentsLength=3,
            orientation=Qt.Horizontal, callback=self.shuffle_split_changed)
        gui.comboBox(
            ibox, self, "sample_size", label="Training set size: ",
            items=["{} %".format(x) for x in self.SampleSizes],
            maximumContentsLength=5, orientation=Qt.Horizontal,
            callback=self.shuffle_split_changed)
        gui.checkBox(
            ibox, self, "shuffle_stratified", "Stratified",
            callback=self.shuffle_split_changed)

        gui.appendRadioButton(rbox, "Leave one out")
        gui.appendRadioButton(rbox, "Test on train data")
        gui.appendRadioButton(rbox, "Test on test data")

        self.cbox = gui.vBox(self.controlArea, "Target Class")
        self.class_selection_combo = gui.comboBox(
            self.cbox, self, "class_selection", items=[],
            sendSelectedValue=True, valueType=str,
            callback=self._on_target_class_changed,
            contentsLength=8)

        gui.rubber(self.controlArea)

        self.view = gui.TableView(
            wordWrap=True,
        )
        header = self.view.horizontalHeader()
        header.setSectionResizeMode(QHeaderView.ResizeToContents)
        header.setDefaultAlignment(Qt.AlignCenter)
        header.setStretchLastSection(False)

        self.result_model = QStandardItemModel(self)
        self.result_model.setHorizontalHeaderLabels(["Method"])
        self.view.setModel(self.result_model)
        self.view.setItemDelegate(ItemDelegate())

        box = gui.vBox(self.mainArea, "Evaluation Results")
        box.layout().addWidget(self.view)

    def sizeHint(self):
        return QSize(780, 1)

    def _update_controls(self):
        """Refresh the fold-feature combo and the "by feature" radio button.

        The "Cross validation by feature" option is disabled when the
        feature model is empty; if it was the selected resampling, the
        widget falls back to K-fold.
        """
        self.fold_feature = None
        self.feature_model.set_domain(None)
        if self.data:
            self.feature_model.set_domain(self.data.domain)
            if self.fold_feature is None and self.feature_model:
                self.fold_feature = self.feature_model[0]
        enabled = bool(self.feature_model)
        self.controls.resampling.buttons[
            OWTestLearners.FeatureFold].setEnabled(enabled)
        self.features_combo.setEnabled(enabled)
        if self.resampling == OWTestLearners.FeatureFold and not enabled:
            self.resampling = OWTestLearners.KFold

    def set_learner(self, learner, key):
        """
        Set the input `learner` for `key`.

        A None `learner` for a known `key` removes that input.
        """
        if key in self.learners and learner is None:
            # Removed
            del self.learners[key]
        else:
            self.learners[key] = Input(learner, None, None)
            self._invalidate([key])

    def set_train_data(self, data):
        """
        Set the input training dataset.

        Empty data, data without a target and data with more than one
        target are rejected (treated as None) with a matching error.
        """
        self.Information.data_sampled.clear()
        self.Error.train_data_empty.clear()
        # Clear both target-related errors up front so that an error raised
        # for the previous input cannot linger next to a new one.
        self.Error.class_required.clear()
        self.Error.too_many_classes.clear()

        if data is not None and not len(data):
            self.Error.train_data_empty()
            data = None
        if data and not data.domain.class_vars:
            self.Error.class_required()
            data = None
        elif data and len(data.domain.class_vars) > 1:
            self.Error.too_many_classes()
            data = None

        if isinstance(data, SqlTable):
            if data.approx_len() < AUTO_DL_LIMIT:
                data = Table(data)
            else:
                self.Information.data_sampled()
                data_sample = data.sample_time(1, no_cache=True)
                data_sample.download_data(AUTO_DL_LIMIT, partial=True)
                data = Table(data_sample)

        self.train_data_missing_vals = \
            data is not None and np.isnan(data.Y).any()
        if self.train_data_missing_vals or self.test_data_missing_vals:
            self.Warning.missing_data(self._which_missing_data())
            if data:
                data = RemoveNaNClasses(data)
        else:
            self.Warning.missing_data.clear()

        self.data = data
        self.closeContext()
        self._update_controls()
        if data is not None:
            self._update_class_selection()
            self.openContext(data.domain)
            if self.fold_feature_selected and bool(self.feature_model):
                self.resampling = OWTestLearners.FeatureFold
        self._invalidate()

    def set_test_data(self, data):
        """
        Set the input separate testing dataset.

        Empty data and data without a target are rejected (treated as
        None) with a matching error.
        """
        self.Information.test_data_sampled.clear()
        self.Error.test_data_empty.clear()
        self.Error.class_required_test.clear()

        if data is not None and not len(data):
            self.Error.test_data_empty()
            data = None
        if data and not data.domain.class_var:
            # Report against the *test* input; `class_required` is the
            # train-data message and is managed by set_train_data.
            self.Error.class_required_test()
            data = None

        if isinstance(data, SqlTable):
            if data.approx_len() < AUTO_DL_LIMIT:
                data = Table(data)
            else:
                self.Information.test_data_sampled()
                data_sample = data.sample_time(1, no_cache=True)
                data_sample.download_data(AUTO_DL_LIMIT, partial=True)
                data = Table(data_sample)

        self.test_data_missing_vals = \
            data is not None and np.isnan(data.Y).any()
        if self.train_data_missing_vals or self.test_data_missing_vals:
            self.Warning.missing_data(self._which_missing_data())
            if data:
                data = RemoveNaNClasses(data)
        else:
            self.Warning.missing_data.clear()

        self.test_data = data
        if self.resampling == OWTestLearners.TestOnTest:
            self._invalidate()

    def _which_missing_data(self):
        """Return the text inserted into the `missing_data` warning.

        Only called when at least one of the two flags is set, hence no
        (False, False) entry.
        """
        return {
            (True, True): " ",  # both, don't specify
            (True, False): " train ",
            (False, True): " test "
        }[(self.train_data_missing_vals, self.test_data_missing_vals)]

    def set_preprocessor(self, preproc):
        """
        Set the input preprocessor to apply on the training data.
        """
        self.preprocessor = preproc
        self._invalidate()

    def handleNewSignals(self):
        """Reimplemented from OWWidget.handleNewSignals."""
        self._update_class_selection()
        self.commit()

    def kfold_changed(self):
        self.resampling = OWTestLearners.KFold
        self._param_changed()

    def fold_feature_changed(self):
        self.resampling = OWTestLearners.FeatureFold
        self._param_changed()

    def shuffle_split_changed(self):
        self.resampling = OWTestLearners.ShuffleSplit
        self._param_changed()

    def _param_changed(self):
        self._invalidate()

    def _update_results(self):
        """
        Run/evaluate the learners.

        Only learners whose slot has no results yet are evaluated. The
        status message is always reset on exit, including the early
        returns and failures inside the evaluation.
        """
        self.Warning.test_data_unused.clear()
        self.Warning.test_data_missing.clear()
        self.warning()
        self.Error.class_inconsistent.clear()
        self.Error.too_many_folds.clear()
        self.error()
        if self.data is None:
            return

        class_var = self.data.domain.class_var

        if self.resampling == OWTestLearners.TestOnTest:
            if self.test_data is None:
                if not self.Error.test_data_empty.is_shown():
                    self.Warning.test_data_missing()
                return
            elif self.test_data.domain.class_var != class_var:
                self.Error.class_inconsistent()
                return

        # items in need of an update
        items = [(key, slot) for key, slot in self.learners.items()
                 if slot.results is None]
        learners = [slot.learner for _, slot in items]
        if not items:
            return

        if self.test_data is not None and \
                self.resampling != OWTestLearners.TestOnTest:
            self.Warning.test_data_unused()

        rstate = 42

        def update_progress(finished):
            self.progressBarSet(100 * finished)

        common_args = dict(
            store_data=True,
            preprocessor=self.preprocessor,
            callback=update_progress,
            n_jobs=-1,
        )
        self.setStatusMessage("Running")
        try:
            with self.progressBar():
                try:
                    folds = self.NFolds[self.n_folds]
                    if self.resampling == OWTestLearners.KFold:
                        if len(self.data) < folds:
                            self.Error.too_many_folds()
                            return
                        warnings = []
                        results = Orange.evaluation.CrossValidation(
                            self.data, learners, k=folds,
                            random_state=rstate, warnings=warnings,
                            **common_args)
                        if warnings:
                            self.warning(warnings[0])
                    elif self.resampling == OWTestLearners.FeatureFold:
                        results = Orange.evaluation.CrossValidationFeature(
                            self.data, learners, self.fold_feature,
                            **common_args)
                    elif self.resampling == OWTestLearners.LeaveOneOut:
                        results = Orange.evaluation.LeaveOneOut(
                            self.data, learners, **common_args)
                    elif self.resampling == OWTestLearners.ShuffleSplit:
                        train_size = self.SampleSizes[self.sample_size] / 100
                        results = Orange.evaluation.ShuffleSplit(
                            self.data, learners,
                            n_resamples=self.NRepeats[self.n_repeats],
                            train_size=train_size, test_size=None,
                            stratified=self.shuffle_stratified,
                            random_state=rstate, **common_args)
                    elif self.resampling == OWTestLearners.TestOnTrain:
                        results = Orange.evaluation.TestOnTrainingData(
                            self.data, learners, **common_args)
                    elif self.resampling == OWTestLearners.TestOnTest:
                        results = Orange.evaluation.TestOnTestData(
                            self.data, self.test_data, learners,
                            **common_args)
                    else:
                        assert False
                except (RuntimeError, ValueError) as e:
                    self.error(str(e))
                    return
                else:
                    self.error()

            self.puts_results(learners, results, class_var)
        finally:
            # Reset on every exit path; previously the too-many-folds
            # return left the status stuck at "Running".
            self.setStatusMessage("")

    def puts_results(self, learners, results, class_var):
        """
        Called by _update_results.
        This method prepares calculated results and put them into
        self.learners.
        """
        learner_key = {slot.learner: key
                       for key, slot in self.learners.items()}
        for learner, result in zip(learners, results.split_by_model()):
            stats = None
            if class_var.is_primitive():
                scorers = classification_stats.scores \
                    if class_var.is_discrete else regression_stats.scores
                ex = result.failed[0]
                if ex:
                    stats = [Try.Fail(ex)] * len(scorers)
                    result = Try.Fail(ex)
                else:
                    # `result` is rebound only after the list is built
                    stats = [Try(lambda: score(result)) for score in scorers]
                    result = Try.Success(result)
            if learner in learner_key:
                key = learner_key[learner]
                self.learners[key] = \
                    self.learners[key]._replace(results=result, stats=stats)

    def _update_header(self):
        # Set the correct horizontal header labels on the results_model.
        headers = ["Method"]
        if self.data is not None:
            if self.data.domain.has_discrete_class:
                headers.extend(classification_stats.headers)
            else:
                headers.extend(regression_stats.headers)

        # remove possible extra columns from the model.
        for i in reversed(range(len(headers),
                                self.result_model.columnCount())):
            self.result_model.takeColumn(i)

        self.result_model.setHorizontalHeaderLabels(headers)

    def _update_stats_model(self):
        # Update the results_model with up to date scores.
        # Note: The target class specific scores (if requested) are
        # computed as needed in this method.
        model = self.view.model()
        # clear the table model, but preserving the header labels
        for r in reversed(range(model.rowCount())):
            model.takeRow(r)

        target_index = None
        if self.data is not None:
            class_var = self.data.domain.class_var
            if self.data.domain.has_discrete_class and \
                    self.class_selection != self.TARGET_AVERAGE:
                target_index = class_var.values.index(self.class_selection)
        else:
            class_var = None

        errors = []
        has_missing_scores = False

        for key, slot in self.learners.items():
            name = learner_name(slot.learner)
            head = QStandardItem(name)
            head.setData(key, Qt.UserRole)
            if isinstance(slot.results, Try.Fail):
                head.setToolTip(str(slot.results.exception))
                head.setText("{} (error)".format(name))
                head.setForeground(QtGui.QBrush(Qt.red))
                errors.append("{name} failed with error:\n"
                              "{exc.__class__.__name__}: {exc!s}".format(
                                  name=name, exc=slot.results.exception))

            row = [head]

            if class_var is not None and class_var.is_discrete and \
                    target_index is not None:
                if slot.results is not None and slot.results.success:
                    ovr_results = results_one_vs_rest(
                        slot.results.value, target_index)
                    stats = [Try(lambda: score(ovr_results))
                             for score in classification_stats.scores]
                else:
                    stats = None
            else:
                stats = slot.stats

            if stats is not None:
                for stat in stats:
                    item = QStandardItem()
                    if stat.success:
                        item.setText("{:.3f}".format(stat.value[0]))
                    else:
                        item.setToolTip(str(stat.exception))
                        has_missing_scores = True
                    row.append(item)

            model.appendRow(row)

        self.error("\n".join(errors), shown=bool(errors))
        self.Warning.scores_not_computed(shown=has_missing_scores)

    def _update_class_selection(self):
        """Repopulate the target class combo for the current data.

        The previous selection is kept when it is still one of the class
        values; otherwise the average-over-classes entry is selected.
        """
        self.class_selection_combo.setCurrentIndex(-1)
        self.class_selection_combo.clear()
        if not self.data:
            return

        if self.data.domain.has_discrete_class:
            self.cbox.setVisible(True)
            class_var = self.data.domain.class_var
            items = [self.TARGET_AVERAGE] + class_var.values
            self.class_selection_combo.addItems(items)

            class_index = 0
            if self.class_selection in class_var.values:
                # +1 for the leading TARGET_AVERAGE entry
                class_index = class_var.values.index(self.class_selection) + 1

            self.class_selection_combo.setCurrentIndex(class_index)
            self.class_selection = items[class_index]
        else:
            self.cbox.setVisible(False)

    def _on_target_class_changed(self):
        self._update_stats_model()

    def _invalidate(self, which=None):
        """Drop stored results for `which` learner keys and recompute.

        If `which` is None, the results of all learners are invalidated.
        """
        self.fold_feature_selected = \
            self.resampling == OWTestLearners.FeatureFold
        # Invalidate learner results for `which` input keys
        # (if None then all learner results are invalidated)
        if which is None:
            which = self.learners.keys()

        model = self.view.model()
        statmodelkeys = [model.item(row, 0).data(Qt.UserRole)
                         for row in range(model.rowCount())]

        for key in which:
            self.learners[key] = \
                self.learners[key]._replace(results=None, stats=None)

            if key in statmodelkeys:
                row = statmodelkeys.index(key)
                for c in range(1, model.columnCount()):
                    item = model.item(row, c)
                    if item is not None:
                        item.setData(None, Qt.DisplayRole)
                        item.setData(None, Qt.ToolTipRole)

        self.commit()

    def commit(self):
        """Recompute and output the results"""
        self._update_header()
        # Update the view to display the model names
        self._update_stats_model()
        self._update_results()
        self._update_stats_model()
        valid = [slot for slot in self.learners.values()
                 if slot.results is not None and slot.results.success]
        if valid:
            # Evaluation results
            combined = results_merge([slot.results.value for slot in valid])
            combined.learner_names = [learner_name(slot.learner)
                                      for slot in valid]

            # Predictions & Probabilities
            predictions = combined.get_augmented_data(combined.learner_names)
        else:
            combined = None
            predictions = None
        self.send("Evaluation Results", combined)
        self.send("Predictions", predictions)

    def send_report(self):
        """Report on the testing schema and results"""
        if not self.data or not self.learners:
            return
        if self.resampling == self.KFold:
            stratified = 'Stratified ' if self.cv_stratified else ''
            items = [("Sampling type", "{}{}-fold Cross validation".
                      format(stratified, self.NFolds[self.n_folds]))]
        elif self.resampling == self.LeaveOneOut:
            items = [("Sampling type", "Leave one out")]
        elif self.resampling == self.ShuffleSplit:
            stratified = 'Stratified ' if self.shuffle_stratified else ''
            items = [("Sampling type",
                      "{}Shuffle split, {} random samples with {}% data "
                      .format(stratified, self.NRepeats[self.n_repeats],
                              self.SampleSizes[self.sample_size]))]
        elif self.resampling == self.TestOnTrain:
            items = [("Sampling type", "No sampling, test on training data")]
        elif self.resampling == self.TestOnTest:
            items = [("Sampling type", "No sampling, test on testing data")]
        else:
            items = []
        if self.data.domain.has_discrete_class:
            items += [("Target class", self.class_selection.strip("()"))]
        if items:
            self.report_items("Settings", items)
        self.report_table("Scores", self.view)

    @classmethod
    def migrate_settings(cls, settings_, version):
        """Upgrade settings stored by widget versions older than 2."""
        if version < 2:
            if not hasattr(settings_["context_settings"][0], "attributes"):
                settings_["context_settings"][0].attributes = {}
            # Resampling indices above 0 are shifted by one; this matches
            # FeatureFold sitting at index 1 in the constants above.
            if settings_["resampling"] > 0:
                settings_["resampling"] += 1
class OWColor(widget.OWWidget):
    """Widget for editing names, value labels and colors of variables.

    Every discrete and continuous variable of the input is wrapped in an
    `AttrDesc`; on commit the data is transformed into a domain whose
    variables are copies carrying the name, values and colors taken from
    those descriptors.
    """
    name = "Color"
    description = "Set color legend for variables."
    icon = "icons/Colors.svg"

    class Inputs:
        data = Input("Data", Orange.data.Table)

    class Outputs:
        data = Output("Data", Orange.data.Table)

    settingsHandler = settings.PerfectDomainContextHandler(
        match_values=settings.PerfectDomainContextHandler.MATCH_VALUES_ALL)
    #: Descriptors of discrete variables (stored per context)
    disc_colors = settings.ContextSetting([])
    #: Descriptors of continuous variables (stored per context)
    cont_colors = settings.ContextSetting([])
    color_settings = settings.Setting(None)
    selected_schema_index = settings.Setting(0)
    auto_apply = settings.Setting(True)

    want_main_area = False

    def __init__(self):
        super().__init__()
        self.data = None
        self.orig_domain = self.domain = None
        #: Descriptors of discrete variables, keyed by original name
        self.disc_dict = {}
        #: Descriptors of continuous variables, keyed by original name
        self.cont_dict = {}

        disc_box = gui.hBox(self.controlArea, "Discrete Variables")
        self.disc_model = DiscColorTableModel()
        self.disc_view = DiscreteTable(self.disc_model)
        self.disc_view.horizontalHeader().setSectionResizeMode(
            QHeaderView.ResizeToContents)
        self.disc_model.dataChanged.connect(self._on_data_changed)
        disc_box.layout().addWidget(self.disc_view)

        cont_box = gui.hBox(self.controlArea, "Numeric Variables")
        self.cont_model = ContColorTableModel()
        self.cont_view = ContinuousTable(self, self.cont_model)
        self.cont_view.setColumnWidth(1, 256)
        self.cont_model.dataChanged.connect(self._on_data_changed)
        cont_box.layout().addWidget(self.cont_view)

        apply_box = gui.auto_apply(self.controlArea, self, "auto_apply")
        apply_box.button.setFixedWidth(180)
        apply_box.layout().insertStretch(0)

    @staticmethod
    def sizeHint():
        return QSize(500, 570)

    @Inputs.data
    def set_data(self, data):
        """Handle data input signal"""
        self.closeContext()
        self.disc_colors = []
        self.cont_colors = []

        if data is not None:
            self.data = data
            every_variable = chain(data.domain.variables, data.domain.metas)
            for variable in every_variable:
                if variable.is_discrete:
                    self.disc_colors.append(AttrDesc(variable))
                elif variable.is_continuous:
                    self.cont_colors.append(AttrDesc(variable))
        else:
            self.data = self.domain = None

        self.disc_model.set_data(self.disc_colors)
        self.cont_model.set_data(self.cont_colors)
        self.disc_view.resizeColumnsToContents()
        self.cont_view.resizeColumnsToContents()
        self.openContext(data)
        # The lookup tables are built after the context is opened, from
        # whatever the two lists hold at that point.
        self.disc_dict = {desc.var.name: desc for desc in self.disc_colors}
        self.cont_dict = {desc.var.name: desc for desc in self.cont_colors}
        self.unconditional_commit()

    def _on_data_changed(self, *args):
        """Slot for the table models' `dataChanged` signal."""
        self.commit()

    def commit(self):
        """Send the data transformed into a domain with the edited variables.

        Sends None when there is no input data.
        """
        if self.data is None:
            self.Outputs.data.send(None)
            return

        def edited(variable):
            # Return a copy of `variable` with the descriptor's name,
            # values and colors, or `variable` itself if it has none.
            lookup = self.disc_dict if variable.is_discrete \
                else self.cont_dict
            desc = lookup.get(variable.name)
            if not desc:
                return variable
            name = desc.get_name()
            if variable.is_discrete:
                variable = variable.copy(name=name, values=desc.get_values())
            else:
                variable = variable.copy(name=name)
            variable.colors = desc.colors
            return variable

        dom = self.data.domain
        new_domain = Orange.data.Domain(
            [edited(var) for var in dom.attributes],
            [edited(var) for var in dom.class_vars],
            [edited(var) for var in dom.metas])
        new_data = self.data.transform(new_domain)
        self.Outputs.data.send(new_data)

    def send_report(self):
        """Send report"""
        if not self.data:
            return

        def _report_variables(variables):
            # Build the HTML table rows describing `variables`; variables
            # that are neither discrete nor continuous are left out.
            from Orange.widgets.report import colored_square as square

            def was(n, o):
                return n if n == o else f"{n} (was: {o})"

            # The gradient cell spans as many columns as the longest list
            # of discrete values (at least one).
            max_values = max(
                (len(var.values) for var in variables if var.is_discrete),
                default=1)

            # definition of td element for continuous gradient
            # with support for pre-standard css (needed at least for Qt 4.8)
            defs = ("-webkit-", "-o-", "-moz-", "")
            cell_open = (
                '<td colspan="{}">'
                '<span class="legend-square" style="width: 100px; '
            ).format(max_values)
            gradient = (
                "background: {}linear-gradient("
                "left, rgb({{}}, {{}}, {{}}), {{}}rgb({{}}, {{}}, {{}}));")
            backgrounds = " ".join(gradient.format(prefix) for prefix in defs)
            cont_tpl = cell_open + backgrounds + '"></span></td>'

            rows = ""
            for var in variables:
                if var.is_discrete:
                    desc = self.disc_dict[var.name]
                    cells = [
                        "<td>{} {}</td>".format(
                            square(*color), was(value, old_value))
                        for color, value, old_value in zip(
                            desc.get_colors(), desc.get_values(), var.values)]
                    values = " \n".join(cells)
                elif var.is_continuous:
                    desc = self.cont_dict[var.name]
                    col = desc.get_colors()
                    colors = col[0][:3] + ("black, " * col[2], ) + col[1][:3]
                    # the same colors are repeated for each css prefix
                    values = cont_tpl.format(*colors * len(defs))
                else:
                    continue
                names = was(desc.get_name(), desc.var.name)
                rows += '<tr style="height: 2em">\n' \
                        ' <th style="text-align: right">{}</th>{}\n</tr>\n'. \
                    format(names, values)
            return rows

        dom = self.data.domain
        outcome_title = "Outcome" + "s" * (len(dom.class_vars) > 1)
        parts = (("Features", dom.attributes),
                 (outcome_title, dom.class_vars),
                 ("Meta attributes", dom.metas))
        sections = ((title, _report_variables(variables))
                    for title, variables in parts)
        table = "".join("<tr><th>{}</th></tr>{}".format(title, rows)
                        for title, rows in sections if rows)
        if table:
            self.report_raw("<table>{}</table>".format(table))
class OWTestLearners(OWWidget): name = "Test & Score" description = "Cross-validation accuracy estimation." icon = "icons/TestLearners1.svg" priority = 100 class Inputs: train_data = Input("Data", Table, default=True) test_data = Input("Test Data", Table) learner = Input("Learner", Learner, multiple=True) preprocessor = Input("Preprocessor", Preprocess) class Outputs: predictions = Output("Predictions", Table) evaluations_results = Output("Evaluation Results", Results) settings_version = 3 UserAdviceMessages = [ widget.Message("Click on the table header to select shown columns", "click_header") ] settingsHandler = settings.PerfectDomainContextHandler() #: Resampling/testing types KFold, FeatureFold, ShuffleSplit, LeaveOneOut, TestOnTrain, TestOnTest \ = 0, 1, 2, 3, 4, 5 #: Numbers of folds NFolds = [2, 3, 5, 10, 20] #: Number of repetitions NRepeats = [2, 3, 5, 10, 20, 50, 100] #: Sample sizes SampleSizes = [5, 10, 20, 25, 30, 33, 40, 50, 60, 66, 70, 75, 80, 90, 95] #: Selected resampling type resampling = settings.Setting(0) #: Number of folds for K-fold cross validation n_folds = settings.Setting(3) #: Stratified sampling for K-fold cv_stratified = settings.Setting(True) #: Number of repeats for ShuffleSplit sampling n_repeats = settings.Setting(3) #: ShuffleSplit sample size sample_size = settings.Setting(9) #: Stratified sampling for Random Sampling shuffle_stratified = settings.Setting(True) # CV where nr. of feature values determines nr. 
of folds fold_feature = settings.ContextSetting(None) fold_feature_selected = settings.ContextSetting(False) TARGET_AVERAGE = "(Average over classes)" class_selection = settings.ContextSetting(TARGET_AVERAGE) BUILTIN_ORDER = { DiscreteVariable: ("AUC", "CA", "F1", "Precision", "Recall"), ContinuousVariable: ("MSE", "RMSE", "MAE", "R2") } shown_scores = \ settings.Setting(set(chain(*BUILTIN_ORDER.values()))) class Error(OWWidget.Error): train_data_empty = Msg("Train data set is empty.") test_data_empty = Msg("Test data set is empty.") class_required = Msg("Train data input requires a target variable.") too_many_classes = Msg("Too many target variables.") class_required_test = Msg( "Test data input requires a target variable.") too_many_folds = Msg("Number of folds exceeds the data size") class_inconsistent = Msg("Test and train data sets " "have different target variables.") memory_error = Msg("Not enough memory.") only_one_class_var_value = Msg("Target variable has only one value.") class Warning(OWWidget.Warning): missing_data = \ Msg("Instances with unknown target values were removed from{}data.") test_data_missing = Msg("Missing separate test data input.") scores_not_computed = Msg("Some scores could not be computed.") test_data_unused = Msg("Test data is present but unused. " "Select 'Test on test data' to use it.") class Information(OWWidget.Information): data_sampled = Msg("Train data has been sampled") test_data_sampled = Msg("Test data has been sampled") def __init__(self): super().__init__() self.data = None self.test_data = None self.preprocessor = None self.train_data_missing_vals = False self.test_data_missing_vals = False self.scorers = [] #: An Ordered dictionary with current inputs and their testing results. 
self.learners = OrderedDict() # type: Dict[Any, Input] self.__state = State.Waiting # Do we need to [re]test any learners, set by _invalidate and # cleared by __update self.__needupdate = False self.__task = None # type: Optional[Task] self.__executor = ThreadExecutor() sbox = gui.vBox(self.controlArea, "Sampling") rbox = gui.radioButtons(sbox, self, "resampling", callback=self._param_changed) gui.appendRadioButton(rbox, "Cross validation") ibox = gui.indentedBox(rbox) gui.comboBox(ibox, self, "n_folds", label="Number of folds: ", items=[str(x) for x in self.NFolds], maximumContentsLength=3, orientation=Qt.Horizontal, callback=self.kfold_changed) gui.checkBox(ibox, self, "cv_stratified", "Stratified", callback=self.kfold_changed) gui.appendRadioButton(rbox, "Cross validation by feature") ibox = gui.indentedBox(rbox) self.feature_model = DomainModel(order=DomainModel.METAS, valid_types=DiscreteVariable) self.features_combo = gui.comboBox(ibox, self, "fold_feature", model=self.feature_model, orientation=Qt.Horizontal, callback=self.fold_feature_changed) gui.appendRadioButton(rbox, "Random sampling") ibox = gui.indentedBox(rbox) gui.comboBox(ibox, self, "n_repeats", label="Repeat train/test: ", items=[str(x) for x in self.NRepeats], maximumContentsLength=3, orientation=Qt.Horizontal, callback=self.shuffle_split_changed) gui.comboBox(ibox, self, "sample_size", label="Training set size: ", items=["{} %".format(x) for x in self.SampleSizes], maximumContentsLength=5, orientation=Qt.Horizontal, callback=self.shuffle_split_changed) gui.checkBox(ibox, self, "shuffle_stratified", "Stratified", callback=self.shuffle_split_changed) gui.appendRadioButton(rbox, "Leave one out") gui.appendRadioButton(rbox, "Test on train data") gui.appendRadioButton(rbox, "Test on test data") self.cbox = gui.vBox(self.controlArea, "Target Class") self.class_selection_combo = gui.comboBox( self.cbox, self, "class_selection", items=[], sendSelectedValue=True, valueType=str, 
callback=self._on_target_class_changed, contentsLength=8) gui.rubber(self.controlArea) self.view = gui.TableView(wordWrap=True, ) header = self.view.horizontalHeader() header.setSectionResizeMode(QHeaderView.ResizeToContents) header.setDefaultAlignment(Qt.AlignCenter) header.setStretchLastSection(False) header.setContextMenuPolicy(Qt.CustomContextMenu) header.customContextMenuRequested.connect(self.show_column_chooser) self.result_model = QStandardItemModel(self) self.result_model.setHorizontalHeaderLabels(["Method"]) self.view.setModel(self.result_model) self.view.setItemDelegate(ItemDelegate()) box = gui.vBox(self.mainArea, "Evaluation Results") box.layout().addWidget(self.view) def sizeHint(self): return QSize(780, 1) def _update_controls(self): self.fold_feature = None self.feature_model.set_domain(None) if self.data: self.feature_model.set_domain(self.data.domain) if self.fold_feature is None and self.feature_model: self.fold_feature = self.feature_model[0] enabled = bool(self.feature_model) self.controls.resampling.buttons[ OWTestLearners.FeatureFold].setEnabled(enabled) self.features_combo.setEnabled(enabled) if self.resampling == OWTestLearners.FeatureFold and not enabled: self.resampling = OWTestLearners.KFold @Inputs.learner def set_learner(self, learner, key): """ Set the input `learner` for `key`. Parameters ---------- learner : Optional[Orange.base.Learner] key : Any """ if key in self.learners and learner is None: # Removed self._invalidate([key]) del self.learners[key] else: self.learners[key] = InputLearner(learner, None, None) self._invalidate([key]) @Inputs.train_data def set_train_data(self, data): """ Set the input training dataset. 
Parameters ---------- data : Optional[Orange.data.Table] """ self.Information.data_sampled.clear() self.Error.train_data_empty.clear() self.Error.class_required.clear() self.Error.too_many_classes.clear() self.Error.only_one_class_var_value.clear() if data is not None and not len(data): self.Error.train_data_empty() data = None if data: conds = [ not data.domain.class_vars, len(data.domain.class_vars) > 1, data.domain.has_discrete_class and len(data.domain.class_var.values) == 1 ] errors = [ self.Error.class_required, self.Error.too_many_classes, self.Error.only_one_class_var_value ] for cond, error in zip(conds, errors): if cond: error() data = None break if isinstance(data, SqlTable): if data.approx_len() < AUTO_DL_LIMIT: data = Table(data) else: self.Information.data_sampled() data_sample = data.sample_time(1, no_cache=True) data_sample.download_data(AUTO_DL_LIMIT, partial=True) data = Table(data_sample) self.train_data_missing_vals = \ data is not None and np.isnan(data.Y).any() if self.train_data_missing_vals or self.test_data_missing_vals: self.Warning.missing_data(self._which_missing_data()) if data: data = HasClass()(data) else: self.Warning.missing_data.clear() self.data = data self.closeContext() self._update_scorers() self._update_controls() if data is not None: self._update_class_selection() self.openContext(data.domain) if self.fold_feature_selected and bool(self.feature_model): self.resampling = OWTestLearners.FeatureFold self._invalidate() @Inputs.test_data def set_test_data(self, data): # type: (Orange.data.Table) -> None """ Set the input separate testing dataset. 
Parameters ---------- data : Optional[Orange.data.Table] """ self.Information.test_data_sampled.clear() self.Error.test_data_empty.clear() if data is not None and not len(data): self.Error.test_data_empty() data = None if data and not data.domain.class_var: self.Error.class_required_test() data = None else: self.Error.class_required_test.clear() if isinstance(data, SqlTable): if data.approx_len() < AUTO_DL_LIMIT: data = Table(data) else: self.Information.test_data_sampled() data_sample = data.sample_time(1, no_cache=True) data_sample.download_data(AUTO_DL_LIMIT, partial=True) data = Table(data_sample) self.test_data_missing_vals = \ data is not None and np.isnan(data.Y).any() if self.train_data_missing_vals or self.test_data_missing_vals: self.Warning.missing_data(self._which_missing_data()) if data: data = HasClass()(data) else: self.Warning.missing_data.clear() self.test_data = data if self.resampling == OWTestLearners.TestOnTest: self._invalidate() def _which_missing_data(self): return { (True, True): " ", # both, don't specify (True, False): " train ", (False, True): " test " }[(self.train_data_missing_vals, self.test_data_missing_vals)] # List of scorers shouldn't be retrieved globally, when the module is # loading since add-ons could have registered additional scorers. 
# It could have been cached but # - we don't gain much with it # - it complicates the unit tests def _update_scorers(self): if self.data is None or self.data.domain.class_var is None: self.scorers = [] return class_var = self.data and self.data.domain.class_var order = { name: i for i, name in enumerate(self.BUILTIN_ORDER[type(class_var)]) } # 'abstract' is retrieved from __dict__ to avoid inheriting usable = (cls for cls in scoring.Score.registry.values() if cls.is_scalar and not cls.__dict__.get("abstract") and isinstance(class_var, cls.class_types)) self.scorers = sorted(usable, key=lambda cls: order.get(cls.name, 99)) @Inputs.preprocessor def set_preprocessor(self, preproc): """ Set the input preprocessor to apply on the training data. """ self.preprocessor = preproc self._invalidate() def handleNewSignals(self): """Reimplemented from OWWidget.handleNewSignals.""" self._update_class_selection() self._update_header() self._update_stats_model() if self.__needupdate: self.__update() def kfold_changed(self): self.resampling = OWTestLearners.KFold self._param_changed() def fold_feature_changed(self): self.resampling = OWTestLearners.FeatureFold self._param_changed() def shuffle_split_changed(self): self.resampling = OWTestLearners.ShuffleSplit self._param_changed() def _param_changed(self): self._invalidate() self.__update() def _update_header(self): # Set the correct horizontal header labels on the results_model. 
model = self.result_model model.setColumnCount(1 + len(self.scorers)) for col, score in enumerate(self.scorers): item = QStandardItem(score.name) item.setToolTip(score.long_name) model.setHorizontalHeaderItem(col + 1, item) self._update_shown_columns() def _update_shown_columns(self): # pylint doesn't know that self.shown_scores is a set, not a Setting # pylint: disable=unsupported-membership-test model = self.result_model header = self.view.horizontalHeader() for section in range(1, model.columnCount()): col_name = model.horizontalHeaderItem(section).data(Qt.DisplayRole) header.setSectionHidden(section, col_name not in self.shown_scores) def _update_stats_model(self): # Update the results_model with up to date scores. # Note: The target class specific scores (if requested) are # computed as needed in this method. model = self.view.model() # clear the table model, but preserving the header labels for r in reversed(range(model.rowCount())): model.takeRow(r) target_index = None if self.data is not None: class_var = self.data.domain.class_var if self.data.domain.has_discrete_class and \ self.class_selection != self.TARGET_AVERAGE: target_index = class_var.values.index(self.class_selection) else: class_var = None errors = [] has_missing_scores = False for key, slot in self.learners.items(): name = learner_name(slot.learner) head = QStandardItem(name) head.setData(key, Qt.UserRole) if isinstance(slot.results, Try.Fail): head.setToolTip(str(slot.results.exception)) head.setText("{} (error)".format(name)) head.setForeground(QtGui.QBrush(Qt.red)) errors.append("{name} failed with error:\n" "{exc.__class__.__name__}: {exc!s}".format( name=name, exc=slot.results.exception)) row = [head] if class_var is not None and class_var.is_discrete and \ target_index is not None: if slot.results is not None and slot.results.success: ovr_results = results_one_vs_rest(slot.results.value, target_index) # Cell variable is used immediatelly, it's not stored # pylint: 
disable=cell-var-from-loop stats = [ Try(scorer_caller(scorer, ovr_results)) for scorer in self.scorers ] else: stats = None else: stats = slot.stats if stats is not None: for stat in stats: item = QStandardItem() if stat.success: item.setText("{:.3f}".format(stat.value[0])) else: item.setToolTip(str(stat.exception)) has_missing_scores = True row.append(item) model.appendRow(row) self.error("\n".join(errors), shown=bool(errors)) self.Warning.scores_not_computed(shown=has_missing_scores) def _update_class_selection(self): self.class_selection_combo.setCurrentIndex(-1) self.class_selection_combo.clear() if not self.data: return if self.data.domain.has_discrete_class: self.cbox.setVisible(True) class_var = self.data.domain.class_var items = [self.TARGET_AVERAGE] + class_var.values self.class_selection_combo.addItems(items) class_index = 0 if self.class_selection in class_var.values: class_index = class_var.values.index(self.class_selection) + 1 self.class_selection_combo.setCurrentIndex(class_index) self.class_selection = items[class_index] else: self.cbox.setVisible(False) def _on_target_class_changed(self): self._update_stats_model() def _invalidate(self, which=None): self.fold_feature_selected = \ self.resampling == OWTestLearners.FeatureFold # Invalidate learner results for `which` input keys # (if None then all learner results are invalidated) if which is None: which = self.learners.keys() model = self.view.model() statmodelkeys = [ model.item(row, 0).data(Qt.UserRole) for row in range(model.rowCount()) ] for key in which: self.learners[key] = \ self.learners[key]._replace(results=None, stats=None) if key in statmodelkeys: row = statmodelkeys.index(key) for c in range(1, model.columnCount()): item = model.item(row, c) if item is not None: item.setData(None, Qt.DisplayRole) item.setData(None, Qt.ToolTipRole) self.__needupdate = True def show_column_chooser(self, pos): # pylint doesn't know that self.shown_scores is a set, not a Setting # pylint: 
disable=unsupported-membership-test def update(col_name, checked): if checked: self.shown_scores.add(col_name) else: self.shown_scores.remove(col_name) self._update_shown_columns() menu = QMenu() model = self.result_model header = self.view.horizontalHeader() for section in range(1, model.columnCount()): col_name = model.horizontalHeaderItem(section).data(Qt.DisplayRole) action = menu.addAction(col_name) action.setCheckable(True) action.setChecked(col_name in self.shown_scores) action.triggered.connect(partial(update, col_name)) menu.exec(header.mapToGlobal(pos)) def commit(self): """ Commit the results to output. """ self.Error.memory_error.clear() valid = [ slot for slot in self.learners.values() if slot.results is not None and slot.results.success ] combined = None predictions = None if valid: # Evaluation results combined = results_merge([slot.results.value for slot in valid]) combined.learner_names = [ learner_name(slot.learner) for slot in valid ] # Predictions & Probabilities try: predictions = combined.get_augmented_data( combined.learner_names) except MemoryError: self.Error.memory_error() self.Outputs.evaluations_results.send(combined) self.Outputs.predictions.send(predictions) def send_report(self): """Report on the testing schema and results""" if not self.data or not self.learners: return if self.resampling == self.KFold: stratified = 'Stratified ' if self.cv_stratified else '' items = [("Sampling type", "{}{}-fold Cross validation".format( stratified, self.NFolds[self.n_folds]))] elif self.resampling == self.LeaveOneOut: items = [("Sampling type", "Leave one out")] elif self.resampling == self.ShuffleSplit: stratified = 'Stratified ' if self.shuffle_stratified else '' items = [ ("Sampling type", "{}Shuffle split, {} random samples with {}% data ".format( stratified, self.NRepeats[self.n_repeats], self.SampleSizes[self.sample_size])) ] elif self.resampling == self.TestOnTrain: items = [("Sampling type", "No sampling, test on training data")] elif 
self.resampling == self.TestOnTest: items = [("Sampling type", "No sampling, test on testing data")] else: items = [] if self.data.domain.has_discrete_class: items += [("Target class", self.class_selection.strip("()"))] if items: self.report_items("Settings", items) self.report_table("Scores", self.view) @classmethod def migrate_settings(cls, settings_, version): if version < 2: if settings_["resampling"] > 0: settings_["resampling"] += 1 if version < 3: # Older version used an incompatible context handler settings_["context_settings"] = [ c for c in settings_.get("context_settings", ()) if not hasattr(c, 'classes') ] @Slot(float) def setProgressValue(self, value): self.progressBarSet(value, processEvents=False) def __update(self): self.__needupdate = False assert self.__task is None or self.__state == State.Running if self.__state == State.Running: self.cancel() self.Warning.test_data_unused.clear() self.Warning.test_data_missing.clear() self.warning() self.Error.class_inconsistent.clear() self.Error.too_many_folds.clear() self.error() # check preconditions and return early if self.data is None: self.__state = State.Waiting self.commit() return if not self.learners: self.__state = State.Waiting self.commit() return if self.resampling == OWTestLearners.KFold and \ len(self.data) < self.NFolds[self.n_folds]: self.Error.too_many_folds() self.__state = State.Waiting self.commit() return elif self.resampling == OWTestLearners.TestOnTest: if self.test_data is None: if not self.Error.test_data_empty.is_shown(): self.Warning.test_data_missing() self.__state = State.Waiting self.commit() return elif self.test_data.domain.class_var != self.data.domain.class_var: self.Error.class_inconsistent() self.__state = State.Waiting self.commit() return elif self.test_data is not None: self.Warning.test_data_unused() rstate = 42 common_args = dict( store_data=True, preprocessor=self.preprocessor, ) # items in need of an update items = [(key, slot) for key, slot in 
self.learners.items() if slot.results is None] learners = [slot.learner for _, slot in items] # deepcopy all learners as they are not thread safe (by virtue of # the base API). These will be the effective learner objects tested # but will be replaced with the originals on return (see restore # learners bellow) learners_c = [copy.deepcopy(learner) for learner in learners] if self.resampling == OWTestLearners.KFold: folds = self.NFolds[self.n_folds] test_f = partial(Orange.evaluation.CrossValidation, self.data, learners_c, k=folds, random_state=rstate, **common_args) elif self.resampling == OWTestLearners.FeatureFold: test_f = partial(Orange.evaluation.CrossValidationFeature, self.data, learners_c, self.fold_feature, **common_args) elif self.resampling == OWTestLearners.LeaveOneOut: test_f = partial(Orange.evaluation.LeaveOneOut, self.data, learners_c, **common_args) elif self.resampling == OWTestLearners.ShuffleSplit: train_size = self.SampleSizes[self.sample_size] / 100 test_f = partial(Orange.evaluation.ShuffleSplit, self.data, learners_c, n_resamples=self.NRepeats[self.n_repeats], train_size=train_size, test_size=None, stratified=self.shuffle_stratified, random_state=rstate, **common_args) elif self.resampling == OWTestLearners.TestOnTrain: test_f = partial(Orange.evaluation.TestOnTrainingData, self.data, learners_c, **common_args) elif self.resampling == OWTestLearners.TestOnTest: test_f = partial(Orange.evaluation.TestOnTestData, self.data, self.test_data, learners_c, **common_args) else: assert False, "self.resampling %s" % self.resampling def replace_learners(evalfunc, *args, **kwargs): res = evalfunc(*args, **kwargs) assert all(lc is lo for lc, lo in zip(learners_c, res.learners)) res.learners[:] = learners return res test_f = partial(replace_learners, test_f) self.__submit(test_f) def __submit(self, testfunc): # type: (Callable[[Callable[float]], Results]) -> None """ Submit a testing function for evaluation MUST not be called if an evaluation is already 
pending/running. Cancel the existing task first. Parameters ---------- testfunc : Callable[[Callable[float]], Results]) Must be a callable taking a single `callback` argument and returning a Results instance """ assert self.__state != State.Running # Setup the task task = Task() def progress_callback(finished): if task.cancelled: raise UserInterrupt() QMetaObject.invokeMethod(self, "setProgressValue", Qt.QueuedConnection, Q_ARG(float, 100 * finished)) def ondone(_): QMetaObject.invokeMethod(self, "__task_complete", Qt.QueuedConnection, Q_ARG(object, task)) testfunc = partial(testfunc, callback=progress_callback) task.future = self.__executor.submit(testfunc) task.future.add_done_callback(ondone) self.progressBarInit(processEvents=None) self.setBlocking(True) self.setStatusMessage("Running") self.__state = State.Running self.__task = task @Slot(object) def __task_complete(self, task): # handle a completed task assert self.thread() is QThread.currentThread() if self.__task is not task: assert task.cancelled log.debug("Reaping cancelled task: %r", "<>") return self.setBlocking(False) self.progressBarFinished(processEvents=None) self.setStatusMessage("") result = task.future assert result.done() self.__task = None try: results = result.result() # type: Results learners = results.learners # type: List[Learner] except Exception as er: log.exception("testing error (in __task_complete):", exc_info=True) self.error("\n".join(traceback.format_exception_only(type(er), er))) self.__state = State.Done return self.__state = State.Done learner_key = { slot.learner: key for key, slot in self.learners.items() } assert all(learner in learner_key for learner in learners) # Update the results for individual learners class_var = results.domain.class_var for learner, result in zip(learners, results.split_by_model()): stats = None if class_var.is_primitive(): ex = result.failed[0] if ex: stats = [Try.Fail(ex)] * len(self.scorers) result = Try.Fail(ex) else: stats = [ 
Try(scorer_caller(scorer, result)) for scorer in self.scorers ] result = Try.Success(result) key = learner_key.get(learner) self.learners[key] = \ self.learners[key]._replace(results=result, stats=stats) self._update_header() self._update_stats_model() self.commit() def cancel(self): """ Cancel the current/pending evaluation (if any). """ if self.__task is not None: assert self.__state == State.Running self.__state = State.Cancelled task, self.__task = self.__task, None task.cancel() assert task.future.done() def onDeleteWidget(self): self.cancel() super().onDeleteWidget()
class OW1ka(widget.OWWidget):
    """Import survey responses from a public EnKlikAnketa (1ka.si) link.

    The page behind the URL is loaded in a ``WebviewWidget``; the last
    ``<table>`` of the rendered HTML is parsed with BeautifulSoup, written
    to an in-memory tab-separated buffer and read with ``TabReader``.
    The user can adjust the resulting domain in a ``DomainEditor``; the
    edited table is sent on the "Data" output.
    """
    name = "EnKlik Anketa"
    description = "Import data from EnKlikAnketa (1ka.si) public URL."
    icon = "icons/1ka.svg"
    priority = 200

    class Outputs:
        data = Output("Data", Table)

    want_main_area = False
    resizing_enabled = False

    settingsHandler = settings.PerfectDomainContextHandler(
        match_values=settings.PerfectDomainContextHandler.MATCH_VALUES_ALL)

    #: Recently used URLs (most recent first)
    recent = settings.Setting([])
    #: Index into the reload-interval choices defined in ``__init__``
    reload_idx = settings.Setting(0)
    autocommit = settings.Setting(True)

    domain_editor = settings.SettingProvider(DomainEditor)

    UserAdviceMessages = [
        widget.Message(
            'You can import data from public links to 1ka surveys results. '
            'Click to learn more on how to get a shareable public link URL for '
            '1ka surveys that you manage.',
            'public-link',
            icon=widget.Message.Information,
            moreurl='http://english.1ka.si/db/24/468/Guides/Public_link_to_access_data_and_analysis/'
        ),
    ]

    class Error(widget.OWWidget.Error):
        net_error = widget.Msg(
            "Couldn't load data: {}. Ensure network connection, firewall ...")
        parse_error = widget.Msg(
            "Couldn't parse data: {}. Ensure well-formatted data or submit a bug report."
        )
        invalid_url = widget.Msg(
            'Invalid URL. Public shareable link should match: ' + VALID_URL_HELP)
        data_is_anal = widget.Msg(
            "The provided URL is a public link to 'Analysis'. Need public link to 'Data'."
        )

    class Information(widget.OWWidget.Information):
        response_data_empty = widget.Msg(
            'Response data is empty. Get some responses first.')

    def __init__(self):
        super().__init__()
        #: Table sent to the output (after the domain editor is applied)
        self.table = None
        #: Table as parsed from the page, before any domain edit. It must
        #: exist before the domain editor's dataChanged signal can fire,
        #: because apply_domain_edit() reads it.
        self._orig_table = None
        #: HTML captured by the webview once loading has finished
        self._html = None

        def _loadFinished(is_ok):
            if is_ok:
                QTimer.singleShot(
                    1, lambda: setattr(self, '_html', self.webview.html()))

        self.webview = WebviewWidget(loadFinished=_loadFinished)

        vb = gui.vBox(self.controlArea, 'Import Data')
        hb = gui.hBox(vb)
        self.combo = combo = URLComboBox(
            hb, self.recent, editable=True, minimumWidth=400,
            insertPolicy=QComboBox.InsertAtTop,
            toolTip='Format: ' + VALID_URL_HELP,
            editTextChanged=self.is_valid_url,
            # Indirect via QTimer because calling wait() -> processEvents,
            # while our currentIndexChanged event hadn't yet finished.
            # Avoids calling handler twice.
            currentIndexChanged=lambda: QTimer.singleShot(1, self.load_url))
        hb.layout().addWidget(QLabel('Public link URL:', hb))
        hb.layout().addWidget(combo)
        hb.layout().setStretch(1, 2)

        # (label, interval) pairs; the first entry has no interval and
        # stands for "timer stopped".
        # NOTE(review): intervals are passed to QTimer.start(), so they are
        # presumably milliseconds -- the labels agree with that reading.
        RELOAD_TIMES = (
            ('No reload', ),
            ('5 s', 5000),
            ('10 s', 10000),
            ('30 s', 30000),
            ('1 min', 60 * 1000),
            ('2 min', 2 * 60 * 1000),
            ('5 min', 5 * 60 * 1000),
        )

        reload_timer = QTimer(
            self, timeout=lambda: self.load_url(from_reload=True))

        def _on_reload_changed():
            if self.reload_idx == 0:
                reload_timer.stop()
                return
            reload_timer.start(RELOAD_TIMES[self.reload_idx][1])

        gui.comboBox(vb, self, 'reload_idx', label='Reload every:',
                     orientation=Qt.Horizontal,
                     items=[i[0] for i in RELOAD_TIMES],
                     callback=_on_reload_changed)

        box = gui.widgetBox(self.controlArea, "Columns (Double-click to edit)")
        self.domain_editor = DomainEditor(self)
        editor_model = self.domain_editor.model()

        def editorDataChanged():
            self.apply_domain_edit()
            self.commit()

        editor_model.dataChanged.connect(editorDataChanged)
        box.layout().addWidget(self.domain_editor)

        box = gui.widgetBox(self.controlArea, "Info", addSpace=True)
        info = self.data_info = gui.widgetLabel(box, '')
        info.setWordWrap(True)

        self.controlArea.layout().addStretch(1)
        gui.auto_commit(self.controlArea, self, 'autocommit', label='Commit')

        self.set_info()

    def set_combo_items(self):
        """Repopulate the URL combo box from ``self.recent``."""
        # NOTE(review): load_url() stores plain URL strings in self.recent,
        # while this method expects items with .name/.url -- confirm whether
        # this method is still used.
        self.combo.clear()
        for sheet in self.recent:
            self.combo.addItem(sheet.name, sheet.url)

    def commit(self):
        """Send the current (possibly ``None``) table to the output."""
        self.Outputs.data.send(self.table)

    def is_valid_url(self, url):
        """Validate `url` and show/clear the "invalid URL" error.

        Returns
        -------
        bool
            True if the module-level ``is_valid_url`` accepts `url`;
            otherwise False, after raising the error message and showing
            the combo box tooltip next to the combo box.
        """
        if is_valid_url(url):
            self.Error.invalid_url.clear()
            return True
        self.Error.invalid_url()
        QToolTip.showText(self.combo.mapToGlobal(QPoint(0, 0)),
                          self.combo.toolTip())
        return False

    def load_url(self, from_reload=False):
        """Load and parse the URL currently shown in the combo box.

        Parameters
        ----------
        from_reload : bool
            True when triggered by the reload timer. In that case the
            output is committed only if the new table's checksum differs
            from the previous one.
        """
        self.closeContext()
        self.domain_editor.set_domain(None)

        url = self.combo.currentText()
        if not self.is_valid_url(url):
            # Drop the parsed table as well; otherwise a later domain-editor
            # change would rebuild and commit the stale table.
            self.table = self._orig_table = None
            self.set_info()
            self.commit()
            return

        if url not in self.recent:
            self.recent.insert(0, url)

        prev_table = self.table

        with self.progressBar(3) as progress:
            try:
                self._html = None
                self.webview.setUrl(url)
                wait(until=lambda: self._html is not None)
                progress.advance()
                # Wait some seconds for discrete labels to have loaded via AJAX,
                # then re-query HTML.
                # *Webview.loadFinished doesn't guarantee it sufficiently
                try:
                    wait(until=lambda: False, timeout=1200)
                except TimeoutError:
                    pass
                progress.advance()
                html = self.webview.html()
            except Exception as e:
                log.exception("Couldn't load data from: %s", url)
                self.Error.net_error(try_(lambda: e.args[0], ''))
                self.table = None
            else:
                self.Error.clear()
                self.Information.clear()
                self.table = None
                try:
                    table = self.table = self.table_from_html(html)
                except DataEmptyError:
                    self.Information.response_data_empty()
                except DataIsAnalError:
                    self.Error.data_is_anal()
                except Exception as e:
                    log.exception('Parsing error: %s', url)
                    self.Error.parse_error(try_(lambda: e.args[0], ''))
                else:
                    self.openContext(table.domain)
                    self.combo.setTitleFor(self.combo.currentIndex(),
                                           table.name)

        def _equal(data1, data2):
            # NaN never compares equal, so a table whose checksum cannot be
            # computed (e.g. None) is always treated as "changed".
            NAN = float('nan')
            return (try_(lambda: data1.checksum(), NAN) ==
                    try_(lambda: data2.checksum(), NAN))

        self._orig_table = self.table
        self.apply_domain_edit()

        if not (from_reload and _equal(prev_table, self.table)):
            self.commit()

    def apply_domain_edit(self):
        """Rebuild ``self.table`` from the parsed table and the edited domain.

        Does not commit; it only updates ``self.table`` and the info label.
        """
        data = self._orig_table
        if data is None:
            self.set_info()
            return

        domain, cols = self.domain_editor.get_domain(data.domain, data)

        # Copied verbatim from OWFile
        if not (domain.variables or domain.metas):
            table = None
        else:
            X, y, m = cols
            table = Table.from_numpy(domain, X, y, m, data.W)
            table.name = data.name
            table.ids = np.array(data.ids)
            table.attributes = getattr(data, 'attributes', {})

        self.table = table
        self.set_info()

    #: Name of the column used for the "First entry"/"Last entry" info
    DATETIME_VAR = 'Paradata (insert)'

    def table_from_html(self, html):
        """Parse the rendered survey page into a data table.

        Parameters
        ----------
        html : str
            HTML of the loaded page.

        Returns
        -------
        The table read by ``TabReader``, named after the first ``<h2>``
        in the page body (the text after the first ``': '``).

        Raises
        ------
        DataEmptyError
            If the page contains no ``<table>`` element.
        DataIsAnalError
            If the page looks like an 'Analysis' page, not a 'Data' page.
        """
        soup = BeautifulSoup(html, 'html.parser')
        try:
            html_table = soup.find_all('table')[-1]
        except IndexError:
            # The IndexError is an implementation detail; do not chain it.
            raise DataEmptyError from None

        if '<h2>Anal' in html or 'div_analiza_' in html:
            raise DataIsAnalError

        def _header_row_strings(row):
            # Repeat each header cell's text once per column it spans
            return chain.from_iterable(
                repeat(th.get_text(), int(th.get('colspan') or 1))
                for th in html_table.select(
                    'thead tr:nth-of-type(%d) th[title]' % row))

        # self.DATETIME_VAR (available when Paradata is enabled in 1ka UI)
        # should match this variable name format
        header = [
            th1.rstrip(':') +
            ('' if th3 == th1 else ' ({})').format(th3.rstrip(':'))
            for th1, th3 in zip(_header_row_strings(1),
                                _header_row_strings(3))
        ]

        values = [
            [
                (
                    # If no span, feature is a number or a text field
                    td.get_text() if td.span is None else
                    # If have span, it's a number, but if negative, replace with NaN
                    '' if td.contents[0].strip().startswith('-') else
                    # Else if span, the number is its code, but we want its value
                    td.span.get_text()[1:-1])
                for td in tr.select('td')
                if 'data_uid' not in td.get('class', ())
            ]
            for tr in html_table.select('tbody tr')
        ]

        # Save parsed values into in-mem file for default values processing
        buffer = StringIO()
        writer = csv.writer(buffer, delimiter='\t')
        writer.writerow(header)
        writer.writerows(values)
        buffer.flush()
        buffer.seek(0)

        data = TabReader(buffer).read()

        title = soup.select('body h2:nth-of-type(1)')[0].get_text().split(
            ': ', maxsplit=1)[-1]
        data.name = title
        return data

    def set_info(self):
        """Update the info label with a summary of ``self.table``."""
        data = self.table
        if data is None:
            self.data_info.setText('No spreadsheet loaded.')
            return
        text = "{}\n\n{} instance(s), {} feature(s), {} meta attribute(s)\n".format(
            data.name, len(data), len(data.domain.attributes),
            len(data.domain.metas))
        # The first/last entry lines are appended only when the lookup of
        # DATETIME_VAR succeeds.
        # NOTE(review): assumes try_(func, default) returns `default` when
        # `func` raises -- confirm against its definition.
        text += try_(
            lambda: '\nFirst entry: {}'
                    '\nLast entry: {}'.format(data[0, self.DATETIME_VAR],
                                              data[-1, self.DATETIME_VAR]),
            '')
        self.data_info.setText(text)
class OWTestLearners(OWWidget): name = "Test and Score" description = "Cross-validation accuracy estimation." icon = "icons/TestLearners1.svg" priority = 100 keywords = ['Cross Validation', 'CV'] class Inputs: train_data = Input("Data", Table, default=True) test_data = Input("Test Data", Table) learner = Input("Learner", Learner, multiple=True) preprocessor = Input("Preprocessor", Preprocess) class Outputs: predictions = Output("Predictions", Table) evaluations_results = Output("Evaluation Results", Results) settings_version = 3 UserAdviceMessages = [ widget.Message("Click on the table header to select shown columns", "click_header") ] settingsHandler = settings.PerfectDomainContextHandler() score_table = settings.SettingProvider(ScoreTable) #: Resampling/testing types KFold, FeatureFold, ShuffleSplit, LeaveOneOut, TestOnTrain, TestOnTest \ = 0, 1, 2, 3, 4, 5 #: Numbers of folds NFolds = [2, 3, 5, 10, 20] #: Number of repetitions NRepeats = [2, 3, 5, 10, 20, 50, 100] #: Sample sizes SampleSizes = [5, 10, 20, 25, 30, 33, 40, 50, 60, 66, 70, 75, 80, 90, 95] #: Selected resampling type resampling = settings.Setting(0) #: Number of folds for K-fold cross validation n_folds = settings.Setting(3) #: Stratified sampling for K-fold cv_stratified = settings.Setting(True) #: Number of repeats for ShuffleSplit sampling n_repeats = settings.Setting(3) #: ShuffleSplit sample size sample_size = settings.Setting(9) #: Stratified sampling for Random Sampling shuffle_stratified = settings.Setting(True) # CV where nr. of feature values determines nr. 
of folds fold_feature = settings.ContextSetting(None) fold_feature_selected = settings.ContextSetting(False) use_rope = settings.Setting(False) rope = settings.Setting(0.1) comparison_criterion = settings.Setting(0, schema_only=True) TARGET_AVERAGE = "(Average over classes)" class_selection = settings.ContextSetting(TARGET_AVERAGE) class Error(OWWidget.Error): train_data_empty = Msg("Train dataset is empty.") test_data_empty = Msg("Test dataset is empty.") class_required = Msg("Train data input requires a target variable.") too_many_classes = Msg("Too many target variables.") class_required_test = Msg( "Test data input requires a target variable.") too_many_folds = Msg("Number of folds exceeds the data size") class_inconsistent = Msg("Test and train datasets " "have different target variables.") memory_error = Msg("Not enough memory.") no_class_values = Msg("Target variable has no values.") only_one_class_var_value = Msg("Target variable has only one value.") test_data_incompatible = Msg( "Test data may be incompatible with train data.") class Warning(OWWidget.Warning): missing_data = \ Msg("Instances with unknown target values were removed from{}data.") test_data_missing = Msg("Missing separate test data input.") scores_not_computed = Msg("Some scores could not be computed.") test_data_unused = Msg("Test data is present but unused. " "Select 'Test on test data' to use it.") class Information(OWWidget.Information): data_sampled = Msg("Train data has been sampled") test_data_sampled = Msg("Test data has been sampled") test_data_transformed = Msg( "Test data has been transformed to match the train data.") def __init__(self): super().__init__() self.data = None self.test_data = None self.preprocessor = None self.train_data_missing_vals = False self.test_data_missing_vals = False self.scorers = [] self.__pending_comparison_criterion = self.comparison_criterion #: An Ordered dictionary with current inputs and their testing results. 
self.learners = OrderedDict() # type: Dict[Any, Input] self.__state = State.Waiting # Do we need to [re]test any learners, set by _invalidate and # cleared by __update self.__needupdate = False self.__task = None # type: Optional[TaskState] self.__executor = ThreadExecutor() sbox = gui.vBox(self.controlArea, "Sampling") rbox = gui.radioButtons(sbox, self, "resampling", callback=self._param_changed) gui.appendRadioButton(rbox, "Cross validation") ibox = gui.indentedBox(rbox) gui.comboBox(ibox, self, "n_folds", label="Number of folds: ", items=[str(x) for x in self.NFolds], maximumContentsLength=3, orientation=Qt.Horizontal, callback=self.kfold_changed) gui.checkBox(ibox, self, "cv_stratified", "Stratified", callback=self.kfold_changed) gui.appendRadioButton(rbox, "Cross validation by feature") ibox = gui.indentedBox(rbox) self.feature_model = DomainModel(order=DomainModel.METAS, valid_types=DiscreteVariable) self.features_combo = gui.comboBox(ibox, self, "fold_feature", model=self.feature_model, orientation=Qt.Horizontal, callback=self.fold_feature_changed) gui.appendRadioButton(rbox, "Random sampling") ibox = gui.indentedBox(rbox) gui.comboBox(ibox, self, "n_repeats", label="Repeat train/test: ", items=[str(x) for x in self.NRepeats], maximumContentsLength=3, orientation=Qt.Horizontal, callback=self.shuffle_split_changed) gui.comboBox(ibox, self, "sample_size", label="Training set size: ", items=["{} %".format(x) for x in self.SampleSizes], maximumContentsLength=5, orientation=Qt.Horizontal, callback=self.shuffle_split_changed) gui.checkBox(ibox, self, "shuffle_stratified", "Stratified", callback=self.shuffle_split_changed) gui.appendRadioButton(rbox, "Leave one out") gui.appendRadioButton(rbox, "Test on train data") gui.appendRadioButton(rbox, "Test on test data") self.cbox = gui.vBox(self.controlArea, "Target Class") self.class_selection_combo = gui.comboBox( self.cbox, self, "class_selection", items=[], sendSelectedValue=True, valueType=str, 
callback=self._on_target_class_changed, contentsLength=8) self.modcompbox = box = gui.vBox(self.controlArea, "Model Comparison") gui.comboBox(box, self, "comparison_criterion", model=PyListModel(), callback=self.update_comparison_table) hbox = gui.hBox(box) gui.checkBox(hbox, self, "use_rope", "Negligible difference: ", callback=self._on_use_rope_changed) gui.lineEdit(hbox, self, "rope", validator=QDoubleValidator(), controlWidth=70, callback=self.update_comparison_table, alignment=Qt.AlignRight) self.controls.rope.setEnabled(self.use_rope) gui.rubber(self.controlArea) self.score_table = ScoreTable(self) self.score_table.shownScoresChanged.connect(self.update_stats_model) view = self.score_table.view view.setSizeAdjustPolicy(view.AdjustToContents) box = gui.vBox(self.mainArea, "Evaluation Results") box.layout().addWidget(self.score_table.view) self.compbox = box = gui.vBox(self.mainArea, box="Model comparison") table = self.comparison_table = QTableWidget( wordWrap=False, editTriggers=QTableWidget.NoEditTriggers, selectionMode=QTableWidget.NoSelection) table.setSizeAdjustPolicy(table.AdjustToContents) header = table.verticalHeader() header.setSectionResizeMode(QHeaderView.Fixed) header.setSectionsClickable(False) header = table.horizontalHeader() header.setTextElideMode(Qt.ElideRight) header.setDefaultAlignment(Qt.AlignCenter) header.setSectionsClickable(False) header.setStretchLastSection(False) header.setSectionResizeMode(QHeaderView.ResizeToContents) avg_width = self.fontMetrics().averageCharWidth() header.setMinimumSectionSize(8 * avg_width) header.setMaximumSectionSize(15 * avg_width) header.setDefaultSectionSize(15 * avg_width) box.layout().addWidget(table) box.layout().addWidget( QLabel( "<small>Table shows probabilities that the score for the model in " "the row is higher than that of the model in the column. 
" "Small numbers show the probability that the difference is " "negligible.</small>", wordWrap=True)) @staticmethod def sizeHint(): return QSize(780, 1) def _update_controls(self): self.fold_feature = None self.feature_model.set_domain(None) if self.data: self.feature_model.set_domain(self.data.domain) if self.fold_feature is None and self.feature_model: self.fold_feature = self.feature_model[0] enabled = bool(self.feature_model) self.controls.resampling.buttons[ OWTestLearners.FeatureFold].setEnabled(enabled) self.features_combo.setEnabled(enabled) if self.resampling == OWTestLearners.FeatureFold and not enabled: self.resampling = OWTestLearners.KFold @Inputs.learner def set_learner(self, learner, key): """ Set the input `learner` for `key`. Parameters ---------- learner : Optional[Orange.base.Learner] key : Any """ if key in self.learners and learner is None: # Removed self._invalidate([key]) del self.learners[key] elif learner is not None: self.learners[key] = InputLearner(learner, None, None) self._invalidate([key]) @Inputs.train_data def set_train_data(self, data): """ Set the input training dataset. 
Parameters
        ----------
        data : Optional[Orange.data.Table]
        """
        # Stop any running evaluation and reset all train-data messages
        # before validating the new input.
        self.cancel()
        self.Information.data_sampled.clear()
        self.Error.train_data_empty.clear()
        self.Error.class_required.clear()
        self.Error.too_many_classes.clear()
        self.Error.no_class_values.clear()
        self.Error.only_one_class_var_value.clear()
        # A non-None but empty table is reported and then treated as no data.
        if data is not None and not data:
            self.Error.train_data_empty()
            data = None
        if data:
            # `conds` and `errors` are parallel lists; the first condition
            # that holds raises its error and discards the data.
            conds = [
                not data.domain.class_vars,
                len(data.domain.class_vars) > 1,
                np.isnan(data.Y).all(),
                data.domain.has_discrete_class and len(data.domain.class_var.values) == 1
            ]
            errors = [
                self.Error.class_required,
                self.Error.too_many_classes,
                self.Error.no_class_values,
                self.Error.only_one_class_var_value
            ]
            for cond, error in zip(conds, errors):
                if cond:
                    error()
                    data = None
                    break

        if isinstance(data, SqlTable):
            # Tables below AUTO_DL_LIMIT are converted whole; larger ones
            # are sampled first and the user is informed.
            if data.approx_len() < AUTO_DL_LIMIT:
                data = Table(data)
            else:
                self.Information.data_sampled()
                data_sample = data.sample_time(1, no_cache=True)
                data_sample.download_data(AUTO_DL_LIMIT, partial=True)
                data = Table(data_sample)

        self.train_data_missing_vals = \
            data is not None and np.isnan(data.Y).any()
        if self.train_data_missing_vals or self.test_data_missing_vals:
            self.Warning.missing_data(self._which_missing_data())
            if data:
                data = HasClass()(data)
        else:
            self.Warning.missing_data.clear()

        self.data = data
        self.closeContext()
        self._update_scorers()
        self._update_controls()
        if data is not None:
            self._update_class_selection()
            self.openContext(data.domain)
            # Restore the feature-fold resampling if it was selected before
            # and the feature model is non-empty.
            if self.fold_feature_selected and bool(self.feature_model):
                self.resampling = OWTestLearners.FeatureFold
        self._invalidate()

    @Inputs.test_data
    def set_test_data(self, data):
        # type: (Orange.data.Table) -> None
        """
        Set the input separate testing dataset.

        Empty data and data without a class variable are reported through
        the widget's error messages and then treated as no data. Results
        are invalidated only when the active resampling is `TestOnTest`.

        Parameters
        ----------
        data : Optional[Orange.data.Table]
        """
        self.Information.test_data_sampled.clear()
        self.Error.test_data_empty.clear()
        if data is not None and not data:
            self.Error.test_data_empty()
            data = None

        if data and not data.domain.class_var:
            self.Error.class_required_test()
            data = None
        else:
            self.Error.class_required_test.clear()

        if isinstance(data, SqlTable):
            # Same download/sampling policy as for the training data.
            if data.approx_len() < AUTO_DL_LIMIT:
                data = Table(data)
            else:
                self.Information.test_data_sampled()
                data_sample = data.sample_time(1, no_cache=True)
                data_sample.download_data(AUTO_DL_LIMIT, partial=True)
                data = Table(data_sample)

        self.test_data_missing_vals = \
            data is not None and np.isnan(data.Y).any()
        if self.train_data_missing_vals or self.test_data_missing_vals:
            self.Warning.missing_data(self._which_missing_data())
            if data:
                data = HasClass()(data)
        else:
            self.Warning.missing_data.clear()

        self.test_data = data
        if self.resampling == OWTestLearners.TestOnTest:
            self._invalidate()

    def _which_missing_data(self):
        """
        Return the text fragment passed to `Warning.missing_data`.

        The fragment is selected by the pair
        `(train_data_missing_vals, test_data_missing_vals)`. There is no
        entry for `(False, False)`; both visible callers invoke this only
        when at least one flag is set.
        """
        return {
            (True, True): " ",  # both, don't specify
            (True, False): " train ",
            (False, True): " test "
        }[(self.train_data_missing_vals, self.test_data_missing_vals)]

    # List of scorers shouldn't be retrieved globally, when the module is
    # loading since add-ons could have registered additional scorers.
    # It could have been cached but
    # - we don't gain much with it
    # - it complicates the unit tests
    def _update_scorers(self):
        """
        Refresh `self.scorers` for the current data's class variable.

        Also repopulates the comparison-criterion model when the scorer
        list changes, applies a pending comparison criterion (if any) and
        updates the comparison box title.
        """
        if self.data and self.data.domain.class_var:
            new_scorers = usable_scorers(self.data.domain.class_var)
        else:
            new_scorers = []
        # Don't unnecessarily reset the model because this would always reset
        # comparison_criterion; we also set it explicitly, though, for clarity
        if new_scorers != self.scorers:
            self.scorers = new_scorers
            self.controls.comparison_criterion.model()[:] = \
                [scorer.long_name or scorer.name for scorer in self.scorers]
            self.comparison_criterion = 0
        if self.__pending_comparison_criterion is not None:
            # Check for the unlikely case that some scorers have been removed
            # from modules
            if self.__pending_comparison_criterion < len(self.scorers):
                self.comparison_criterion = self.__pending_comparison_criterion
            self.__pending_comparison_criterion = None
        self._update_compbox_title()

    def _update_compbox_title(self):
        """Set the comparison box title, naming the selected scorer if valid."""
        criterion = self.comparison_criterion
        if criterion < len(self.scorers):
            # Entries of `self.scorers` are called to obtain a scorer object.
            scorer = self.scorers[criterion]()
            self.compbox.setTitle(f"Model Comparison by {scorer.name}")
        else:
            self.compbox.setTitle(f"Model Comparison")

    @Inputs.preprocessor
    def set_preprocessor(self, preproc):
        """
        Set the input preprocessor to apply on the training data.

        All learner results are invalidated afterwards.
        """
        self.preprocessor = preproc
        self._invalidate()

    def handleNewSignals(self):
        """Reimplemented from OWWidget.handleNewSignals."""
        self._update_class_selection()
        self.score_table.update_header(self.scorers)
        self._update_view_enabled()
        self.update_stats_model()
        # Start a new evaluation only if something was invalidated.
        if self.__needupdate:
            self.__update()

    def kfold_changed(self):
        """Switch resampling to `KFold` and re-evaluate."""
        self.resampling = OWTestLearners.KFold
        self._param_changed()

    def fold_feature_changed(self):
        """Switch resampling to `FeatureFold` and re-evaluate."""
        self.resampling = OWTestLearners.FeatureFold
        self._param_changed()

    def shuffle_split_changed(self):
        """Switch resampling to `ShuffleSplit` and re-evaluate."""
        self.resampling = OWTestLearners.ShuffleSplit
        self._param_changed()

    def _param_changed(self):
        """React to a changed evaluation parameter: invalidate and rerun."""
        # The model comparison box is only enabled for k-fold resampling.
        self.modcompbox.setEnabled(self.resampling == OWTestLearners.KFold)
        self._update_view_enabled()
        self._invalidate()
        self.__update()

    def _update_view_enabled(self):
        """Enable or disable the comparison table and the score view."""
        self.comparison_table.setEnabled(
            self.resampling == OWTestLearners.KFold
            and len(self.learners) > 1
            and self.data is not None)
        self.score_table.view.setEnabled(self.data is not None)

    def update_stats_model(self):
        """
        Rebuild the score table rows from the current learner results.

        Also refreshes the comparison table headers, the generic error
        message and the `scores_not_computed` warning.
        """
        # Update the results_model with up to date scores.
        # Note: The target class specific scores (if requested) are
        # computed as needed in this method.
        model = self.score_table.model
        # clear the table model, but preserving the header labels
        for r in reversed(range(model.rowCount())):
            model.takeRow(r)

        target_index = None
        if self.data is not None:
            class_var = self.data.domain.class_var
            if self.data.domain.has_discrete_class and \
                    self.class_selection != self.TARGET_AVERAGE:
                target_index = class_var.values.index(self.class_selection)
        else:
            class_var = None

        errors = []
        has_missing_scores = False

        names = []
        for key, slot in self.learners.items():
            name = learner_name(slot.learner)
            names.append(name)
            head = QStandardItem(name)
            # The learner key is stored on the items so rows can be mapped
            # back to `self.learners` (see `_successful_slots`).
            head.setData(key, Qt.UserRole)
            results = slot.results
            if results is not None and results.success:
                train = QStandardItem("{:.3f}".format(
                    results.value.train_time))
                train.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                train.setData(key, Qt.UserRole)
                test = QStandardItem("{:.3f}".format(results.value.test_time))
                test.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                test.setData(key, Qt.UserRole)
                row = [head, train, test]
            else:
                row = [head]
            if isinstance(results, Try.Fail):
                head.setToolTip(str(results.exception))
                head.setText("{} (error)".format(name))
                head.setForeground(QtGui.QBrush(Qt.red))
                # A domain transformation failure while testing on test data
                # gets a dedicated error instead of the generic error list.
                if isinstance(results.exception, DomainTransformationError) \
                        and self.resampling == self.TestOnTest:
                    self.Error.test_data_incompatible()
                    self.Information.test_data_transformed.clear()
                else:
                    errors.append("{name} failed with error:\n"
                                  "{exc.__class__.__name__}: {exc!s}".format(
                                      name=name, exc=slot.results.exception))

            if class_var is not None and class_var.is_discrete and \
                    target_index is not None:
                # A specific target class is selected: recompute the scores
                # on one-vs-rest results instead of using the stored stats.
                if slot.results is not None and slot.results.success:
                    ovr_results = results_one_vs_rest(slot.results.value,
                                                      target_index)

                    # Cell variable is used immediately, it's not stored
                    # pylint: disable=cell-var-from-loop
                    stats = [
                        Try(scorer_caller(scorer, ovr_results, target=1))
                        for scorer in self.scorers
                    ]
                else:
                    stats = None
            else:
                stats = slot.stats

            if stats is not None:
                for stat, scorer in zip(stats, self.scorers):
                    item = QStandardItem()
                    item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                    if stat.success:
                        item.setData(float(stat.value[0]), Qt.DisplayRole)
                    else:
                        item.setToolTip(str(stat.exception))
                        # Only warn about failed scores that are displayed.
                        if scorer.name in self.score_table.shown_scores:
                            has_missing_scores = True
                    row.append(item)

            model.appendRow(row)

        # Resort rows based on current sorting
        header = self.score_table.view.horizontalHeader()
        model.sort(header.sortIndicatorSection(), header.sortIndicatorOrder())
        self._set_comparison_headers(names)

        self.error("\n".join(errors), shown=bool(errors))
        self.Warning.scores_not_computed(shown=has_missing_scores)

    def _on_use_rope_changed(self):
        """Enable the rope control per `use_rope` and refresh comparisons."""
        self.controls.rope.setEnabled(self.use_rope)
        self.update_comparison_table()

    def update_comparison_table(self):
        """
        Clear and refill the pairwise model comparison table.

        Nothing is filled in unless there are successful slots and
        scorers; cell values are computed only for `KFold` resampling.
        """
        self.comparison_table.clearContents()
        slots = self._successful_slots()
        if not (slots and self.scorers):
            return
        names = [learner_name(slot.learner) for slot in slots]
        self._set_comparison_headers(names)
        if self.resampling == OWTestLearners.KFold:
            scores = self._scores_by_folds(slots)
            self._fill_table(names, scores)

    def _successful_slots(self):
        """
        Return learner slots with successful results.

        Slots are looked up by the keys stored in the first column of the
        score table, iterated in the row order of its `sorted_model`.
        """
        model = self.score_table.model
        proxy = self.score_table.sorted_model

        keys = (model.data(proxy.mapToSource(proxy.index(row, 0)), Qt.UserRole)
                for row in range(proxy.rowCount()))
        slots = [
            slot for slot in (self.learners[key] for key in keys)
            if slot.results is not None and slot.results.success
        ]
        return slots

    def _set_comparison_headers(self, names):
        """Resize the comparison table to `names` and label both headers."""
        table = self.comparison_table
        try:
            # Prevent glitching during update
            table.setUpdatesEnabled(False)
            header = table.horizontalHeader()
            if len(names) > 2:
                header.setSectionResizeMode(QHeaderView.Stretch)
            else:
                header.setSectionResizeMode(QHeaderView.Fixed)
            table.setRowCount(len(names))
            table.setColumnCount(len(names))
            table.setVerticalHeaderLabels(names)
            table.setHorizontalHeaderLabels(names)
        finally:
            table.setUpdatesEnabled(True)

    def _scores_by_folds(self, slots):
        """
        Return per-fold scores of the selected comparison scorer.

        The result has one entry per slot: the flattened output of
        `scorer.scores_by_folds`, or None when wrapping the call in `Try`
        did not succeed (in which case `scores_not_computed` is shown).
        """
        scorer = self.scorers[self.comparison_criterion]()
        self._update_compbox_title()
        if scorer.is_binary:
            if self.class_selection != self.TARGET_AVERAGE:
                class_var = self.data.domain.class_var
                target_index = class_var.values.index(self.class_selection)
                kw = dict(target=target_index)
            else:
                kw = dict(average='weighted')
        else:
            kw = {}

        def call_scorer(results):
            # Defer the scorer call so that `Try` invokes it.
            def thunked():
                return scorer.scores_by_folds(results.value, **kw).flatten()

            return thunked

        scores = [Try(call_scorer(slot.results)) for slot in slots]
        scores = [score.value if score.success else None for score in scores]
        # `None in scores` doesn't work -- these are np.arrays
        if any(score is None for score in scores):
            self.Warning.scores_not_computed()
        return scores

    def _fill_table(self, names, scores):
        """
        Fill the comparison table with pairwise `baycomp` probabilities.

        Only pairs with `col < row` are computed; each result fills both
        the (row, col) and the mirrored (col, row) cell. Pairs with a
        missing score are skipped and NaN results are shown as "NA".
        """
        table = self.comparison_table
        for row, row_name, row_scores in zip(count(), names, scores):
            for col, col_name, col_scores in zip(range(row), names, scores):
                if row_scores is None or col_scores is None:
                    continue
                if self.use_rope and self.rope:
                    # With a rope, three probabilities are returned.
                    p0, rope, p1 = baycomp.two_on_single(
                        row_scores, col_scores, self.rope)
                    if np.isnan(p0) or np.isnan(rope) or np.isnan(p1):
                        self._set_cells_na(table, row, col)
                        continue
                    self._set_cell(
                        table, row, col,
                        f"{p0:.3f}<br/><small>{rope:.3f}</small>",
                        f"p({row_name} > {col_name}) = {p0:.3f}\n"
                        f"p({row_name} = {col_name}) = {rope:.3f}")
                    self._set_cell(
                        table, col, row,
                        f"{p1:.3f}<br/><small>{rope:.3f}</small>",
                        f"p({col_name} > {row_name}) = {p1:.3f}\n"
                        f"p({col_name} = {row_name}) = {rope:.3f}")
                else:
                    p0, p1 = baycomp.two_on_single(row_scores, col_scores)
                    if np.isnan(p0) or np.isnan(p1):
                        self._set_cells_na(table, row, col)
                        continue
                    self._set_cell(table, row, col,
                                   f"{p0:.3f}",
                                   f"p({row_name} > {col_name}) = {p0:.3f}")
                    self._set_cell(table, col, row,
                                   f"{p1:.3f}",
                                   f"p({col_name} > {row_name}) = {p1:.3f}")

    @classmethod
    def _set_cells_na(cls, table, row, col):
        """Mark both cells of the (row, col) pair as not computable."""
        cls._set_cell(table, row, col, "NA", "comparison cannot be computed")
        cls._set_cell(table, col, row, "NA", "comparison cannot be computed")

    @staticmethod
    def _set_cell(table, row, col, label, tooltip):
        """Put a centered QLabel with `label` and `tooltip` into a cell."""
        item = QLabel(label)
        item.setToolTip(tooltip)
        item.setAlignment(Qt.AlignCenter)
        table.setCellWidget(row, col, item)

    def _update_class_selection(self):
        """
        Repopulate the target class combo box for the current data.

        For a discrete class the combo lists `TARGET_AVERAGE` followed by
        the class values, and a previous selection that is still a class
        value is kept. Otherwise the enclosing box is hidden.
        """
        self.class_selection_combo.setCurrentIndex(-1)
        self.class_selection_combo.clear()
        if not self.data:
            return

        if self.data.domain.has_discrete_class:
            self.cbox.setVisible(True)
            class_var = self.data.domain.class_var
            items = [self.TARGET_AVERAGE] + class_var.values
            self.class_selection_combo.addItems(items)

            class_index = 0
            if self.class_selection in class_var.values:
                # +1 because index 0 of the combo is TARGET_AVERAGE.
                class_index = class_var.values.index(self.class_selection) + 1

            self.class_selection_combo.setCurrentIndex(class_index)
            self.class_selection = items[class_index]
        else:
            self.cbox.setVisible(False)

    def _on_target_class_changed(self):
        """Refresh the score and comparison tables for the new target."""
        self.update_stats_model()
        self.update_comparison_table()

    def _invalidate(self, which=None):
        """
        Discard stored results and mark the widget as needing an update.

        Parameters
        ----------
        which : Optional[Iterable]
            Keys of `self.learners` to invalidate; all of them if None.
        """
        self.cancel()
        self.fold_feature_selected = \
            self.resampling == OWTestLearners.FeatureFold
        # Invalidate learner results for `which` input keys
        # (if None then all learner results are invalidated)
        if which is None:
            which = self.learners.keys()

        model = self.score_table.model
        statmodelkeys = [
            model.item(row, 0).data(Qt.UserRole)
            for row in range(model.rowCount())
        ]

        for key in which:
            self.learners[key] = \
                self.learners[key]._replace(results=None, stats=None)

            if key in statmodelkeys:
                # Blank the displayed values but keep the row itself.
                row = statmodelkeys.index(key)
                for c in range(1, model.columnCount()):
                    item = model.item(row, c)
                    if item is not None:
                        item.setData(None, Qt.DisplayRole)
                        item.setData(None, Qt.ToolTipRole)

        self.comparison_table.clearContents()

        self.__needupdate = True

    def commit(self):
        """
        Commit the results to output.

        Results of all successful learners are merged and sent together
        with their augmented data. None is sent on both outputs when no
        learner has successful results.
        """
        self.Error.memory_error.clear()
        valid = [
            slot for slot in self.learners.values()
            if slot.results is not None and slot.results.success
        ]
        combined = None
        predictions = None
        if valid:
            # Evaluation results
            combined = results_merge([slot.results.value for slot in valid])
            combined.learner_names = [
                learner_name(slot.learner) for slot in valid
            ]

            # Predictions & Probabilities
            try:
                predictions = combined.get_augmented_data(
                    combined.learner_names)
            except MemoryError:
                # Predictions stay None; the merged results are still sent.
                self.Error.memory_error()

        self.Outputs.evaluations_results.send(combined)
        self.Outputs.predictions.send(predictions)

    def send_report(self):
        """Report on the testing schema and results"""
        if not self.data or not self.learners:
            return
        if self.resampling == self.KFold:
            stratified = 'Stratified ' if self.cv_stratified else ''
            items = [("Sampling type", "{}{}-fold Cross validation".format(
                stratified, self.NFolds[self.n_folds]))]
        elif self.resampling == self.LeaveOneOut:
            items = [("Sampling type", "Leave one out")]
        elif self.resampling == self.ShuffleSplit:
            stratified = 'Stratified ' if self.shuffle_stratified else ''
            items = [
                ("Sampling type",
                 "{}Shuffle split, {} random samples with {}% data ".format(
                     stratified, self.NRepeats[self.n_repeats],
                     self.SampleSizes[self.sample_size]))
            ]
        elif self.resampling == self.TestOnTrain:
            items = [("Sampling type", "No sampling, test on training data")]
        elif self.resampling == self.TestOnTest:
            items = [("Sampling type", "No sampling, test on testing data")]
        else:
            items = []
        if self.data.domain.has_discrete_class:
            items += [("Target class", self.class_selection.strip("()"))]
        if items:
            self.report_items("Settings", items)
        self.report_table("Scores", self.score_table.view)

    @classmethod
    def migrate_settings(cls, settings_, version):
        """Upgrade stored `settings_` saved by an older widget `version`."""
        if version < 2:
            # Non-zero resampling indices are shifted up by one.
            if settings_["resampling"] > 0:
                settings_["resampling"] += 1
        if version < 3:
            # Older version used an incompatible context handler
            settings_["context_settings"] = [
                c for c in settings_.get("context_settings", ())
                if not hasattr(c, 'classes')
            ]

    @Slot(float)
    def setProgressValue(self, value):
        """Forward the task's progress `value` to the widget progress bar."""
        self.progressBarSet(value)

    def __update(self):
        """
        Check preconditions and submit a new evaluation task.

        When a precondition fails the state is set to `State.Waiting`,
        the (possibly empty) results are committed and nothing is
        submitted.
        """
        self.__needupdate = False

        assert self.__task is None or self.__state == State.Running
        if self.__state == State.Running:
            self.cancel()

        self.Warning.test_data_unused.clear()
        self.Error.test_data_incompatible.clear()
        self.Warning.test_data_missing.clear()
        self.Information.test_data_transformed(
            shown=self.resampling == self.TestOnTest
            and self.data is not None
            and self.test_data is not None
            and self.data.domain.attributes != self.test_data.domain.attributes)
        self.warning()
        self.Error.class_inconsistent.clear()
        self.Error.too_many_folds.clear()
        self.error()

        # check preconditions and return early
        if self.data is None:
            self.__state = State.Waiting
            self.commit()
            return
        if not self.learners:
            self.__state = State.Waiting
            self.commit()
            return
        if self.resampling == OWTestLearners.KFold and \
                len(self.data) < self.NFolds[self.n_folds]:
            self.Error.too_many_folds()
            self.__state = State.Waiting
            self.commit()
            return

        elif self.resampling == OWTestLearners.TestOnTest:
            if self.test_data is None:
                # Don't stack a warning on top of the "empty test data" error.
                if not self.Error.test_data_empty.is_shown():
                    self.Warning.test_data_missing()
                self.__state = State.Waiting
                self.commit()
                return
            elif self.test_data.domain.class_var != self.data.domain.class_var:
                self.Error.class_inconsistent()
                self.__state = State.Waiting
                self.commit()
                return

        elif self.test_data is not None:
            self.Warning.test_data_unused()

        # Fixed seed handed to the k-fold and shuffle-split samplers below.
        rstate = 42
        # items in need of an update
        items = [(key, slot) for key, slot in self.learners.items()
                 if slot.results is None]
        learners = [slot.learner for _, slot in items]

        # deepcopy all learners as they are not thread safe (by virtue of
        # the base API). These will be the effective learner objects tested
        # but will be replaced with the originals on return (see restore
        # learners below)
        learners_c = [copy.deepcopy(learner) for learner in learners]

        if self.resampling == OWTestLearners.TestOnTest:
            test_f = partial(
                Orange.evaluation.TestOnTestData(store_data=True,
                                                 store_models=True),
                self.data, self.test_data, learners_c, self.preprocessor)
        else:
            if self.resampling == OWTestLearners.KFold:
                sampler = Orange.evaluation.CrossValidation(
                    k=self.NFolds[self.n_folds], random_state=rstate)
            elif self.resampling == OWTestLearners.FeatureFold:
                sampler = Orange.evaluation.CrossValidationFeature(
                    feature=self.fold_feature)
            elif self.resampling == OWTestLearners.LeaveOneOut:
                sampler = Orange.evaluation.LeaveOneOut()
            elif self.resampling == OWTestLearners.ShuffleSplit:
                sampler = Orange.evaluation.ShuffleSplit(
                    n_resamples=self.NRepeats[self.n_repeats],
                    train_size=self.SampleSizes[self.sample_size] / 100,
                    test_size=None,
                    stratified=self.shuffle_stratified,
                    random_state=rstate)
            elif self.resampling == OWTestLearners.TestOnTrain:
                sampler = Orange.evaluation.TestOnTrainingData(
                    store_models=True)
            else:
                assert False, "self.resampling %s" % self.resampling

            sampler.store_data = True
            test_f = partial(sampler, self.data, learners_c, self.preprocessor)

        def replace_learners(evalfunc, *args, **kwargs):
            # Run the evaluation, then swap the deep-copied learners in the
            # results back for the original learner objects.
            res = evalfunc(*args, **kwargs)
            assert all(lc is lo for lc, lo in zip(learners_c, res.learners))
            res.learners[:] = learners
            return res

        test_f = partial(replace_learners, test_f)

        self.__submit(test_f)

    def __submit(self, testfunc):
        # type: (Callable[[Callable[[float], None]], Results]) -> None
        """
        Submit a testing function for evaluation

        MUST not be called if an evaluation is already pending/running.
        Cancel the existing task first.

        Parameters
        ----------
        testfunc : Callable[[Callable[float]], Results])
            Must be a callable taking a single `callback` argument and
            returning a Results instance
        """
        assert self.__state != State.Running
        # Setup the task
        task = TaskState()

        def progress_callback(finished):
            # Raising here is how a requested interruption aborts the
            # evaluation that calls this callback.
            if task.is_interruption_requested():
                raise UserInterrupt()
            task.set_progress_value(100 * finished)

        testfunc = partial(testfunc, callback=progress_callback)
        task.start(self.__executor, testfunc)

        task.progress_changed.connect(self.setProgressValue)
        task.watcher.finished.connect(self.__task_complete)

        self.Outputs.evaluations_results.invalidate()
        self.Outputs.predictions.invalidate()
        self.progressBarInit()
        self.setStatusMessage("Running")

        self.__state = State.Running
        self.__task = task

    @Slot(object)
    def __task_complete(self, f: 'Future[Results]'):
        """
        Handle a completed evaluation task.

        On success the per-learner results and scores are stored in
        `self.learners`, the views are refreshed and the outputs are
        committed. If retrieving the result raises, the exception is
        logged and shown as a widget error instead.
        """
        # handle a completed task
        assert self.thread() is QThread.currentThread()
        assert self.__task is not None and self.__task.future is f
        self.progressBarFinished()
        self.setStatusMessage("")
        assert f.done()
        self.__task = None
        self.__state = State.Done
        try:
            results = f.result()  # type: Results
            learners = results.learners  # type: List[Learner]
        except Exception as er:  # pylint: disable=broad-except
            log.exception("testing error (in __task_complete):",
                          exc_info=True)
            self.error("\n".join(traceback.format_exception_only(type(er), er)))
            return

        # Reverse map from learner object to its key in `self.learners`.
        learner_key = {
            slot.learner: key for key, slot in self.learners.items()
        }
        assert all(learner in learner_key for learner in learners)

        # Update the results for individual learners
        class_var = results.domain.class_var
        for learner, result in zip(learners, results.split_by_model()):
            stats = None
            if class_var.is_primitive():
                ex = result.failed[0]
                if ex:
                    # The learner failed: every score carries the exception.
                    stats = [Try.Fail(ex)] * len(self.scorers)
                    result = Try.Fail(ex)
                else:
                    stats = [
                        Try(scorer_caller(scorer, result))
                        for scorer in self.scorers
                    ]
                    result = Try.Success(result)
            key = learner_key.get(learner)
            self.learners[key] = \
                self.learners[key]._replace(results=result, stats=stats)

        self.score_table.update_header(self.scorers)
        self.update_stats_model()
        self.update_comparison_table()

        self.commit()

    def cancel(self):
        """
        Cancel the current/pending evaluation (if any).
        """
        if self.__task is not None:
            assert self.__state == State.Running
            self.__state = State.Cancelled
            task, self.__task = self.__task, None
            task.cancel()
            # Disconnect so the cancelled task can no longer call back
            # into the widget.
            task.progress_changed.disconnect(self.setProgressValue)
            task.watcher.finished.disconnect(self.__task_complete)

            self.progressBarFinished()
            self.setStatusMessage("")

    def onDeleteWidget(self):
        """Cancel any running evaluation before the widget is deleted."""
        self.cancel()
        super().onDeleteWidget()
class OWDistanceMap(widget.OWWidget):
    """
    Widget that displays a distance matrix as a color-mapped grid.

    The rows/columns can optionally be reordered by hierarchical
    clustering (with dendrograms shown on the left and on top), annotated
    with labels, and selected; the selection is sent on the outputs.
    """
    name = "距离图(Distance Map)"
    description = "可视化距离矩阵"
    icon = "icons/DistanceMap.svg"
    priority = 1200
    keywords = ['juliyingshe', 'yingshe', 'julitu']
    category = '非监督(Unsupervised)'

    class Inputs:
        distances = Input("距离(Distances)", Orange.misc.DistMatrix, replaces=['Distances'])

    class Outputs:
        selected_data = Output("选定的数据(Selected Data)", Orange.data.Table, default=True, replaces=['Selected Data'])
        annotated_data = Output("数据(Data)", Orange.data.Table, replaces=['Data'])
        features = Output("特征(Features)", widget.AttributeList, dynamic=False, replaces=['Features'])

    settingsHandler = settings.PerfectDomainContextHandler()

    #: type of ordering to apply to matrix rows/columns
    NoOrdering, Clustering, OrderedClustering = 0, 1, 2

    sorting = settings.Setting(NoOrdering)

    palette_name = settings.Setting(colorpalettes.DefaultContinuousPaletteName)
    color_gamma = settings.Setting(0.0)
    color_low = settings.Setting(0.0)
    color_high = settings.Setting(1.0)

    annotation_idx = settings.ContextSetting(0)
    pending_selection = settings.Setting(None, schema_only=True)

    autocommit = settings.Setting(True)

    graph_name = "grid_widget"

    # Disable clustering for inputs bigger than this
    _MaxClustering = 25000
    # Disable cluster leaf ordering for inputs bigger than this
    _MaxOrderedClustering = 2000

    def __init__(self):
        """Build the control area and the graphics scene layout."""
        super().__init__()

        self.matrix = None
        # Items described by the matrix rows; assigned in `set_items`.
        self.items = None
        self._matrix_range = 0.
        # Cached clustering results, computed lazily on first use.
        self._tree = None
        self._ordered_tree = None
        self._sorted_matrix = None
        self._sort_indices = None
        self._selection = None
        # Labels in input (unsorted) order; assigned in `_set_labels`.
        self._labels = None

        self.sorting_cb = gui.comboBox(
            self.controlArea, self, "sorting", box="元素排序",
            items=["无", "聚类(Clustering)", "有序叶聚类"],
            callback=self._invalidate_ordering)

        box = gui.vBox(self.controlArea, "颜色")
        self.color_map_widget = cmw = ColorGradientSelection(
            thresholds=(self.color_low, self.color_high),
        )
        model = itemmodels.ContinuousPalettesModel(parent=self)
        cmw.setModel(model)
        # Restore the stored palette if the model still provides it.
        idx = cmw.findData(self.palette_name, model.KeyRole)
        if idx != -1:
            cmw.setCurrentIndex(idx)

        cmw.activated.connect(self._update_color)

        def _set_thresholds(low, high):
            self.color_low, self.color_high = low, high
            self._update_color()

        cmw.thresholdsChanged.connect(_set_thresholds)
        box.layout().addWidget(self.color_map_widget)

        self.annot_combo = gui.comboBox(
            self.controlArea, self, "annotation_idx", box="注释",
            contentsLength=12, searchable=True,
            callback=self._invalidate_annotations
        )
        self.annot_combo.setModel(itemmodels.VariableListModel())
        self.annot_combo.model()[:] = ["无", "枚举"]
        gui.rubber(self.controlArea)

        gui.auto_send(self.buttonsArea, self, "autocommit")

        self.view = GraphicsView(background=None)
        self.mainArea.layout().addWidget(self.view)

        # Grid layout: legend at (0, 1), top dendrogram at (1, 1),
        # left dendrogram / matrix / right labels in row 2 and the
        # bottom labels at (3, 1).
        self.grid_widget = pg.GraphicsWidget()
        self.grid = QGraphicsGridLayout()
        self.grid_widget.setLayout(self.grid)

        self.gradient_legend = GradientLegendWidget(0, 1, self._color_map())
        self.gradient_legend.setSizePolicy(QSizePolicy.Preferred, QSizePolicy.Fixed)
        self.gradient_legend.setMaximumWidth(250)
        self.grid.addItem(self.gradient_legend, 0, 1)
        self.viewbox = pg.ViewBox(enableMouse=False, enableMenu=False)
        self.viewbox.setAcceptedMouseButtons(Qt.NoButton)
        self.viewbox.setAcceptHoverEvents(False)
        self.grid.addItem(self.viewbox, 2, 1)

        self.left_dendrogram = DendrogramWidget(
            self.grid_widget, orientation=DendrogramWidget.Left,
            selectionMode=DendrogramWidget.NoSelection,
            hoverHighlightEnabled=False
        )
        self.left_dendrogram.setAcceptedMouseButtons(Qt.NoButton)
        self.left_dendrogram.setAcceptHoverEvents(False)

        self.top_dendrogram = DendrogramWidget(
            self.grid_widget, orientation=DendrogramWidget.Top,
            selectionMode=DendrogramWidget.NoSelection,
            hoverHighlightEnabled=False
        )
        self.top_dendrogram.setAcceptedMouseButtons(Qt.NoButton)
        self.top_dendrogram.setAcceptHoverEvents(False)

        self.grid.addItem(self.left_dendrogram, 2, 0)
        self.grid.addItem(self.top_dendrogram, 1, 1)

        self.right_labels = TextList(
            alignment=Qt.AlignLeft | Qt.AlignVCenter,
            sizePolicy=QSizePolicy(QSizePolicy.Fixed, QSizePolicy.Expanding)
        )
        self.bottom_labels = TextList(
            orientation=Qt.Horizontal,
            alignment=Qt.AlignRight | Qt.AlignVCenter,
            sizePolicy=QSizePolicy(QSizePolicy.Expanding, QSizePolicy.Fixed)
        )

        self.grid.addItem(self.right_labels, 2, 2)
        self.grid.addItem(self.bottom_labels, 3, 1)

        self.view.setCentralItem(self.grid_widget)

        # Everything stays hidden until a matrix is displayed.
        self.gradient_legend.hide()
        self.left_dendrogram.hide()
        self.top_dendrogram.hide()
        self.right_labels.hide()
        self.bottom_labels.hide()

        self.matrix_item = None
        self.dendrogram = None

        self.settingsAboutToBePacked.connect(self.pack_settings)

    def pack_settings(self):
        """Store the current matrix selection in `pending_selection`."""
        if self.matrix_item is not None:
            self.pending_selection = self.matrix_item.selections()
        else:
            self.pending_selection = None

    @Inputs.distances
    def set_distances(self, matrix):
        """
        Set the input distance matrix.

        A matrix with fewer than two rows is rejected with an error and
        treated as no input. Sorting options are disabled (and the
        current sorting downgraded) when the matrix exceeds
        `_MaxOrderedClustering` / `_MaxClustering` rows.
        """
        self.closeContext()
        self.clear()
        self.error()
        if matrix is not None:
            N, _ = matrix.shape
            if N < 2:
                self.error("Empty distance matrix.")
                matrix = None

        self.matrix = matrix
        if matrix is not None:
            self._matrix_range = numpy.nanmax(matrix)
            self.set_items(matrix.row_items, matrix.axis)
        else:
            self._matrix_range = 0.
            self.set_items(None)

        if matrix is not None:
            N, _ = matrix.shape
        else:
            N = 0

        model = self.sorting_cb.model()
        # Row 2 of the combo model is the ordered-clustering option.
        item = model.item(2)

        msg = None
        if N > OWDistanceMap._MaxOrderedClustering:
            item.setFlags(item.flags() & ~Qt.ItemIsEnabled)
            if self.sorting == OWDistanceMap.OrderedClustering:
                self.sorting = OWDistanceMap.Clustering
                msg = "Cluster ordering was disabled due to the input " \
                      "matrix being to big"
        else:
            item.setFlags(item.flags() | Qt.ItemIsEnabled)

        # Row 1 of the combo model is the plain clustering option.
        item = model.item(1)
        if N > OWDistanceMap._MaxClustering:
            item.setFlags(item.flags() & ~Qt.ItemIsEnabled)
            if self.sorting == OWDistanceMap.Clustering:
                self.sorting = OWDistanceMap.NoOrdering
            msg = "Clustering was disabled due to the input " \
                  "matrix being to big"
        else:
            item.setFlags(item.flags() | Qt.ItemIsEnabled)

        self.information(msg)

    def set_items(self, items, axis=1):
        """
        Store `items` and rebuild the annotation choices for them.

        Parameters
        ----------
        items : Optional[Union[Orange.data.Table, list]]
            The matrix `row_items` (None when there is no matrix).
        axis : int
            The matrix `axis`; a falsy value offers "Attribute names".
        """
        self.items = items
        model = self.annot_combo.model()
        if items is None:
            model[:] = ["无", "枚举"]
        elif not axis:
            model[:] = ["无", "枚举", "Attribute names"]
        elif isinstance(items, Orange.data.Table):
            annot_vars = list(filter_visible(items.domain.variables)) + list(items.domain.metas)
            model[:] = ["无", "枚举"] + annot_vars
            self.annotation_idx = 0
            self.openContext(items.domain)
        elif isinstance(items, list) and \
                all(isinstance(item, Orange.data.Variable) for item in items):
            model[:] = ["无", "枚举", "Name"]
        else:
            model[:] = ["无", "枚举"]
        # Keep the (possibly restored) index within the new model.
        self.annotation_idx = min(self.annotation_idx, len(model) - 1)

    def clear(self):
        """Drop the matrix, cached clustering, selection and the plot."""
        self.matrix = None
        self._tree = None
        self._ordered_tree = None
        self._sorted_matrix = None
        self._selection = []
        self._clear_plot()

    def handleNewSignals(self):
        """Rebuild the scene for the new input and commit the outputs."""
        if self.matrix is not None:
            self._update_ordering()
            self._setup_scene()
            self._update_labels()
            # Apply a selection stored with the settings, once.
            if self.pending_selection is not None:
                self.matrix_item.set_selections(self.pending_selection)
                self.pending_selection = None
        self.commit.now()

    def _clear_plot(self):
        """Remove the matrix item and hide dendrograms, labels and legend."""
        def remove(item):
            item.setParentItem(None)
            item.scene().removeItem(item)

        if self.matrix_item is not None:
            self.matrix_item.selectionChanged.disconnect(
                self._invalidate_selection)
            remove(self.matrix_item)
            self.matrix_item = None

        self._set_displayed_dendrogram(None)
        self._set_labels(None)
        self.gradient_legend.hide()

    def _cluster_tree(self):
        """Return the clustering tree of the matrix, computing it once."""
        if self._tree is None:
            self._tree = hierarchical.dist_matrix_clustering(self.matrix)
        return self._tree

    def _ordered_cluster_tree(self):
        """Return the leaf-ordered clustering tree, computing it once."""
        if self._ordered_tree is None:
            tree = self._cluster_tree()
            self._ordered_tree = \
                hierarchical.optimal_leaf_ordering(tree, self.matrix)
        return self._ordered_tree

    def _setup_scene(self):
        """Create the matrix item for `_sorted_matrix` and show it."""
        self._clear_plot()
        self.matrix_item = DistanceMapItem(self._sorted_matrix)
        # Scale the y axis to compensate for pg.ViewBox's y axis invert
        self.matrix_item.setTransform(QTransform.fromScale(1, -1), )
        self.viewbox.addItem(self.matrix_item)
        # Set fixed view box range.
        h, w = self._sorted_matrix.shape
        self.viewbox.setRange(QRectF(0, -h, w, h), padding=0)

        self.matrix_item.selectionChanged.connect(self._invalidate_selection)

        if self.sorting == OWDistanceMap.NoOrdering:
            tree = None
        elif self.sorting == OWDistanceMap.Clustering:
            tree = self._cluster_tree()
        elif self.sorting == OWDistanceMap.OrderedClustering:
            tree = self._ordered_cluster_tree()

        self._set_displayed_dendrogram(tree)

        self._update_color()

    def _set_displayed_dendrogram(self, root):
        """Show `root` in both dendrograms, or hide them when it is None."""
        self.left_dendrogram.set_root(root)
        self.top_dendrogram.set_root(root)
        self.left_dendrogram.setVisible(root is not None)
        self.top_dendrogram.setVisible(root is not None)

        # A maximum size of 0 collapses the hidden dendrograms in the
        # layout; -1 is used when they are shown.
        constraint = 0 if root is None else -1  # 150
        self.left_dendrogram.setMaximumWidth(constraint)
        self.top_dendrogram.setMaximumHeight(constraint)

    def _invalidate_ordering(self):
        """Recompute the ordering and redraw after `sorting` changed."""
        self._sorted_matrix = None
        if self.matrix is not None:
            self._update_ordering()
            self._setup_scene()
            self._update_labels()
            self._invalidate_selection()

    def _update_ordering(self):
        """Compute `_sorted_matrix` and `_sort_indices` for `sorting`."""
        if self.sorting == OWDistanceMap.NoOrdering:
            self._sorted_matrix = self.matrix
            self._sort_indices = None
        else:
            if self.sorting == OWDistanceMap.Clustering:
                tree = self._cluster_tree()
            elif self.sorting == OWDistanceMap.OrderedClustering:
                tree = self._ordered_cluster_tree()

            leaves = hierarchical.leaves(tree)
            indices = numpy.array([leaf.value.index for leaf in leaves])
            X = self.matrix
            # Apply the same permutation to rows and columns.
            self._sorted_matrix = X[indices[:, numpy.newaxis],
                                    indices[numpy.newaxis, :]]
            self._sort_indices = indices

    def _invalidate_annotations(self):
        """Refresh the labels after the annotation choice changed."""
        if self.matrix is not None:
            self._update_labels()

    def _update_labels(self, ):
        """
        Compute the labels for the selected annotation and display them.

        When the selected annotation cannot be resolved for the current
        items, no labels are shown.
        """
        # Default to "no labels": without it an annotation/items
        # combination matching none of the branches below would leave
        # `labels` unbound and raise UnboundLocalError.
        labels = None
        if self.annotation_idx == 0:  # None
            labels = None
        elif self.annotation_idx == 1:  # Enumeration
            labels = [str(i + 1) for i in range(self.matrix.shape[0])]
        elif self.annot_combo.model()[self.annotation_idx] == "Attribute names":
            attr = self.matrix.row_items.domain.attributes
            labels = [str(attr[i]) for i in range(self.matrix.shape[0])]
        elif self.annotation_idx == 2 and \
                isinstance(self.items, list):
            # `set_items` offers "Name" for any list of variables, not only
            # for `widget.AttributeList`, so accept any list here as well.
            labels = [v.name for v in self.items]
        elif isinstance(self.items, Orange.data.Table):
            var = self.annot_combo.model()[self.annotation_idx]
            column, _ = self.items.get_column_view(var)
            labels = [var.str_val(value) for value in column]

        self._set_labels(labels)

    def _set_labels(self, labels):
        """
        Display `labels` on the right and at the bottom of the matrix.

        Parameters
        ----------
        labels : Optional[List[str]]
            Labels in input order; they are reordered by `_sort_indices`
            when a sorting is active. None or empty hides the labels.
        """
        self._labels = labels

        if labels and self.sorting != OWDistanceMap.NoOrdering:
            sortind = self._sort_indices
            labels = [labels[i] for i in sortind]

        for textlist in [self.right_labels, self.bottom_labels]:
            textlist.setItems(labels or [])
            textlist.setVisible(bool(labels))

        # As for the dendrograms: 0 collapses the hidden label lists.
        constraint = -1 if labels else 0
        self.right_labels.setMaximumWidth(constraint)
        self.bottom_labels.setMaximumHeight(constraint)

    def _color_map(self) -> GradientColorMap:
        """Return the color map for the current palette and thresholds."""
        palette = self.color_map_widget.currentData()
        return GradientColorMap(
            palette.lookup_table(),
            # Never pass a high threshold below the low one.
            thresholds=(self.color_low, max(self.color_high, self.color_low)),
            span=(0., self._matrix_range))

    def _update_color(self):
        """Apply the current palette to the matrix item and the legend."""
        palette = self.color_map_widget.currentData()
        self.palette_name = palette.name
        if self.matrix_item:
            # The item's lookup table is built from a color map spanning
            # (0, 1), sampled at 256 evenly spaced values.
            cmap = self._color_map().replace(span=(0., 1.))
            colors = cmap.apply(numpy.arange(256) / 255.)
            self.matrix_item.setLookupTable(colors)
            self.gradient_legend.show()
            self.gradient_legend.setRange(0, self._matrix_range)
            self.gradient_legend.setColorMap(self._color_map())

    def _invalidate_selection(self):
        """Recompute `_selection` from the matrix item and commit."""
        # NOTE(review): `selections()` appears to return nested sequences
        # of indices; both levels are flattened here -- confirm against
        # DistanceMapItem.
        ranges = self.matrix_item.selections()
        ranges = reduce(iadd, ranges, [])
        indices = reduce(iadd, ranges, [])
        if self.sorting != OWDistanceMap.NoOrdering:
            # Map displayed positions back to input indices.
            sortind = self._sort_indices
            indices = [sortind[i] for i in indices]
        self._selection = list(sorted(set(indices)))
        self.commit.deferred()

    @gui.deferred
    def commit(self):
        """
        Send the selection on the outputs.

        For table items the selected rows (matrix axis 1) or the selected
        columns (matrix axis 0) are sent as data; for an `AttributeList`
        the selected features are sent. Outputs that do not apply
        receive None.
        """
        datasubset = None
        featuresubset = None

        if not self._selection:
            pass
        elif isinstance(self.items, Orange.data.Table):
            indices = self._selection
            if self.matrix.axis == 1:
                datasubset = self.items.from_table_rows(self.items, indices)
            elif self.matrix.axis == 0:
                domain = Orange.data.Domain(
                    [self.items.domain[i] for i in indices],
                    self.items.domain.class_vars,
                    self.items.domain.metas)
                datasubset = self.items.transform(domain)
        elif isinstance(self.items, widget.AttributeList):
            subset = [self.items[i] for i in self._selection]
            featuresubset = widget.AttributeList(subset)

        self.Outputs.selected_data.send(datasubset)
        self.Outputs.annotated_data.send(create_annotated_table(self.items, self._selection))
        self.Outputs.features.send(featuresubset)

    def onDeleteWidget(self):
        """Release the matrix and the plot when the widget is deleted."""
        super().onDeleteWidget()
        self.clear()

    def send_report(self):
        """Report the sorting and annotation settings and the plot."""
        annot = self.annot_combo.currentText()
        # The first two entries are fixed choices and are reported in
        # lower case; other entries are reported as displayed.
        if self.annotation_idx <= 1:
            annot = annot.lower()
        self.report_items((
            ("Sorting", self.sorting_cb.currentText().lower()),
            ("Annotations", annot)
        ))
        if self.matrix is not None:
            self.report_plot()