class OWRank(widget.OWWidget):
    """Score the features of the input data and output the selected ones.

    Two score tables are kept, one for data with a discrete class and one
    for data with a continuous class; only one of them is shown at a time
    (see `switchRanksMode`). Additional scorers connected to the "Scorer"
    input are appended as extra columns.
    """
    name = "Rank"
    description = "Rank and filter data features by their relevance."
    icon = "icons/Rank.svg"
    priority = 1102

    inputs = [("Data", Orange.data.Table, "setData"),
              ("Scorer", score.Scorer, "set_learner", widget.Multiple)]
    outputs = [("Reduced Data", Orange.data.Table)]

    SelectNone, SelectAll, SelectManual, SelectNBest = range(4)

    selectMethod = settings.Setting(SelectNBest)
    nSelected = settings.Setting(5)
    auto_apply = settings.Setting(True)

    # Header state for discrete/continuous scores
    headerState = settings.Setting((None, None))

    def __init__(self):
        super().__init__()
        self.out_domain_desc = None

        self.all_measures = SCORES

        # Default measures start out shown, the remaining ones hidden.
        self.selectedMeasures = dict(
            [(name, True) for name in _DEFAULT_SELECTED] +
            [(m.name, False)
             for m in self.all_measures[len(_DEFAULT_SELECTED):]]
        )
        # Discrete (0) or continuous (1) class mode
        self.rankMode = 0

        self.data = None

        self.discMeasures = [
            m for m in self.all_measures
            if issubclass(DiscreteVariable, m.score.class_type)]
        self.contMeasures = [
            m for m in self.all_measures
            if issubclass(ContinuousVariable, m.score.class_type)]

        selMethBox = gui.widgetBox(
            self.controlArea, "Select attributes", addSpace=True)

        grid = QtGui.QGridLayout()
        grid.setContentsMargins(0, 0, 0, 0)
        self.selectButtons = QtGui.QButtonGroup()
        self.selectButtons.buttonClicked[int].connect(self.setSelectMethod)

        def button(text, buttonid, toolTip=None):
            b = QtGui.QRadioButton(text)
            self.selectButtons.addButton(b, buttonid)
            if toolTip is not None:
                b.setToolTip(toolTip)
            return b

        b1 = button(self.tr("None"), OWRank.SelectNone)
        b2 = button(self.tr("All"), OWRank.SelectAll)
        b3 = button(self.tr("Manual"), OWRank.SelectManual)
        b4 = button(self.tr("Best ranked"), OWRank.SelectNBest)

        s = gui.spin(selMethBox, self, "nSelected", 1, 100,
                     callback=self.nSelectedChanged)

        grid.addWidget(b1, 0, 0)
        grid.addWidget(b2, 1, 0)
        grid.addWidget(b3, 2, 0)
        grid.addWidget(b4, 3, 0)
        grid.addWidget(s, 3, 1)

        self.selectButtons.button(self.selectMethod).setChecked(True)

        selMethBox.layout().addLayout(grid)

        gui.auto_commit(self.controlArea, self, "auto_apply", "Commit",
                        checkbox_label="Commit on any change")

        gui.rubber(self.controlArea)

        # Discrete and continuous table views are stacked
        self.ranksViewStack = QtGui.QStackedLayout()
        self.mainArea.layout().addLayout(self.ranksViewStack)

        # Both tables are built by the same helper so their configuration
        # cannot drift apart (the continuous view previously never got its
        # own first-column width because of a copy-paste slip).
        (self.discRanksView, self.discRanksLabels,
         self.discRanksModel, self.discRanksProxyModel) = \
            self._create_ranks_view(self.discMeasures, self.headerState[0])
        (self.contRanksView, self.contRanksLabels,
         self.contRanksModel, self.contRanksProxyModel) = \
            self._create_ranks_view(self.contMeasures, self.headerState[1])

        # Switch the current view to Discrete
        self.switchRanksMode(0)
        self.resetInternals()
        self.updateDelegates()
        self.updateVisibleScoreColumns()

        self.resize(690, 500)

        self.measure_scores = table((len(self.measures), 0), None)
        self.learners = {}

    def _create_ranks_view(self, measures, header_state):
        """Create one score table and add it to `self.ranksViewStack`.

        `measures` supplies the column labels; `header_state` is a saved
        header state to restore, or None. Returns the tuple
        ``(view, labels, model, proxy_model)``.
        """
        view = QtGui.QTableView()
        self.ranksViewStack.addWidget(view)
        view.setSelectionBehavior(QtGui.QTableView.SelectRows)
        view.setSelectionMode(QtGui.QTableView.MultiSelection)
        view.setSortingEnabled(True)

        labels = ["#"] + [m.shortname for m in measures]
        model = QtGui.QStandardItemModel(self)
        model.setHorizontalHeaderLabels(labels)
        proxy = MySortProxyModel(self)
        proxy.setSourceModel(model)
        view.setModel(proxy)

        view.setColumnWidth(0, 20)
        view.sortByColumn(1, Qt.DescendingOrder)
        view.selectionModel().selectionChanged.connect(self.commit)
        view.pressed.connect(self.onSelectItem)
        view.horizontalHeader().sectionClicked.connect(self.headerClick)
        if header_state is not None:
            view.horizontalHeader().restoreState(header_state)
        return view, labels, model, proxy

    def switchRanksMode(self, index):
        """
        Switch between discrete/continuous mode
        """
        self.rankMode = index
        self.ranksViewStack.setCurrentIndex(index)

        if index == 0:
            self.ranksView = self.discRanksView
            self.ranksModel = self.discRanksModel
            self.ranksProxyModel = self.discRanksProxyModel
            self.measures = self.discMeasures
        else:
            self.ranksView = self.contRanksView
            self.ranksModel = self.contRanksModel
            self.ranksProxyModel = self.contRanksProxyModel
            self.measures = self.contMeasures

        self.updateVisibleScoreColumns()

    @check_sql_input
    def setData(self, data):
        """Handle the "Data" input: rebuild the score table and commit."""
        self.error([0, 100])
        self.resetInternals()

        if data is not None and not data.domain.class_var:
            data = None
            self.error(100, "Data does not have a target variable")

        self.data = data
        if self.data is not None:
            attrs = self.data.domain.attributes
            self.usefulAttributes = [
                attr for attr in attrs
                if attr.is_discrete or attr.is_continuous]

            if self.data.domain.has_continuous_class:
                self.switchRanksMode(1)
            elif self.data.domain.has_discrete_class:
                self.switchRanksMode(0)
            else:
                # String or other.
                self.error(0, "Cannot handle class variable type %r" %
                           type(self.data.domain.class_var).__name__)

            self.ranksModel.setRowCount(len(attrs))
            for i, a in enumerate(attrs):
                # First column: number of values, or "C" for non-discrete.
                v = len(a.values) if a.is_discrete else "C"
                item = ScoreValueItem()
                item.setData(v, Qt.DisplayRole)
                self.ranksModel.setItem(i, 0, item)
                item = QtGui.QStandardItem(a.name)
                item.setData(gui.attributeIconDict[a], Qt.DecorationRole)
                self.ranksModel.setVerticalHeaderItem(i, item)

            shape = (len(self.measures) + len(self.learners), len(attrs))
            self.measure_scores = table(shape, None)
            self.updateScores()

        self.selectMethodChanged()
        self.commit()

    def set_learner(self, learner, lid=None):
        """Handle the "Scorer" input; `lid` identifies the connection.

        A None `learner` with a non-None `lid` removes that scorer.
        """
        if learner is None and lid is not None:
            # A disconnect may arrive for an id that was never stored;
            # that must not raise.
            self.learners.pop(lid, None)
        elif learner is not None:
            self.learners[lid] = score_meta(
                learner.name, learner.name, learner
            )
        attrs_len = 0 if not self.data else len(self.data.domain.attributes)
        shape = (len(self.measures) + len(self.learners), attrs_len)
        self.measure_scores = table(shape, None)
        labels = [v.shortname for v in self.learners.values()]
        self.contRanksModel.setHorizontalHeaderLabels(
            self.contRanksLabels + labels
        )
        self.discRanksModel.setHorizontalHeaderLabels(
            self.discRanksLabels + labels
        )
        self.updateScores()
        self.commit()

    def updateScores(self, measuresMask=None):
        """
        Update the current computed scores.

        If `measuresMask` is given it must be a list of bool values
        indicating which measures should be recomputed. When it is None,
        all currently shown measures and all connected scorers are
        recomputed.
        """
        if not self.data:
            return

        measures = self.measures + list(self.learners.values())
        # Invalidate all warnings
        self.warning(range(max(len(self.discMeasures),
                               len(self.contMeasures))))

        if measuresMask is None:
            # Update all selected measures
            measuresMask = [self.selectedMeasures.get(m.name)
                            for m in self.measures]
            # Scorer names act as (truthy) mask entries for the learners.
            measuresMask = measuresMask + [
                v.name for v in self.learners.values()]

        data = self.data
        self.error(1)
        for index, (meas, mask) in enumerate(zip(measures, measuresMask)):
            if not mask:
                continue
            if index < len(self.measures):
                estimator = meas.score()
                self.measure_scores[index] = estimator(data)
            else:
                learner = meas.score
                if isinstance(learner, Learner) and \
                        not learner.check_learner_adequacy(self.data.domain):
                    self.error(1, learner.learner_adequacy_err_msg)
                else:
                    self.measure_scores[index] = meas.score.score_data(data)

        self.updateRankModel(measuresMask)
        self.ranksProxyModel.invalidate()
        self.selectMethodChanged()

    def updateRankModel(self, measuresMask=None):
        """
        Update the rankModel from `self.measure_scores`.

        `measuresMask` is accepted for interface compatibility and is not
        used here.
        """
        # Drop columns left over from scorers that are no longer present.
        # Remove from the right: removing from the left would shift the
        # remaining columns and skip every other one.
        first_stale = len(self.measure_scores) + 1
        for column in reversed(range(first_stale,
                                     self.ranksModel.columnCount())):
            self.ranksModel.removeColumn(column)

        values = []
        for i, scores in enumerate(self.measure_scores):
            values_one = []
            for j, value in enumerate(scores):
                values_one.append(value)
                item = self.ranksModel.item(j, i + 1)
                if not item:
                    item = ScoreValueItem()
                    self.ranksModel.setItem(j, i + 1, item)
                item.setData(value, Qt.DisplayRole)
            values.append(values_one)

        for i, vals in enumerate(values):
            valid_vals = [v for v in vals if v is not None]
            if not valid_vals:
                continue
            vmin, vmax = min(valid_vals), max(valid_vals)
            for j, v in enumerate(vals):
                if v is not None:
                    # Set the bar ratio role for i-th measure.
                    # `or 1` avoids division by zero when all are equal.
                    ratio = float((v - vmin) / ((vmax - vmin) or 1))
                    item = self.ranksModel.item(j, i + 1)
                    item.setData(ratio, gui.BarRatioRole)

        self.ranksView.setColumnWidth(0, 20)
        self.ranksView.resizeRowsToContents()

    def resetInternals(self):
        """Forget the current data and empty the current score table."""
        self.data = None
        self.usefulAttributes = []
        self.ranksModel.setRowCount(0)

    def onSelectItem(self, index):
        """
        Called when the user selects/unselects an item in the table view.
        """
        self.selectMethod = OWRank.SelectManual  # Manual
        self.selectButtons.button(self.selectMethod).setChecked(True)
        self.commit()

    def setSelectMethod(self, method):
        """Set the selection method and re-apply automatic selection."""
        if self.selectMethod != method:
            self.selectMethod = method
            self.selectButtons.button(method).setChecked(True)
            self.selectMethodChanged()

    def selectMethodChanged(self):
        """Re-run automatic selection unless the method is manual."""
        if self.selectMethod in [OWRank.SelectNone, OWRank.SelectAll,
                                 OWRank.SelectNBest]:
            self.autoSelection()

    def nSelectedChanged(self):
        """Spin box callback: switch to "Best ranked" and reselect."""
        self.selectMethod = OWRank.SelectNBest
        self.selectButtons.button(self.selectMethod).setChecked(True)
        self.selectMethodChanged()

    def autoSelection(self):
        """Select rows in the current view according to `selectMethod`."""
        selModel = self.ranksView.selectionModel()
        rowCount = self.ranksModel.rowCount()
        columnCount = self.ranksModel.columnCount()
        model = self.ranksProxyModel

        if self.selectMethod == OWRank.SelectAll:
            selection = QtGui.QItemSelection(
                model.index(0, 0),
                model.index(rowCount - 1, columnCount - 1)
            )
        elif self.selectMethod == OWRank.SelectNBest:
            nSelected = min(self.nSelected, rowCount)
            selection = QtGui.QItemSelection(
                model.index(0, 0),
                model.index(nSelected - 1, columnCount - 1)
            )
        else:
            # SelectNone and any other method: empty selection.
            selection = QtGui.QItemSelection()

        selModel.select(selection, QtGui.QItemSelectionModel.ClearAndSelect)

    def headerClick(self, index):
        """Header section click handler; also persists header state."""
        if index >= 1 and self.selectMethod == OWRank.SelectNBest:
            # Reselect the top ranked attributes
            self.autoSelection()

        # Store the header states
        disc = bytes(self.discRanksView.horizontalHeader().saveState())
        cont = bytes(self.contRanksView.horizontalHeader().saveState())
        self.headerState = (disc, cont)

    def measuresSelectionChanged(self, measure=None):
        """Measure selection has changed. Update column visibility.
        """
        if measure is None:
            # Update all scores
            measuresMask = None
        else:
            # Update scores for shown column if they are not yet computed.
            shown = self.selectedMeasures.get(measure.name, False)
            index = self.measures.index(measure)
            if all(s is None for s in self.measure_scores[index]) and shown:
                measuresMask = [m == measure for m in self.measures]
            else:
                measuresMask = [False] * len(self.measures)
        self.updateScores(measuresMask)

        self.updateVisibleScoreColumns()

    def updateVisibleScoreColumns(self):
        """
        Update the visible columns of the scores view.
        """
        for i, measure in enumerate(self.measures):
            shown = self.selectedMeasures.get(measure.name)
            self.ranksView.setColumnHidden(i + 1, not shown)

    def updateDelegates(self):
        """Install bar-drawing item delegates on both table views."""
        self.contRanksView.setItemDelegate(
            gui.ColoredBarItemDelegate(self)
        )
        self.discRanksView.setItemDelegate(
            gui.ColoredBarItemDelegate(self)
        )

    def send_report(self):
        """Add the input domain, rank table and output domain to a report."""
        if not self.data:
            return
        self.report_domain("Input", self.data.domain)
        self.report_table("Ranks", self.ranksView, num_format="{:.3f}")
        if self.out_domain_desc is not None:
            self.report_items("Output", self.out_domain_desc)

    def commit(self):
        """Send the data restricted to the selected attributes (or None)."""
        selected = self.selectedAttrs()
        if not self.data or not selected:
            self.send("Reduced Data", None)
            self.out_domain_desc = None
        else:
            domain = Orange.data.Domain(selected, self.data.domain.class_var,
                                        metas=self.data.domain.metas)
            data = Orange.data.Table(domain, self.data)
            self.send("Reduced Data", data)
            self.out_domain_desc = report.describe_domain(data.domain)

    def selectedAttrs(self):
        """Return the attributes of the rows selected in the current view."""
        if not self.data:
            return []
        inds = self.ranksView.selectionModel().selectedRows(0)
        # The view shows the sorted proxy; map back to source rows, which
        # are in domain attribute order.
        source = self.ranksProxyModel.mapToSource
        rows = [source(ind).row() for ind in inds]
        return [self.data.domain.attributes[i] for i in rows]
class OWTableToTimeseries(widget.OWWidget):
    """Reinterpret an input data table as a `Timeseries`.

    The sequence is either given by one of the table's variables (rows
    are then sorted by it) or implied by the order of the rows, in which
    case a new running-index attribute is prepended.
    """
    name = 'As Timeseries'
    description = ('Reinterpret data table as a time series object.')
    icon = 'icons/TableToTimeseries.svg'
    priority = 10

    inputs = [("Data", Table, 'set_data')]
    outputs = [(Output.TIMESERIES, Timeseries)]

    want_main_area = False
    resizing_enabled = False

    # 0: sequence given by `selected_attr`; 1: implied by instance order
    # (matches the order in which the radio buttons are appended).
    radio_sequential = settings.Setting(0)
    selected_attr = settings.Setting('')
    autocommit = settings.Setting(True)

    class Error(widget.OWWidget.Error):
        nan_times = widget.Msg('Some values of chosen sequential attribute '
                               '"{}" are NaN, which makes the values '
                               'impossible to sort')

    def __init__(self):
        super().__init__()
        self.data = None
        box = gui.vBox(self.controlArea, 'Sequence')
        group = gui.radioButtons(box, self, 'radio_sequential',
                                 callback=self.on_changed)
        hbox = gui.hBox(box)
        gui.appendRadioButton(group, 'Sequential attribute:',
                              insertInto=hbox)

        attrs_model = self.attrs_model = VariableListModel()
        combo_attrs = self.combo_attrs = gui.comboBox(
            hbox, self, 'selected_attr',
            callback=self.on_changed,
            sendSelectedValue=True)
        combo_attrs.setModel(attrs_model)

        gui.appendRadioButton(group, 'Sequence is implied by instance order',
                              insertInto=box)

        gui.auto_commit(self.controlArea, self, 'autocommit', '&Apply')
        # TODO: seasonally adjust data (select attributes & season cycle
        #       length (e.g. 12 if you have monthly data))

    def set_data(self, data):
        """Handle the "Data" input and refresh the attribute combo box."""
        self.data = data
        self.attrs_model.clear()
        if self.data is None:
            self.commit()
            return
        if data.domain.has_continuous_attributes():
            # Offer time variables first, then the other continuous ones.
            candidates = (
                [var for var in data.domain
                 if isinstance(var, TimeVariable)] +
                [var for var in data.domain
                 if var.is_continuous and not isinstance(var, TimeVariable)])
            self.attrs_model.wrap(candidates)
            # Prefer the input's own time variable when it has one.
            if getattr(data, 'time_variable', False):
                self.selected_attr = data.time_variable.name
            else:
                self.selected_attr = candidates[0].name
        self.on_changed()

    def on_changed(self):
        """GUI callback: any control change triggers a commit."""
        self.commit()

    def commit(self):
        """Build the time series from the current settings and send it.

        Sends None when there is no data, when the chosen sequential
        attribute is not in the data's domain, or when it contains NaNs.
        """
        data = self.data
        self.Error.clear()
        # The selected attribute only matters in "Sequential attribute"
        # mode; instance-order mode must work without it.
        needs_attr = not self.radio_sequential
        if data is None or (needs_attr and
                            self.selected_attr not in data.domain):
            self.send(Output.TIMESERIES, None)
            return

        if self.radio_sequential:
            # Sequence is implied by instance order: prepend a new
            # running-index attribute under a name not yet in the domain
            # ('__seq__', '__seq__0', '__seq__1', ...).
            name = '__seq__'
            suffix = 0
            while name in data.domain:
                name = '__seq__' + str(suffix)
                suffix += 1
            time_var = ContinuousVariable(name)

            attrs = data.domain.attributes
            attrs = attrs.__class__((time_var, )) + attrs
            X = np.column_stack((np.arange(1, len(data) + 1), data.X))
            Y = np.column_stack((data.Y, ))  # make 2d
            domain = Domain(attrs, data.domain.class_vars,
                            data.domain.metas)
            data = Table(domain, X, Y, data.metas)
        else:
            # Or make a sequence attribute one of the existing attributes
            # and sort all values according to it
            time_var = data.domain[self.selected_attr]
            values = Table.from_table(Domain([], [], [time_var]),
                                      source=data).metas.ravel()
            if np.isnan(values).any():
                self.Error.nan_times(time_var.name)
                # Do not leave a stale series on the output while the
                # error is shown.
                self.send(Output.TIMESERIES, None)
                return
            ordered = np.argsort(values)
            # Re-index only when the rows are not already in order.
            if (ordered != np.arange(len(ordered))).any():
                data = data[ordered]

        ts = Timeseries(data.domain, data)
        # TODO: ensure equidistant
        ts.time_variable = time_var
        self.send(Output.TIMESERIES, ts)
class OWDistanceMap(widget.OWWidget):
    """Show a distance matrix as a colour map.

    The map can be annotated with labels, ordered by hierarchical
    clustering (with dendrograms), and the elements selected in it are
    sent on the "Data" or "Features" output.
    """
    name = "Distance Map"

    description = "Visualize a distance matrix."
    icon = "icons/DistanceMatrix.svg"
    priority = 1200

    inputs = [("Distances", Orange.misc.DistMatrix, "set_distances")]
    outputs = [("Data", Orange.data.Table),
               ("Features", widget.AttributeList)]

    # 0: no sorting, 1: clustering, 2: clustering with ordered leaves
    sorting = settings.Setting(0)

    colormap = settings.Setting(0)
    color_gamma = settings.Setting(0.0)
    color_low = settings.Setting(0.0)
    color_high = settings.Setting(1.0)

    annotation_idx = settings.Setting(0)

    autocommit = settings.Setting(True)

    def __init__(self, parent=None):
        super().__init__(parent)

        self.matrix = None
        # `commit` and `_update_labels` read `items`; make sure it exists
        # before the first input arrives.
        self.items = None
        self._tree = None
        self._ordered_tree = None
        self._sorted_matrix = None
        self._sort_indices = None
        self._selection = None

        box = gui.widgetBox(self.controlArea, "Element sorting", margin=0)
        gui.comboBox(
            box, self, "sorting",
            items=["None", "Clustering",
                   "Clustering with ordered leaves"],
            callback=self._invalidate_ordering)

        box = gui.widgetBox(self.controlArea, "Colors")

        self.colormap_cb = gui.comboBox(box, self, "colormap",
                                        callback=self._update_color)
        self.colormap_cb.setIconSize(QSize(64, 16))
        self.palettes = list(sorted(load_default_palettes()))
        init_color_combo(self.colormap_cb, self.palettes, QSize(64, 16))
        self.colormap_cb.setCurrentIndex(self.colormap)

        form = QFormLayout(
            formAlignment=Qt.AlignLeft,
            labelAlignment=Qt.AlignLeft,
            fieldGrowthPolicy=QFormLayout.AllNonFixedFieldsGrow)
        # NOTE: there is no "Gamma" slider; the `color_gamma` setting is
        # not used by `_update_color`.
        form.addRow(
            "Low",
            gui.hSlider(box, self, "color_low", minValue=0.0, maxValue=1.0,
                        step=0.05, ticks=True, intOnly=False,
                        createLabel=False, callback=self._update_color))
        form.addRow(
            "High",
            gui.hSlider(box, self, "color_high", minValue=0.0, maxValue=1.0,
                        step=0.05, ticks=True, intOnly=False,
                        createLabel=False, callback=self._update_color))
        box.layout().addLayout(form)

        box = gui.widgetBox(self.controlArea, "Annotations")
        self.annot_combo = gui.comboBox(box, self, "annotation_idx",
                                        callback=self._invalidate_annotations)
        self.annot_combo.setModel(itemmodels.VariableListModel())
        self.annot_combo.model()[:] = ["None", "Enumeration"]
        self.controlArea.layout().addStretch()

        gui.auto_commit(self.controlArea, self, "autocommit",
                        "Send data", "Auto send is on")

        self.view = pg.GraphicsView(background="w")
        self.mainArea.layout().addWidget(self.view)

        # Grid layout: dendrograms on the top/left, the map in the
        # middle, labels on the right/bottom.
        self.grid_widget = pg.GraphicsWidget()
        self.grid = QGraphicsGridLayout()
        self.grid_widget.setLayout(self.grid)

        self.viewbox = pg.ViewBox(enableMouse=False)
        self.viewbox.setAcceptedMouseButtons(Qt.NoButton)
        self.viewbox.setAcceptHoverEvents(False)
        self.grid.addItem(self.viewbox, 1, 1)

        self.left_dendrogram = DendrogramWidget(
            self.grid_widget, orientation=DendrogramWidget.Left)
        self.left_dendrogram.setAcceptedMouseButtons(Qt.NoButton)
        self.left_dendrogram.setAcceptHoverEvents(False)

        self.top_dendrogram = DendrogramWidget(
            self.grid_widget, orientation=DendrogramWidget.Top)
        self.top_dendrogram.setAcceptedMouseButtons(Qt.NoButton)
        self.top_dendrogram.setAcceptHoverEvents(False)

        self.grid.addItem(self.left_dendrogram, 1, 0)
        self.grid.addItem(self.top_dendrogram, 0, 1)

        self.right_labels = TextList(alignment=Qt.AlignLeft)
        self.bottom_labels = TextList(orientation=Qt.Horizontal,
                                      alignment=Qt.AlignRight)

        self.grid.addItem(self.right_labels, 1, 2)
        self.grid.addItem(self.bottom_labels, 2, 1)

        self.view.setCentralItem(self.grid_widget)

        self.left_dendrogram.hide()
        self.top_dendrogram.hide()
        self.right_labels.hide()
        self.bottom_labels.hide()

        self.matrix_item = None
        self.dendrogram = None

        self.grid_widget.scene().installEventFilter(self)

    def set_distances(self, matrix):
        """Handle the "Distances" input."""
        self.clear()
        self.error(0)
        if matrix is not None:
            N, _ = matrix.X.shape
            if N < 2:
                self.error(0, "Empty distance matrix.")
                matrix = None

        self.matrix = matrix
        if matrix is not None:
            self.set_items(matrix.row_items, matrix.axis)
        else:
            self.set_items(None)

    def set_items(self, items, axis=1):
        """Store `items` and offer the annotation choices that fit them."""
        self.items = items
        model = self.annot_combo.model()
        if items is None:
            model[:] = ["None", "Enumeration"]
        elif not axis:
            model[:] = ["None", "Enumeration", "Attribute names"]
            self.annotation_idx = 2
        elif isinstance(items, Orange.data.Table):
            model[:] = ["None", "Enumeration"] + list(items.domain)
        elif isinstance(items, list) and \
                all(isinstance(item, Orange.data.Variable)
                    for item in items):
            model[:] = ["None", "Enumeration", "Name"]
        else:
            model[:] = ["None", "Enumeration"]
        # Keep a restored index within the range of the new choices.
        self.annotation_idx = min(self.annotation_idx, len(model) - 1)

    def clear(self):
        """Drop the matrix, all state derived from it, and the plot."""
        self.matrix = None
        self.cluster = None
        self._tree = None
        self._ordered_tree = None
        self._sorted_matrix = None
        self._sort_indices = None
        self._selection = []
        self._clear_plot()

    def handleNewSignals(self):
        """Rebuild the scene for the new input and commit."""
        if self.matrix is not None:
            self._update_ordering()
            self._setup_scene()
            self._update_labels()
        self.unconditional_commit()

    def _clear_plot(self):
        """Remove the map item and hide the dendrograms and labels."""
        def remove(item):
            item.setParentItem(None)
            item.scene().removeItem(item)

        if self.matrix_item:
            remove(self.matrix_item)
            self.matrix_item = None

        self.top_dendrogram.hide()
        self.left_dendrogram.hide()

        self._set_labels(None)

    def _cluster_tree(self):
        """Return the (cached) clustering of the matrix."""
        if self._tree is None:
            self._tree = hierarchical.dist_matrix_clustering(self.matrix)
        return self._tree

    def _ordered_cluster_tree(self):
        """Return the (cached) clustering with optimal leaf ordering."""
        if self._ordered_tree is None:
            tree = self._cluster_tree()
            self._ordered_tree = \
                hierarchical.optimal_leaf_ordering(tree, self.matrix)
        return self._ordered_tree

    def _setup_scene(self):
        """Create the map item for `_sorted_matrix` and its dendrograms."""
        self.matrix_item = DistanceMapItem(self._sorted_matrix)
        # Scale the y axis to compensate for pg.ViewBox's y axis invert
        self.matrix_item.scale(1, -1)
        self.viewbox.addItem(self.matrix_item)
        # Set fixed view box range.
        h, w = self._sorted_matrix.shape
        self.viewbox.setRange(QRectF(0, -h, w, h), padding=0)

        self.matrix_item.selectionChanged.connect(self._invalidate_selection)

        if self.sorting == 0:
            tree = None
        elif self.sorting == 1:
            tree = self._cluster_tree()
        else:
            tree = self._ordered_cluster_tree()

        self._set_displayed_dendrogram(tree)

        self._update_color()

    def _set_displayed_dendrogram(self, root):
        """Show `root` in both dendrograms, or hide them if it is None."""
        self.left_dendrogram.set_root(root)
        self.top_dendrogram.set_root(root)
        self.left_dendrogram.setVisible(root is not None)
        self.top_dendrogram.setVisible(root is not None)

        # 0 for no tree, -1 otherwise.
        constraint = 0 if root is None else -1  # 150
        self.left_dendrogram.setMaximumWidth(constraint)
        self.top_dendrogram.setMaximumHeight(constraint)

    def _invalidate_ordering(self):
        """Sorting combo callback: re-order and redraw the map."""
        self._sorted_matrix = None
        if self.matrix is not None:
            # Remove the previous map item first; otherwise every change
            # of sorting would stack another item in the view box.
            self._clear_plot()
            self._update_ordering()
            self._setup_scene()
            # Labels must follow the new element order.
            self._update_labels()

    def _update_ordering(self):
        """Compute `_sorted_matrix` and `_sort_indices` for `sorting`."""
        if self.sorting == 0:
            self._sorted_matrix = self.matrix.X
            self._sort_indices = None
        else:
            if self.sorting == 1:
                tree = self._cluster_tree()
            elif self.sorting == 2:
                tree = self._ordered_cluster_tree()

            leaves = hierarchical.leaves(tree)
            indices = numpy.array([leaf.value.index for leaf in leaves])
            X = self.matrix.X
            # Permute rows and columns by the same leaf order.
            self._sorted_matrix = X[indices[:, numpy.newaxis],
                                    indices[numpy.newaxis, :]]
            self._sort_indices = indices

    def _invalidate_annotations(self):
        """Annotation combo callback."""
        if self.matrix is not None:
            self._update_labels()

    def _update_labels(self, ):
        """Compute the labels for the current annotation choice."""
        # Default covers item/annotation combinations not handled below,
        # which previously left `labels` unbound.
        labels = None
        if self.annotation_idx == 0:
            labels = None
        elif self.annotation_idx == 1:
            labels = [str(i + 1) for i in range(self.matrix.dim[0])]
        elif self.annot_combo.model()[
                self.annotation_idx] == "Attribute names":
            attr = self.matrix.row_items.domain.attributes
            labels = [str(attr[i]) for i in range(self.matrix.dim[0])]
        elif self.annotation_idx == 2 and \
                isinstance(self.items, (widget.AttributeList, list)):
            # `set_items` offers "Name" for any list of variables, so a
            # plain list is accepted here as well.
            labels = [v.name for v in self.items]
        elif isinstance(self.items, Orange.data.Table):
            var = self.annot_combo.model()[self.annotation_idx]
            column, _ = self.items.get_column_view(var)
            labels = [var.repr_val(value) for value in column]

        self._set_labels(labels)

    def _set_labels(self, labels):
        """Show `labels` (in original element order) next to the map.

        A None or empty `labels` hides both label lists.
        """
        self._labels = labels

        if labels and self.sorting:
            # Reorder the labels to match the displayed (sorted) matrix.
            sortind = self._sort_indices
            labels = [labels[i] for i in sortind]

        for textlist in [self.right_labels, self.bottom_labels]:
            textlist.set_labels(labels or [])
            textlist.setVisible(bool(labels))

        constraint = -1 if labels else 0
        self.right_labels.setMaximumWidth(constraint)
        self.bottom_labels.setMaximumHeight(constraint)

    def _update_color(self):
        """Rebuild the map's lookup table from the palette and sliders."""
        if self.matrix_item:
            name, colors = self.palettes[self.colormap]
            # Use the palette entry with the largest key.
            n, colors = max(colors.items())
            colors = numpy.array(colors, dtype=numpy.ubyte)
            low, high = self.color_low * 255, self.color_high * 255
            points = numpy.linspace(low, high, n)
            space = numpy.linspace(0, 255, 255)

            r = numpy.interp(space, points, colors[:, 0], left=255, right=0)
            g = numpy.interp(space, points, colors[:, 1], left=255, right=0)
            b = numpy.interp(space, points, colors[:, 2], left=255, right=0)
            colortable = numpy.c_[r, g, b]
            self.matrix_item.setLookupTable(colortable)

    def _invalidate_selection(self):
        """Map the selection in the view to original indices and commit."""
        ranges = self.matrix_item.selections()
        ranges = reduce(iadd, ranges, [])
        indices = reduce(iadd, ranges, [])
        if self.sorting:
            # Translate displayed positions back to matrix indices.
            sortind = self._sort_indices
            indices = [sortind[i] for i in indices]
        self._selection = list(sorted(set(indices)))
        self.commit()

    def commit(self):
        """Send the selected rows/columns or the selected features."""
        datasubset = None
        featuresubset = None

        if not self._selection:
            pass
        elif isinstance(self.items, Orange.data.Table):
            indices = self._selection
            if self.matrix.axis == 1:
                datasubset = self.items.from_table_rows(self.items, indices)
            elif self.matrix.axis == 0:
                domain = Orange.data.Domain(
                    [self.items.domain[i] for i in indices],
                    self.items.domain.class_vars,
                    self.items.domain.metas)
                datasubset = Orange.data.Table.from_table(domain, self.items)
        elif isinstance(self.items, widget.AttributeList):
            subset = [self.items[i] for i in self._selection]
            featuresubset = widget.AttributeList(subset)

        self.send("Data", datasubset)
        self.send("Features", featuresubset)
class OWRandomForest(OWBaseLearner):
    """Learner widget exposing the settings of `RandomForestLearner`."""
    name = "随机森林(Random Forest)"
    description = "使用一组决策树进行预测。"
    icon = "icons/RandomForest.svg"
    replaces = [
        "Orange.widgets.classify.owrandomforest.OWRandomForest",
        "Orange.widgets.regression.owrandomforestregression.OWRandomForestRegression",
    ]
    priority = 40
    keywords = []

    LEARNER = RandomForestLearner

    # Each optional limit has a value and a `use_*` flag that says
    # whether the value is passed to the learner.
    n_estimators = settings.Setting(10)
    max_features = settings.Setting(5)
    use_max_features = settings.Setting(False)
    use_random_state = settings.Setting(False)
    max_depth = settings.Setting(3)
    use_max_depth = settings.Setting(False)
    min_samples_split = settings.Setting(5)
    use_min_samples_split = settings.Setting(True)
    index_output = settings.Setting(0)

    class Error(OWBaseLearner.Error):
        not_enough_features = Msg("Insufficient number of attributes ({})")

    def add_main_layout(self):
        """Create the controls for the forest and tree-growth settings."""
        # this is part of init, pylint: disable=attribute-defined-outside-init
        on_change = self.settings_changed

        basic_box = gui.vBox(self.controlArea, '基本特性')
        self.n_estimators_spin = gui.spin(
            basic_box, self, "n_estimators", minv=1, maxv=10000,
            controlWidth=80, alignment=Qt.AlignRight,
            label="树的数量: ", callback=on_change)
        self.max_features_spin = gui.spin(
            basic_box, self, "max_features", minv=2, maxv=50,
            controlWidth=80, label="每次拆分时考虑的属性数: ",
            callback=on_change, checked="use_max_features",
            checkCallback=on_change, alignment=Qt.AlignRight)
        self.random_state = gui.checkBox(
            basic_box, self, "use_random_state",
            label="可重复的训练", callback=on_change)

        growth_box = gui.vBox(self.controlArea, "生长控制")
        self.max_depth_spin = gui.spin(
            growth_box, self, "max_depth", minv=1, maxv=50,
            controlWidth=80, label="单个树的极限深度: ",
            alignment=Qt.AlignRight, callback=on_change,
            checked="use_max_depth", checkCallback=on_change)
        self.min_samples_split_spin = gui.spin(
            growth_box, self, "min_samples_split", minv=2, maxv=1000,
            controlWidth=80, label="小于...不要拆分: ",
            callback=on_change, checked="use_min_samples_split",
            checkCallback=on_change, alignment=Qt.AlignRight)

    def create_learner(self):
        """Instantiate `LEARNER` with only the enabled settings."""
        params = {"n_estimators": self.n_estimators}
        if self.use_max_features:
            params["max_features"] = self.max_features
        if self.use_random_state:
            # A fixed seed is what makes the training replicable.
            params["random_state"] = 0
        if self.use_max_depth:
            params["max_depth"] = self.max_depth
        if self.use_min_samples_split:
            params["min_samples_split"] = self.min_samples_split

        return self.LEARNER(preprocessors=self.preprocessors, **params)

    def check_data(self):
        """Validate the data; also require enough attributes.

        Returns `self.valid_data`.
        """
        self.Error.not_enough_features.clear()
        if not super().check_data():
            return self.valid_data

        attribute_count = len(self.data.domain.attributes)
        if self.use_max_features and self.max_features > attribute_count:
            self.Error.not_enough_features(attribute_count)
            self.valid_data = False
        return self.valid_data

    def get_learner_parameters(self):
        """Called by send report to list the parameters of the learner."""
        no_limit = "unlimited"
        considered = self.max_features if self.use_max_features else no_limit
        depth = self.max_depth if self.use_max_depth else no_limit
        split = (self.min_samples_split
                 if self.use_min_samples_split else no_limit)
        replicable = ["No", "Yes"][self.use_random_state]
        return (
            ("Number of trees", self.n_estimators),
            ("Maximal number of considered features", considered),
            ("Replicable training", replicable),
            ("Maximal tree depth", depth),
            ("Stop splitting nodes with maximum instances", split),
        )
class OWImpute(OWWidget):
    """Widget that imputes missing values, per variable or by a default.

    A default method applies to every variable; individual variables can
    override it through the list on the left and the radio buttons on
    the right. The resulting table is sent on the "Data" output.
    """

    name = "Impute"
    description = "Impute missing values in the data table."
    icon = "icons/Impute.svg"
    priority = 2130

    inputs = [("Data", Orange.data.Table, "set_data"),
              ("Learner", Learner, "set_learner")]
    outputs = [("Data", Orange.data.Table)]

    METHODS = METHODS

    settingsHandler = settings.DomainContextHandler()

    # Index into METHODS of the method used when a variable has no override.
    default_method = settings.Setting(1)
    # Maps variable_key(var) -> State(method, params) for overridden variables.
    variable_methods = settings.ContextSetting({})
    autocommit = settings.Setting(True)

    want_main_area = False
    resizing_enabled = False

    def __init__(self):
        super().__init__()
        self.modified = False

        box = group_box(self.tr("Default method"),
                        layout=layout(Qt.Vertical))
        self.controlArea.layout().addWidget(box)

        bgroup = QButtonGroup()

        # The first and the last entry of METHODS are not offered as defaults.
        for i, m in enumerate(self.METHODS[1:-1], 1):
            b = radio_button(m.name, checked=i == self.default_method,
                             group=bgroup, group_id=i)
            box.layout().addWidget(b)

        self.defbggroup = bgroup

        bgroup.buttonClicked[int].connect(self.set_default_method)
        box = group_box(self.tr("Individual attribute settings"),
                        layout=layout(Qt.Horizontal))
        self.controlArea.layout().addWidget(box)

        self.varview = QtGui.QListView(
            selectionMode=QtGui.QListView.ExtendedSelection)
        self.varview.setItemDelegate(DisplayFormatDelegate())
        self.varmodel = itemmodels.VariableListModel()
        self.varview.setModel(self.varmodel)
        self.varview.selectionModel().selectionChanged.connect(
            self._on_var_selection_changed)
        self.selection = self.varview.selectionModel()

        box.layout().addWidget(self.varview)

        method_layout = layout(Qt.Vertical, margins=0)
        box.layout().addLayout(method_layout)

        methodbox = group_box(layout=layout(Qt.Vertical))

        bgroup = QButtonGroup()
        for i, m in enumerate(self.METHODS):
            b = radio_button(m.name, group=bgroup, group_id=i)
            methodbox.layout().addWidget(b)

        assert self.METHODS[-1].short == "value"

        # Page 0 holds the combo (discrete values), page 1 the line edit
        # (continuous values); see _on_var_selection_changed.
        self.value_stack = value_stack = QStackedLayout()
        self.value_combo = QComboBox(
            minimumContentsLength=8,
            sizeAdjustPolicy=QComboBox.AdjustToMinimumContentsLength,
            activated=self._on_value_changed)
        self.value_line = QLineEdit(editingFinished=self._on_value_changed)
        self.value_line.setValidator(QDoubleValidator())
        value_stack.addWidget(self.value_combo)
        value_stack.addWidget(self.value_line)
        methodbox.layout().addLayout(value_stack)

        bgroup.buttonClicked[int].connect(
            self.set_method_for_current_selection)
        reset_button = push_button("Restore all to default",
                                   clicked=self.reset_var_methods,
                                   default=False, autoDefault=False)

        method_layout.addWidget(methodbox)
        method_layout.addStretch(2)
        method_layout.addWidget(reset_button)
        self.varmethodbox = methodbox
        self.varbgroup = bgroup

        box = gui.auto_commit(self.controlArea, self, "autocommit", "Commit",
                              orientation="horizontal",
                              checkbox_label="Commit on any change")
        box.layout().insertSpacing(0, 80)
        box.layout().insertWidget(0, self.report_button)
        self.data = None
        self.learner = None

    def set_default_method(self, index):
        """Set the current selected default imputation method."""
        if self.default_method != index:
            self.default_method = index
            self.defbggroup.button(index).setChecked(True)
            self._invalidate()

    @check_sql_input
    def set_data(self, data):
        """Handle the "Data" input: reset state, restore context, commit."""
        self.closeContext()
        self.clear()
        self.data = data
        if data is not None:
            self.varmodel[:] = data.domain.variables
            self.openContext(data.domain)
            self.restore_state(self.variable_methods)
            itemmodels.select_row(self.varview, 0)
        self.unconditional_commit()

    def set_learner(self, learner):
        """Handle the "Learner" input.

        Recommits only when some variable is effectively imputed by the
        "model" method, since only those results depend on the learner.
        """
        self.learner = learner
        # Iterate over the model that state_for_column indexes, so the
        # column range always matches the variables actually listed.
        if self.data is not None and \
                any(state.method.short == "model" for state in
                    map(self.state_for_column, range(len(self.varmodel)))):
            self.commit()

    def restore_state(self, state):
        """Push stored per-variable states into the list model.

        :param dict state: mapping of variable keys to states
        """
        for i, var in enumerate(self.varmodel):
            key = variable_key(var)
            if key in state:
                index = self.varmodel.index(i)
                self.varmodel.setData(index, state[key], Qt.UserRole)

    def clear(self):
        """Drop the data, the variable list and all per-variable methods."""
        self.varmodel[:] = []
        self.variable_methods = {}
        self.data = None
        self.modified = False

    def state_for_column(self, column):
        """
        Return the effective imputation state for `column`.

        A variable without an override, or one whose override is the
        first entry of METHODS, gets the default method with no params.

        :param int column:
        :rtype State:
        """
        var = self.varmodel[column]

        state = self.variable_methods.get(variable_key(var), None)
        if state is None or state.method == METHODS[0]:
            state = State(METHODS[self.default_method], ())
        return state

    def imputed_vars_for_column(self, column):
        """Return the variable(s) replacing `column` in the output domain.

        "leave" and "drop" return the original variable; every other
        method returns whatever the matching `impute` object produces.
        """
        state = self.state_for_column(column)
        data = self.data
        var = data.domain[column]
        method, params = state
        if method.short == "leave":
            return var
        elif method.short == "drop":
            return var
        elif method.short == "avg":
            return impute.Average()(data, var)
        elif method.short == "model":
            learner = (self.learner if self.learner is not None
                       else Orange.classification.SimpleTreeLearner())
            return impute.Model(learner)(data, var)
        elif method.short == "random":
            return impute.Random()(data, var)
        elif method.short == "value":
            return impute.Default(float(params[0]))(data, var)
        elif method.short == "as_value":
            return impute.AsValue()(data, var)
        else:
            assert False

    def commit(self):
        """Build the imputed table and send it to the "Data" output."""
        if self.data is not None:
            varstates = [(var, self.state_for_column(i))
                         for i, var in enumerate(self.varmodel)]
            attrs = []
            class_vars = []
            # Columns whose rows with undefined values are removed ("drop").
            filter_columns = []
            for i, (var, state) in enumerate(varstates):
                if state.method.short == "drop":
                    imputedvars = [var]
                    filter_columns.append(i)
                elif state.method.short == "leave":
                    imputedvars = [var]
                else:
                    imputedvars = self.imputed_vars_for_column(i)
                    if imputedvars is None:
                        imputedvars = []
                    elif isinstance(imputedvars, Orange.data.Variable):
                        imputedvars = [imputedvars]

                # Columns past the attributes belong to the class variables.
                if i < len(self.data.domain.attributes):
                    attrs.extend(imputedvars)
                else:
                    class_vars.extend(imputedvars)

            domain = Orange.data.Domain(attrs, class_vars,
                                        self.data.domain.metas)

            data = self.data.from_table(domain, self.data)

            if filter_columns:
                filter_ = Orange.data.filter.IsDefined(filter_columns)
                data = filter_(data)
        else:
            data = None

        self.send("Data", data)
        self.modified = False

    def send_report(self):
        """Report the default method and all per-variable overrides."""
        specific = []
        for var in self.varmodel:
            state = self.variable_methods.get(variable_key(var), None)
            if state is not None and state.method.short:
                if state.method.short == "value":
                    if var.is_continuous:
                        specific.append(
                            "{} (impute value {})".format(
                                var.name, float(state.params[0])))
                    else:
                        # For discrete variables params[0] is an index
                        # into var.values.
                        specific.append(
                            "{} (impute value '{}')".format(
                                var.name, var.values[state.params[0]]))
                else:
                    specific.append(
                        "{} ({})".format(
                            var.name, state.method.name.lower()))

        default = self.METHODS[self.default_method].name
        if specific:
            self.report_items((
                ("Default method", default),
                ("Specific imputers", ", ".join(specific))))
        else:
            self.report_items((("Method", default), ))

    def _invalidate(self):
        """Mark the output as stale and commit."""
        self.modified = True
        self.commit()

    def _on_var_selection_changed(self):
        """Sync the method buttons and the value editor with the selection."""
        indexes = self.selection.selectedIndexes()

        selected_vars = [self.varmodel[index.row()] for index in indexes]
        defstate = State(METHODS[0], ())
        states = [self.variable_methods.get(variable_key(var), defstate)
                  for var in selected_vars]
        all_cont = all(var.is_continuous for var in selected_vars)
        states = list(unique(states))
        method = None
        params = ()
        if len(states) == 1:
            # Every selected variable shares one state: show it.
            method, params = states[0]
            mindex = METHODS.index(method)
            self.varbgroup.button(mindex).setChecked(True)
        elif self.varbgroup.checkedButton() is not None:
            # Mixed states: uncheck the current button. Exclusivity has
            # to be lifted for the uncheck to take effect.
            self.varbgroup.setExclusive(False)
            self.varbgroup.checkedButton().setChecked(False)
            self.varbgroup.setExclusive(True)

        values, enabled, stack_index = [], False, 0
        value, value_index = "0.0", 0
        has_fixed_value = method is not None and method.short == "value"
        if all_cont:
            enabled, stack_index = True, 1
            if has_fixed_value:
                value = params[0]
        elif len(selected_vars) == 1 and selected_vars[0].is_discrete:
            values, enabled, stack_index = selected_vars[0].values, True, 0
            if has_fixed_value:
                value_index = self._stored_value_index(values, params[0])

        self.value_stack.setCurrentIndex(stack_index)
        self.value_stack.setEnabled(enabled)

        if stack_index == 0:
            self.value_combo.clear()
            self.value_combo.addItems(values)
            self.value_combo.setCurrentIndex(value_index)
        else:
            # setText needs a string; a state created while the combo
            # page was active stores an integer.
            self.value_line.setText(str(value))

    @staticmethod
    def _stored_value_index(values, stored):
        """Return the combo row for a stored fixed value, 0 if unknown.

        set_method_for_indexes stores the combo's current index, so an
        integer is taken as the row itself; anything else is looked up
        among `values`.
        """
        if isinstance(stored, int):
            return stored if 0 <= stored < len(values) else 0
        try:
            return values.index(stored)
        except ValueError:
            return 0

    def _on_value_changed(self):
        """Reapply the checked method after the fixed value was edited."""
        # The "fixed" value in the widget has been changed by the user.
        index = self.varbgroup.checkedId()
        self.set_method_for_current_selection(index)

    def set_method_for_current_selection(self, methodindex):
        """Assign METHODS[methodindex] to all selected variables."""
        indexes = self.selection.selectedIndexes()
        self.set_method_for_indexes(indexes, methodindex)

    def set_method_for_indexes(self, indexes, methodindex):
        """Assign METHODS[methodindex] to the variables at `indexes`.

        For the "value" method the parameter is read from the visible
        page of the value stack: the combo's current index or the line
        edit's text.
        """
        method = METHODS[methodindex]
        params = (None, )
        if method.short == "value":
            if self.value_stack.currentIndex() == 0:
                value = self.value_combo.currentIndex()
            else:
                value = self.value_line.text()
            params = (value, )
        elif method.short == "model":
            params = ("model", )
        state = State(method, params)

        for index in indexes:
            self.varmodel.setData(index, state, Qt.UserRole)
            var = self.varmodel[index.row()]
            self.variable_methods[variable_key(var)] = state

        self._invalidate()

    def reset_var_methods(self):
        """Reset every variable to the first entry of METHODS."""
        indexes = map(self.varmodel.index, range(len(self.varmodel)))
        self.set_method_for_indexes(indexes, 0)
class OWCalibrationPlot(widget.OWWidget):
    """Widget plotting calibration curves or threshold-dependent metrics.

    It receives evaluation results, draws one curve (or set of curves)
    per selected classifier and, when the conditions listed in `apply`
    hold, outputs a calibrated or thresholded model.
    """

    name = "Calibration Plot"
    description = "Calibration plot based on evaluation of classifiers."
    icon = "icons/CalibrationPlot.svg"
    priority = 1030
    keywords = []

    class Inputs:
        evaluation_results = Input("Evaluation Results", Results)

    class Outputs:
        calibrated_model = Output("Calibrated Model", Model)

    class Error(widget.OWWidget.Error):
        non_discrete_target = Msg("Calibration plot requires a categorical "
                                  "target variable.")
        empty_input = widget.Msg("Empty result on input. Nothing to display.")
        nan_classes = \
            widget.Msg("Remove test data instances with unknown classes.")
        all_target_class = widget.Msg(
            "All data instances belong to target class.")
        no_target_class = widget.Msg(
            "No data instances belong to target class.")

    class Warning(widget.OWWidget.Warning):
        omitted_folds = widget.Msg(
            "Test folds where all data belongs to (non)-target are not shown.")
        # The two literals are concatenated; the trailing space keeps
        # "are" and "skipped" apart.
        omitted_nan_prob_points = widget.Msg(
            "Instance for which the model couldn't compute probabilities are "
            "skipped.")
        no_valid_data = widget.Msg("No valid data for model(s) {}")

    class Information(widget.OWWidget.Information):
        no_output = Msg("Can't output a model: {}")

    settingsHandler = EvaluationResultsContextHandler()
    target_index = settings.ContextSetting(0)
    selected_classifiers = settings.ContextSetting([])
    # Index into Metrics; 0 selects the calibration curve itself.
    score = settings.Setting(0)
    output_calibration = settings.Setting(0)
    fold_curves = settings.Setting(False)
    display_rug = settings.Setting(True)
    threshold = settings.Setting(0.5)
    visual_settings = settings.Setting({}, schema_only=True)
    auto_commit = settings.Setting(True)

    graph_name = "plot"

    def __init__(self):
        super().__init__()

        self.results = None
        # List of (classifier name, (x values, curves)); see _setup_plot.
        self.scores = None
        self.classifier_names = []
        self.colors = []
        self.line = None

        self._last_score_value = -1

        box = gui.vBox(self.controlArea, box="Settings")
        self.target_cb = gui.comboBox(
            box, self, "target_index", label="Target:",
            orientation=Qt.Horizontal, callback=self.target_index_changed,
            contentsLength=8, searchable=True)
        gui.checkBox(
            box, self, "display_rug", "Show rug",
            callback=self._on_display_rug_changed)
        gui.checkBox(
            box, self, "fold_curves", "Curves for individual folds",
            callback=self._replot)

        self.classifiers_list_box = gui.listBox(
            self.controlArea, self, "selected_classifiers",
            "classifier_names",
            box="Classifier", selectionMode=QListWidget.ExtendedSelection,
            sizePolicy=(QSizePolicy.Preferred, QSizePolicy.Preferred),
            sizeHint=QSize(150, 40),
            callback=self._on_selection_changed)

        box = gui.vBox(self.controlArea, "Metrics")
        combo = gui.comboBox(
            box, self, "score", items=(metric.name for metric in Metrics),
            callback=self.score_changed)

        self.explanation = gui.widgetLabel(
            box, wordWrap=True, fixedWidth=combo.sizeHint().width())
        self.explanation.setContentsMargins(8, 8, 0, 0)
        font = self.explanation.font()
        font.setPointSizeF(0.85 * font.pointSizeF())
        self.explanation.setFont(font)

        gui.radioButtons(
            box, self, value="output_calibration",
            btnLabels=("Sigmoid calibration", "Isotonic calibration"),
            label="Output model calibration", callback=self.apply)

        self.info_box = gui.widgetBox(self.controlArea, "Info")
        self.info_label = gui.widgetLabel(self.info_box)

        gui.auto_apply(self.buttonsArea, self, "auto_commit",
                       commit=self.apply)

        self.plotview = pg.GraphicsView(background="w")
        axes = {
            "bottom": AxisItem(orientation="bottom"),
            "left": AxisItem(orientation="left")
        }
        self.plot = pg.PlotItem(enableMenu=False, axisItems=axes)
        self.plot.parameter_setter = ParameterSetter(self.plot)
        self.plot.setMouseEnabled(False, False)
        self.plot.hideButtons()

        for axis_name in ("bottom", "left"):
            axis = self.plot.getAxis(axis_name)
            axis.setPen(pg.mkPen(color=0.0))
            # Remove the condition (that is, allow setting this for bottom
            # axis) when pyqtgraph is fixed
            # Issue: https://github.com/pyqtgraph/pyqtgraph/issues/930
            # Pull request: https://github.com/pyqtgraph/pyqtgraph/pull/932
            if axis_name != "bottom":  # remove if when pyqtgraph is fixed
                axis.setStyle(stopAxisAtTick=(True, True))

        self.plot.setRange(xRange=(0.0, 1.0), yRange=(0.0, 1.0), padding=0.05)
        self.plotview.setCentralItem(self.plot)

        self.mainArea.layout().addWidget(self.plotview)
        self._set_explanation()

        VisualSettingsDialog(self, self.plot.parameter_setter.initial_settings)

    @Inputs.evaluation_results
    def set_results(self, results):
        """Handle new evaluation results; reject unusable ones with errors."""
        self.closeContext()
        self.clear()
        self.Error.clear()
        self.Information.clear()

        self.results = None
        if results is not None:
            if not results.domain.has_discrete_class:
                self.Error.non_discrete_target()
            elif not results.actual.size:
                self.Error.empty_input()
            elif np.any(np.isnan(results.actual)):
                self.Error.nan_classes()
            else:
                self.results = results
                self._initialize(results)
                class_var = self.results.domain.class_var
                # Binary classes start with the second value as target.
                self.target_index = int(len(class_var.values) == 2)
                self.openContext(class_var, self.classifier_names)
                self._replot()

        self.apply()

    def clear(self):
        """Reset the plot and everything derived from the results."""
        self.plot.clear()
        self.results = None
        self.classifier_names = []
        self.selected_classifiers = []
        self.target_cb.clear()
        self.colors = []

    def target_index_changed(self):
        """React to a new target class chosen in the combo box."""
        if len(self.results.domain.class_var.values) == 2:
            # With two classes the threshold is mirrored for the other one.
            self.threshold = 1 - self.threshold
        self._set_explanation()
        self._replot()
        self.apply()

    def score_changed(self):
        """React to a new metric; recompute the output only on a change."""
        self._set_explanation()
        self._replot()
        if self._last_score_value != self.score:
            self.apply()
            self._last_score_value = self.score

    def _set_explanation(self):
        """Update the explanation label, visible controls and axis labels."""
        explanation = Metrics[self.score].explanation
        if explanation:
            self.explanation.setText(explanation)
            self.explanation.show()
        else:
            self.explanation.hide()

        if self.score == 0:
            self.controls.output_calibration.show()
            self.info_box.hide()
        else:
            self.controls.output_calibration.hide()
            self.info_box.show()

        axis = self.plot.getAxis("bottom")
        axis.setLabel("Predicted probability" if self.score == 0
                      else "Threshold probability to classify as positive")

        axis = self.plot.getAxis("left")
        axis.setLabel(Metrics[self.score].name)

    def _initialize(self, results):
        """Fill the classifier list, colors and target combo from results."""
        n = len(results.predicted)
        names = getattr(results, "learner_names", None)
        if names is None:
            names = ["#{}".format(i + 1) for i in range(n)]

        self.classifier_names = names
        self.colors = colorpalettes.get_default_curve_colors(n)

        for i in range(n):
            item = self.classifiers_list_box.item(i)
            item.setIcon(colorpalettes.ColorIcon(self.colors[i]))

        self.selected_classifiers = list(range(n))
        self.target_cb.addItems(results.domain.class_var.values)
        self.target_index = 0

    def _rug(self, data, pen_args):
        """Draw rug ticks: at the top for target rows, at the bottom else."""
        color = pen_args["pen"].color()
        rh = 0.025  # tick height
        # Each probability appears twice: the two ends of its tick.
        rug_x = np.c_[data.probs[:-1], data.probs[:-1]]
        rug_x_true = rug_x[data.ytrue].ravel()
        rug_x_false = rug_x[~data.ytrue].ravel()

        rug_y_true = np.ones_like(rug_x_true)
        rug_y_true[1::2] = 1 - rh
        rug_y_false = np.zeros_like(rug_x_false)
        rug_y_false[1::2] = rh

        self.plot.plot(
            rug_x_false, rug_y_false,
            pen=color, connect="pairs", antialias=True)
        self.plot.plot(
            rug_x_true, rug_y_true,
            pen=color, connect="pairs", antialias=True)

    def plot_metrics(self, data, metrics, pen_args):
        """Plot `metrics` over `data.probs`; return (x values, curves).

        With ``metrics`` being None the smoothed probability curve is
        drawn instead.
        """
        if metrics is None:
            return self._prob_curve(data.ytrue, data.probs[:-1], pen_args)
        ys = [metric(data) for metric in metrics]
        for y in ys:
            self.plot.plot(data.probs, y, **pen_args)
        return data.probs, ys

    def _prob_curve(self, ytrue, probs, pen_args):
        """Plot the smoothed calibration curve; return (x, (y, ))."""
        xmin, xmax = probs.min(), probs.max()
        x = np.linspace(xmin, xmax, 100)
        if xmax != xmin:
            f = gaussian_smoother(probs, ytrue, sigma=0.15 * (xmax - xmin))
            y = f(x)
        else:
            # All probabilities are equal: nothing to smooth.
            y = np.full(100, xmax)

        self.plot.plot(x, y, symbol="+", symbolSize=4, **pen_args)
        return x, (y, )

    def _setup_plot(self):
        """Draw the curves of all selected classifiers for the target."""
        target = self.target_index
        results = self.results
        metrics = Metrics[self.score].functions
        plot_folds = self.fold_curves and results.folds is not None
        self.scores = []

        if not self._check_class_presence(results.actual == target):
            return

        self.Warning.omitted_folds.clear()
        self.Warning.omitted_nan_prob_points.clear()
        # Cleared here like its siblings; otherwise the warning would
        # outlive the condition that raised it.
        self.Warning.no_valid_data.clear()
        no_valid_models = []
        shadow_width = 4 + 4 * plot_folds
        for clsf in self.selected_classifiers:
            data = Curves.from_results(results, target, clsf)
            if data.tot == 0:  # all probabilities are nan
                no_valid_models.append(clsf)
                continue
            if data.tot != results.probabilities.shape[1]:  # some are nan
                self.Warning.omitted_nan_prob_points()

            color = self.colors[clsf]
            # NOTE(review): the keyword is spelled `antialias` in the other
            # plot calls of this class; `antiAlias` here may be ignored by
            # pyqtgraph - confirm before changing the rendering.
            pen_args = dict(
                pen=pg.mkPen(color, width=1), antiAlias=True,
                shadowPen=pg.mkPen(color.lighter(160), width=shadow_width))
            self.scores.append(
                (self.classifier_names[clsf],
                 self.plot_metrics(data, metrics, pen_args)))

            if self.display_rug:
                self._rug(data, pen_args)

            if plot_folds:
                pen_args = dict(
                    pen=pg.mkPen(color, width=1, style=Qt.DashLine),
                    antiAlias=True)
                for fold in range(len(results.folds)):
                    fold_results = results.get_fold(fold)
                    fold_curve = Curves.from_results(fold_results, target, clsf)
                    # Can't check this before: p and n can be 0 because of
                    # nan probabilities
                    if fold_curve.p * fold_curve.n == 0:
                        self.Warning.omitted_folds()
                    self.plot_metrics(fold_curve, metrics, pen_args)

        if no_valid_models:
            self.Warning.no_valid_data(
                ", ".join(self.classifier_names[i] for i in no_valid_models))

        if self.score == 0:
            # Diagonal of a perfectly calibrated model.
            self.plot.plot([0, 1], [0, 1], antialias=True)
        else:
            self.line = pg.InfiniteLine(
                pos=self.threshold, movable=True,
                pen=pg.mkPen(color="k", style=Qt.DashLine, width=2),
                hoverPen=pg.mkPen(color="k", style=Qt.DashLine, width=3),
                bounds=(0, 1),
            )
            self.line.sigPositionChanged.connect(self.threshold_change)
            self.line.sigPositionChangeFinished.connect(
                self.threshold_change_done)
            self.plot.addItem(self.line)

    def _check_class_presence(self, ytrue):
        """Return False (and show an error) if a class is absent in ytrue."""
        self.Error.all_target_class.clear()
        self.Error.no_target_class.clear()
        if np.max(ytrue) == 0:
            self.Error.no_target_class()
            return False
        if np.min(ytrue) == 1:
            self.Error.all_target_class()
            return False
        return True

    def _replot(self):
        """Redraw the plot from scratch and refresh the info label."""
        self.plot.clear()
        if self.results is not None:
            self._setup_plot()
        self._update_info()

    def _on_display_rug_changed(self):
        self._replot()

    def _on_selection_changed(self):
        self._replot()
        self.apply()

    def threshold_change(self):
        """Snap the dragged threshold line to two decimals."""
        self.threshold = round(self.line.pos().x(), 2)
        self.line.setPos(self.threshold)
        self._update_info()

    def get_info_text(self, short):
        """Return an HTML table with the metric values at the threshold.

        :param bool short: elide long classifier names and use the
            compact threshold header
        :return: the HTML text, or None when no scores were computed
        """
        if short:
            def elided(s):
                return s[:17] + "..." if len(s) > 20 else s

            text = (
                "<table> <tr> <th align='right'>Threshold: p=</th> "
                f"<td colspan='4'>{self.threshold:.2f}<br/></td> </tr>"
            )
        else:
            def elided(s):
                return s

            text = (
                "<table> <tr> <th align='right'>Threshold:</th> "
                f"<td colspan='4'>p = {self.threshold:.2f}<br/> </td> "
                "<tr/> </tr>"
            )

        if self.scores is None:
            return None

        short_names = Metrics[self.score].short_names
        if short_names:
            header_cells = "<td></td>".join(
                f"<td align='right'>{n}</td>" for n in short_names)
            text += f"<tr> <th></th> {header_cells} </tr>"
        for name, (probs, curves) in self.scores:
            # Clamp so a threshold past the last point uses the last value.
            ind = min(np.searchsorted(probs, self.threshold),
                      len(probs) - 1)
            text += f"<tr><th align='right'>{elided(name)}:</th>"
            text += "<td>/</td>".join(f'<td>{curve[ind]:.3f}</td>'
                                      for curve in curves)
            text += "</tr>"
        text += "</table>"
        return text

    def _update_info(self):
        self.info_label.setText(self.get_info_text(short=True))

    def threshold_change_done(self):
        self.apply()

    def apply(self):
        """Send the calibrated/thresholded model, or None with a reason."""
        self.Information.no_output.clear()
        wrapped = None
        results = self.results
        if results is not None:
            # `folds` may be None (see _setup_plot), so guard before len().
            several_folds = \
                results.folds is not None and len(results.folds) > 1
            problems = [
                msg for condition, msg in (
                    (several_folds,
                     "each training data sample produces a different model"),
                    (results.models is None,
                     "test results do not contain stored models - try testing "
                     "on separate data or on training data"),
                    (len(self.selected_classifiers) != 1,
                     "select a single model - the widget can output only one"),
                    (self.score != 0 and
                     len(results.domain.class_var.values) != 2,
                     "cannot calibrate non-binary classes"))
                if condition]
            if len(problems) == 1:
                self.Information.no_output(problems[0])
            elif problems:
                self.Information.no_output(
                    "".join(f"\n - {problem}" for problem in problems))
            else:
                clsf_idx = self.selected_classifiers[0]
                model = results.models[0, clsf_idx]
                if self.score == 0:
                    cal_learner = CalibratedLearner(
                        None, self.output_calibration)
                    wrapped = cal_learner.get_model(
                        model, results.actual, results.probabilities[clsf_idx])
                else:
                    threshold = [1 - self.threshold,
                                 self.threshold][self.target_index]
                    wrapped = ThresholdClassifier(model, threshold)

        self.Outputs.calibrated_model.send(wrapped)

    def send_report(self):
        """Add the settings, the plot and its legend to the report."""
        if self.results is None:
            return
        self.report_items((
            ("Target class", self.target_cb.currentText()),
            ("Output model calibration",
             self.score == 0
             and ("Sigmoid calibration",
                  "Isotonic calibration")[self.output_calibration])
        ))
        caption = report.list_legend(self.classifiers_list_box,
                                     self.selected_classifiers)
        self.report_plot()
        self.report_caption(caption)
        self.report_caption(self.controls.score.currentText())

        if self.score != 0:
            self.report_raw(self.get_info_text(short=False))

    def set_visual_settings(self, key, value):
        """Apply one visual setting to the plot and remember it."""
        self.plot.parameter_setter.set_parameter(key, value)
        self.visual_settings[key] = value
class OWROCAnalysis(widget.OWWidget):
    """Widget drawing ROC curves for the classifiers in evaluation results.

    Curves from folds can be merged, averaged (vertically or at
    thresholds) or shown individually; for merged curves an
    iso-performance line is drawn from the cost and prior settings.
    """

    name = "ROC Analysis"
    description = "Display the Receiver Operating Characteristics curve " \
                  "based on the evaluation of classifiers."
    icon = "icons/ROCAnalysis.svg"
    priority = 1010

    inputs = [("Evaluation Results", Orange.evaluation.Results, "set_results")]

    target_index = settings.Setting(0)
    selected_classifiers = []

    display_perf_line = settings.Setting(True)
    display_def_threshold = settings.Setting(True)

    fp_cost = settings.Setting(500)
    fn_cost = settings.Setting(500)
    target_prior = settings.Setting(50.0)

    #: ROC Averaging Types
    Merge, Vertical, Threshold, NoAveraging = 0, 1, 2, 3
    roc_averaging = settings.Setting(Merge)

    display_convex_hull = settings.Setting(False)
    display_convex_curve = settings.Setting(False)

    graph_name = "plot"

    def __init__(self):
        super().__init__()

        self.results = None
        self.classifier_names = []
        self.perf_line = None
        self.colors = []
        # Caches keyed by (target, classifier index).
        self._curve_data = {}
        self._plot_curves = {}
        self._rocch = None
        self._perf_line = None

        plot_box = gui.vBox(self.controlArea, "Plot")

        target_box = gui.vBox(plot_box, "Target Class")
        target_box.setFlat(True)
        self.target_cb = gui.comboBox(
            target_box, self, "target_index",
            callback=self._on_target_changed, contentsLength=8)

        classifiers_box = gui.vBox(plot_box, "Classifiers")
        classifiers_box.setFlat(True)
        self.classifiers_list_box = gui.listBox(
            classifiers_box, self, "selected_classifiers",
            "classifier_names",
            selectionMode=QListView.MultiSelection,
            callback=self._on_classifiers_changed)

        averaging_box = gui.vBox(plot_box, "Combine ROC Curves From Folds")
        averaging_box.setFlat(True)
        averaging_labels = [
            "Merge Predictions from Folds",
            "Mean TP Rate",
            "Mean TP and FP at Threshold",
            "Show Individual Curves",
        ]
        gui.comboBox(averaging_box, self, "roc_averaging",
                     items=averaging_labels, callback=self._replot)

        hull_box = gui.vBox(plot_box, "ROC Convex Hull")
        hull_box.setFlat(True)
        gui.checkBox(hull_box, self, "display_convex_curve",
                     "Show convex ROC curves", callback=self._replot)
        gui.checkBox(hull_box, self, "display_convex_hull",
                     "Show ROC convex hull", callback=self._replot)

        analysis_box = gui.vBox(self.controlArea, "Analysis")
        gui.checkBox(analysis_box, self, "display_def_threshold",
                     "Default threshold (0.5) point",
                     callback=self._on_display_def_threshold_changed)
        gui.checkBox(analysis_box, self, "display_perf_line",
                     "Show performance line",
                     callback=self._on_display_perf_line_changed)

        grid = QGridLayout()
        gui.indentedBox(analysis_box, orientation=grid)

        on_cost_change = self._on_display_perf_line_changed
        fp_spin = gui.spin(analysis_box, self, "fp_cost", 1, 1000, 10,
                           callback=on_cost_change)
        grid.addWidget(QLabel("FP Cost:"), 0, 0)
        grid.addWidget(fp_spin, 0, 1)

        fn_spin = gui.spin(analysis_box, self, "fn_cost", 1, 1000, 10,
                           callback=on_cost_change)
        grid.addWidget(QLabel("FN Cost:"))
        grid.addWidget(fn_spin, 1, 1)

        prior_spin = gui.spin(analysis_box, self, "target_prior", 1, 99,
                              callback=on_cost_change)
        prior_spin.setSuffix("%")
        prior_spin.addAction(QAction("Auto", prior_spin))
        grid.addWidget(QLabel("Prior target class probability:"))
        grid.addWidget(prior_spin, 2, 1)

        self.plotview = pg.GraphicsView(background="w")
        self.plotview.setFrameStyle(QFrame.StyledPanel)

        self.plot = pg.PlotItem()
        view_box = self.plot.getViewBox()
        view_box.setMenuEnabled(False)
        self.plot.getViewBox().setMouseEnabled(False, False)

        axis_pen = QPen(self.palette().color(QPalette.Text))

        tick_font = QFont(self.font())
        tick_font.setPixelSize(max(int(tick_font.pixelSize() * 2 // 3), 11))

        axis_labels = (("bottom", "FP Rate (1-Specificity)"),
                       ("left", "TP Rate (Sensitivity)"))
        for side, label in axis_labels:
            axis = self.plot.getAxis(side)
            axis.setTickFont(tick_font)
            axis.setPen(axis_pen)
            axis.setLabel(label)

        self.plot.showGrid(True, True, alpha=0.1)
        self.plot.setRange(xRange=(0.0, 1.0), yRange=(0.0, 1.0))

        self.plotview.setCentralItem(self.plot)
        self.mainArea.layout().addWidget(self.plotview)

    def set_results(self, results):
        """Set the input evaluation results."""
        self.clear()
        self.results = check_results_adequacy(results, self.Error)
        if self.results is None:
            return
        self._initialize(results)
        self._setup_plot()

    def clear(self):
        """Clear the widget state."""
        self.results = None
        self.plot.clear()
        self.classifier_names = []
        self.selected_classifiers = []
        self.target_cb.clear()
        self.target_index = 0
        self.colors = []
        self._curve_data = {}
        self._plot_curves = {}
        self._rocch = None
        self._perf_line = None

    def _initialize(self, results):
        """Populate the classifier list, its colors and the target combo."""
        learner_names = getattr(results, "learner_names", None)
        if learner_names is None:
            count = len(results.predicted)
            learner_names = ["#{}".format(i + 1) for i in range(count)]

        self.colors = colorpalette.ColorPaletteGenerator(
            len(learner_names),
            colorbrewer.colorSchemes["qualitative"]["Dark2"])

        self.classifier_names = learner_names
        self.selected_classifiers = list(range(len(learner_names)))
        for row, _ in enumerate(learner_names):
            entry = self.classifiers_list_box.item(row)
            entry.setIcon(colorpalette.ColorPixmap(self.colors[row]))

        target_var = results.data.domain.class_var
        self.target_cb.addItems(target_var.values)

    def curve_data(self, target, clf_idx):
        """Return `ROCData' for the given target and classifier."""
        key = (target, clf_idx)
        if key not in self._curve_data:
            self._curve_data[key] = ROCData.from_results(
                self.results, clf_idx, target)
        return self._curve_data[key]

    def plot_curves(self, target, clf_idx):
        """Return a set of functions `plot_curves` generating plot curves."""
        key = (target, clf_idx)
        data = self.curve_data(target, clf_idx)

        if key in self._plot_curves:
            return self._plot_curves[key]

        # A cosmetic pen in the classifier's color, plus a wider and
        # lighter one used as its shadow.
        pen = QPen(self.colors[clf_idx], 1)
        pen.setCosmetic(True)
        shadow_pen = QPen(pen.color().lighter(160), 2.5)
        shadow_pen.setCosmetic(True)
        name = self.classifier_names[clf_idx]

        @once
        def merged():
            return plot_curve(
                data.merged, pen=pen, shadow_pen=shadow_pen, name=name)

        @once
        def folds():
            return [plot_curve(fold, pen=pen, shadow_pen=shadow_pen)
                    for fold in data.folds]

        @once
        def avg_vert():
            return plot_avg_curve(data.avg_vertical, pen=pen,
                                  shadow_pen=shadow_pen, name=name)

        @once
        def avg_thres():
            return plot_avg_curve(data.avg_threshold, pen=pen,
                                  shadow_pen=shadow_pen, name=name)

        self._plot_curves[key] = plot_curves(
            merge=merged, folds=folds,
            avg_vertical=avg_vert, avg_threshold=avg_thres
        )
        return self._plot_curves[key]

    def _add_default_threshold_label(self, points):
        """Label the curve point whose threshold is closest to 0.5."""
        nearest = numpy.argmin(numpy.abs(points.thresholds - 0.5))
        label = pg.TextItem(
            text="{:.3f}".format(points.thresholds[nearest]),
        )
        label.setPos(points.fpr[nearest], points.tpr[nearest])
        self.plot.addItem(label)

    def _setup_plot(self):
        """Add the curves for the current target/averaging to the plot."""
        target = self.target_index
        clf_indices = self.selected_classifiers

        plotters = [self.plot_curves(target, i) for i in clf_indices]
        roc_data = [self.curve_data(target, i) for i in clf_indices]
        averaging = self.roc_averaging

        if averaging == OWROCAnalysis.Merge:
            for plotter in plotters:
                graphics = plotter.merge()
                merged_curve = graphics.curve
                self.plot.addItem(graphics.curve_item)

                if self.display_convex_curve:
                    self.plot.addItem(graphics.hull_item)

                if self.display_def_threshold:
                    self._add_default_threshold_label(merged_curve.points)

            hull_curves = [data.merged.hull for data in roc_data]
            if hull_curves:
                self._rocch = convex_hull(hull_curves)
                iso_pen = QPen(QColor(Qt.black), 1)
                iso_pen.setCosmetic(True)
                self._perf_line = InfiniteLine(pen=iso_pen, antialias=True)
                self.plot.addItem(self._perf_line)

        elif averaging == OWROCAnalysis.Vertical:
            for plotter in plotters:
                graphics = plotter.avg_vertical()
                self.plot.addItem(graphics.curve_item)
                self.plot.addItem(graphics.confint_item)

            hull_curves = [data.avg_vertical.hull for data in roc_data]

        elif averaging == OWROCAnalysis.Threshold:
            for plotter in plotters:
                graphics = plotter.avg_threshold()
                self.plot.addItem(graphics.curve_item)
                self.plot.addItem(graphics.confint_item)

            hull_curves = [data.avg_threshold.hull for data in roc_data]

        elif averaging == OWROCAnalysis.NoAveraging:
            for plotter in plotters:
                for fold_graphics in plotter.folds():
                    self.plot.addItem(fold_graphics.curve_item)
                    if self.display_convex_curve:
                        self.plot.addItem(fold_graphics.hull_item)

            hull_curves = [fold.hull
                           for data in roc_data for fold in data.folds]

        if self.display_convex_hull and hull_curves:
            hull = convex_hull(hull_curves)
            hull_pen = QPen(QColor(200, 200, 200, 100), 2)
            hull_pen.setCosmetic(True)
            hull_item = self.plot.plot(
                hull.fpr, hull.tpr,
                pen=hull_pen,
                brush=QBrush(QColor(200, 200, 200, 50)),
                fillLevel=0)
            # Keep the filled hull behind the curves.
            hull_item.setZValue(-10000)

        # Dashed diagonal from (0, 0) to (1, 1).
        diagonal_pen = QPen(QColor(100, 100, 100, 100), 1, Qt.DashLine)
        diagonal_pen.setCosmetic(True)
        self.plot.plot([0, 1], [0, 1], pen=diagonal_pen, antialias=True)

        if averaging == OWROCAnalysis.Merge:
            self._update_perf_line()

    def _on_target_changed(self):
        self.plot.clear()
        self._setup_plot()

    def _on_classifiers_changed(self):
        self.plot.clear()
        if self.results is None:
            return
        self._setup_plot()

    def _on_display_perf_line_changed(self):
        if self.roc_averaging == OWROCAnalysis.Merge:
            self._update_perf_line()

        if self.perf_line is None:
            return
        self.perf_line.setVisible(self.display_perf_line)

    def _on_display_def_threshold_changed(self):
        self._replot()

    def _replot(self):
        """Clear the plot and redraw it if results are present."""
        self.plot.clear()
        if self.results is None:
            return
        self._setup_plot()

    def _update_perf_line(self):
        """Show/hide the iso-performance line and reposition it."""
        line = self._perf_line
        if line is None:
            return

        line.setVisible(self.display_perf_line)
        if not self.display_perf_line:
            return

        slope = roc_iso_performance_slope(
            self.fp_cost, self.fn_cost, self.target_prior / 100.0)

        hull = self._rocch
        touching = roc_iso_performance_line(slope, hull)
        angle = numpy.arctan2(slope, 1)  # in radians
        line.setAngle(angle * 180 / numpy.pi)
        first = touching[0]
        line.setPos((hull.fpr[first], hull.tpr[first]))

    def onDeleteWidget(self):
        self.clear()

    def send_report(self):
        """Add the settings, the plot and the classifier legend to the report."""
        if self.results is None:
            return

        entries = [("Target class", self.target_cb.currentText())]
        if self.display_perf_line:
            entries.append(
                ("Costs",
                 "FP = {}, FN = {}".format(self.fp_cost, self.fn_cost)))
            entries.append(
                ("Target probability", "{} %".format(self.target_prior)))
        items = OrderedDict(entries)

        caption = report.list_legend(self.classifiers_list_box,
                                     self.selected_classifiers)
        self.report_items(items)
        self.report_plot()
        self.report_caption(caption)
class OWSilhouettePlot(widget.OWWidget):
    """
    Widget that computes per-instance silhouette scores for a chosen
    discrete (cluster) variable and displays them as a silhouette plot.

    The selected instances and the input data annotated with the selection
    (optionally extended with a silhouette score meta column) are sent to
    the outputs.
    """
    name = "Silhouette Plot"
    description = "Visually assess cluster quality and " \
                  "the degree of cluster membership."

    icon = "icons/SilhouettePlot.svg"
    priority = 300
    keywords = []

    class Inputs:
        data = Input("Data", Orange.data.Table)

    class Outputs:
        selected_data = Output("Selected Data", Orange.data.Table,
                               default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Orange.data.Table)

    replaces = [
        "orangecontrib.prototypes.widgets.owsilhouetteplot.OWSilhouettePlot",
        "Orange.widgets.unsupervised.owsilhouetteplot.OWSilhouettePlot"
    ]

    settingsHandler = settings.PerfectDomainContextHandler()

    #: Distance metric index
    distance_idx = settings.Setting(0)
    #: Group/cluster variable index
    cluster_var_idx = settings.ContextSetting(0)
    #: Annotation variable index
    annotation_var_idx = settings.ContextSetting(0)
    #: Group the (displayed) silhouettes by cluster
    group_by_cluster = settings.Setting(True)
    #: A fixed size for an instance bar
    bar_size = settings.Setting(3)
    #: Add silhouette scores to output data
    add_scores = settings.Setting(False)
    auto_commit = settings.Setting(True)

    #: (display name, distance constructor) pairs offered in the combo box
    Distances = [("Euclidean", Orange.distance.Euclidean),
                 ("Manhattan", Orange.distance.Manhattan),
                 ("Cosine", Orange.distance.Cosine)]

    graph_name = "scene"
    buttons_area_orientation = Qt.Vertical

    class Error(widget.OWWidget.Error):
        need_two_clusters = Msg("Need at least two non-empty clusters")
        singleton_clusters_all = Msg("All clusters are singletons")
        memory_error = Msg("Not enough memory")
        value_error = Msg("Distances could not be computed: '{}'")

    class Warning(widget.OWWidget.Warning):
        missing_cluster_assignment = Msg(
            "{} instance{s} omitted (missing cluster assignment)")
        nan_distances = Msg("{} instance{s} omitted (undefined distances)")
        ignoring_categorical = Msg("Ignoring categorical features")

    def __init__(self):
        """Initialize the widget state and build the control/main areas."""
        super().__init__()
        #: The input data
        self.data = None         # type: Optional[Orange.data.Table]
        #: Distance matrix computed from data
        self._matrix = None      # type: Optional[Orange.misc.DistMatrix]
        #: A bool mask (size == len(data)) indicating missing group/cluster
        #: assignments
        self._mask = None        # type: Optional[np.ndarray]
        #: An array of cluster/group labels for instances with valid group
        #: assignment
        self._labels = None      # type: Optional[np.ndarray]
        #: An array of silhouette scores for instances with valid group
        #: assignment
        self._silhouette = None  # type: Optional[np.ndarray]
        self._silplot = None     # type: Optional[SilhouettePlot]

        gui.comboBox(
            self.controlArea, self, "distance_idx", box="Distance",
            items=[name for name, _ in OWSilhouettePlot.Distances],
            orientation=Qt.Horizontal, callback=self._invalidate_distances)

        box = gui.vBox(self.controlArea, "Cluster Label")
        self.cluster_var_cb = gui.comboBox(
            box, self, "cluster_var_idx", contentsLength=14, addSpace=4,
            callback=self._invalidate_scores)
        gui.checkBox(
            box, self, "group_by_cluster", "Group by cluster",
            callback=self._replot)
        self.cluster_var_model = itemmodels.VariableListModel(parent=self)
        self.cluster_var_cb.setModel(self.cluster_var_model)

        box = gui.vBox(self.controlArea, "Bars")
        gui.widgetLabel(box, "Bar width:")
        gui.hSlider(
            box, self, "bar_size", minValue=1, maxValue=10, step=1,
            callback=self._update_bar_size, addSpace=6)
        gui.widgetLabel(box, "Annotations:")
        self.annotation_cb = gui.comboBox(
            box, self, "annotation_var_idx", contentsLength=14,
            callback=self._update_annotations)
        self.annotation_var_model = itemmodels.VariableListModel(parent=self)
        self.annotation_var_model[:] = ["None"]
        self.annotation_cb.setModel(self.annotation_var_model)
        ibox = gui.indentedBox(box, 5)
        self.ann_hidden_warning = warning = gui.widgetLabel(
            ibox, "(increase the width to show)")
        ibox.setFixedWidth(ibox.sizeHint().width())
        warning.setVisible(False)

        gui.rubber(self.controlArea)

        gui.separator(self.buttonsArea)
        box = gui.vBox(self.buttonsArea, "Output")
        # Thunk the call to commit to call conditional commit
        gui.checkBox(box, self, "add_scores", "Add silhouette scores",
                     callback=lambda: self.commit())
        gui.auto_commit(
            box, self, "auto_commit", "Commit",
            auto_label="Auto commit", box=False)
        # Ensure that the controlArea is not narrower than buttonsArea
        self.controlArea.layout().addWidget(self.buttonsArea)

        self.scene = QGraphicsScene()
        self.view = QGraphicsView(self.scene)
        self.view.setRenderHint(QPainter.Antialiasing, True)
        self.view.setAlignment(Qt.AlignTop | Qt.AlignLeft)
        self.mainArea.layout().addWidget(self.view)

    def sizeHint(self):
        """Return the control area's size hint expanded to at least 600x720."""
        sh = self.controlArea.sizeHint()
        return sh.expandedTo(QSize(600, 720))

    @Inputs.data
    @check_sql_input
    def set_data(self, data):
        """
        Set the input dataset.

        Data without any discrete variable (attribute, class or meta) that
        has at least two values is rejected with an error and treated as
        no input.
        """
        self.closeContext()
        self.clear()
        error_msg = ""
        warning_msg = ""
        candidatevars = []
        if data is not None:
            candidatevars = [
                v for v in data.domain.variables + data.domain.metas
                if v.is_discrete and len(v.values) >= 2
            ]
            if not candidatevars:
                error_msg = "Input does not have any suitable labels."
                data = None

        self.data = data
        if data is not None:
            self.cluster_var_model[:] = candidatevars
            # Prefer the class variable as the cluster label when eligible
            if data.domain.class_var in candidatevars:
                self.cluster_var_idx = \
                    candidatevars.index(data.domain.class_var)
            else:
                self.cluster_var_idx = 0

            annotvars = [var for var in data.domain.metas if var.is_string]
            self.annotation_var_model[:] = ["None"] + annotvars
            self.annotation_var_idx = 1 if len(annotvars) else 0
            self.openContext(Orange.data.Domain(candidatevars))

        self.error(error_msg)
        self.warning(warning_msg)

    def handleNewSignals(self):
        """Recompute and redraw for the new input, then commit the output."""
        if self.data is not None:
            self._update()
            self._replot()

        self.unconditional_commit()

    def clear(self):
        """
        Clear the widget state.
        """
        self.data = None
        self._matrix = None
        self._mask = None
        self._silhouette = None
        self._labels = None
        self.cluster_var_model[:] = []
        self.annotation_var_model[:] = ["None"]
        self._clear_scene()
        self.Error.clear()
        self.Warning.clear()

    def _clear_scene(self):
        # Clear the graphics scene and associated objects
        self.scene.clear()
        self.scene.setSceneRect(QRectF())
        self._silplot = None

    def _invalidate_distances(self):
        # Invalidate the computed distance matrix and recompute the silhouette.
        self._matrix = None
        self._invalidate_scores()

    def _invalidate_scores(self):
        # Invalidate and recompute the current silhouette scores.
        self._labels = self._silhouette = self._mask = None
        self._update()
        self._replot()
        if self.data is not None:
            self.commit()

    def _update(self):
        # Update/recompute the distances/scores as required
        self._clear_messages()

        if self.data is None or not len(self.data):
            self._reset_all()
            return

        if self._matrix is None:
            _, metric = self.Distances[self.distance_idx]
            data = self.data
            if not metric.supports_discrete and any(
                    a.is_discrete for a in data.domain.attributes):
                self.Warning.ignoring_categorical()
                data = Orange.distance.remove_discrete_features(data)
            try:
                self._matrix = np.asarray(metric(data))
            except MemoryError:
                self.Error.memory_error()
                return
            except ValueError as err:
                self.Error.value_error(str(err))
                return

        self._update_labels()

    def _reset_all(self):
        # Drop every computed result and empty the scene.
        self._mask = None
        self._silhouette = None
        self._labels = None
        self._matrix = None
        self._clear_scene()

    def _clear_messages(self):
        self.Error.clear()
        self.Warning.clear()

    def _update_labels(self):
        # Recompute the instance mask, the cluster labels and the silhouette
        # scores for the currently selected cluster variable.
        labelvar = self.cluster_var_model[self.cluster_var_idx]
        labels, _ = self.data.get_column_view(labelvar)
        labels = np.asarray(labels, dtype=float)
        cluster_mask = np.isnan(labels)
        dist_mask = np.isnan(self._matrix).all(axis=0)
        mask = cluster_mask | dist_mask
        # Drop the masked (NaN label / undefined distance) entries *before*
        # converting to int; casting NaN to an integer is an invalid cast.
        labels = labels[~mask].astype(int)

        labels_unq = np.unique(labels)

        if len(labels_unq) < 2:
            self.Error.need_two_clusters()
            labels = silhouette = mask = None
        elif len(labels_unq) == len(labels):
            self.Error.singleton_clusters_all()
            labels = silhouette = mask = None
        else:
            silhouette = sklearn.metrics.silhouette_samples(
                self._matrix[~mask, :][:, ~mask], labels,
                metric="precomputed")
        self._mask = mask
        self._labels = labels
        self._silhouette = silhouette

        if mask is not None:
            count_missing = np.count_nonzero(cluster_mask)
            if count_missing:
                self.Warning.missing_cluster_assignment(
                    count_missing, s="s" if count_missing > 1 else "")
            count_nandist = np.count_nonzero(dist_mask)
            if count_nandist:
                self.Warning.nan_distances(
                    count_nandist, s="s" if count_nandist > 1 else "")

    def _set_bar_height(self):
        # Apply the bar size to the plot; row names are shown only for
        # bar sizes of at least 5.
        visible = self.bar_size >= 5
        self._silplot.setBarHeight(self.bar_size)
        self._silplot.setRowNamesVisible(visible)
        self.ann_hidden_warning.setVisible(
            not visible and self.annotation_var_idx > 0)

    def _replot(self):
        # Clear and replot/initialize the scene
        self._clear_scene()
        if self._silhouette is not None and self._labels is not None:
            var = self.cluster_var_model[self.cluster_var_idx]
            self._silplot = silplot = SilhouettePlot()
            self._set_bar_height()

            if self.group_by_cluster:
                silplot.setScores(
                    self._silhouette, self._labels, var.values, var.colors)
            else:
                # A single unnamed group holding all the instances
                silplot.setScores(
                    self._silhouette,
                    np.zeros(len(self._silhouette), dtype=int),
                    [""], np.array([[63, 207, 207]]))

            self.scene.addItem(silplot)
            self._update_annotations()

            silplot.selectionChanged.connect(self.commit)
            silplot.layout().activate()
            self._update_scene_rect()
            silplot.geometryChanged.connect(self._update_scene_rect)

    def _update_bar_size(self):
        if self._silplot is not None:
            self._set_bar_height()

    def _update_annotations(self):
        # Set (or clear) the plot row names from the selected annotation
        # variable; index 0 is the "None" placeholder entry.
        if 0 < self.annotation_var_idx < len(self.annotation_var_model):
            annot_var = self.annotation_var_model[self.annotation_var_idx]
        else:
            annot_var = None
        self.ann_hidden_warning.setVisible(
            self.bar_size < 5 and annot_var is not None)

        if self._silplot is not None:
            if annot_var is not None:
                column, _ = self.data.get_column_view(annot_var)
                if self._mask is not None:
                    assert column.shape == self._mask.shape
                    column = column[~self._mask]
                self._silplot.setRowNames(
                    [annot_var.str_val(value) for value in column])
            else:
                self._silplot.setRowNames(None)

    def _update_scene_rect(self):
        # The plot may already have been discarded by `_clear_scene`.
        if self._silplot is not None:
            self.scene.setSceneRect(self._silplot.geometry())

    def commit(self):
        """
        Commit/send the current selection to the output.
        """
        selected = indices = data = None
        if self.data is not None:
            selectedmask = np.full(len(self.data), False, dtype=bool)
            if self._silplot is not None:
                indices = self._silplot.selection()
                assert (np.diff(indices) > 0).all(), "strictly increasing"
                if self._mask is not None:
                    # Map plot row indices back to input data row indices
                    indices = np.flatnonzero(~self._mask)[indices]
                selectedmask[indices] = True

            if self._mask is not None:
                scores = np.full(shape=selectedmask.shape,
                                 fill_value=np.nan)
                scores[~self._mask] = self._silhouette
            elif self._silhouette is not None:
                scores = self._silhouette
            else:
                # No scores could be computed (error state); output an
                # all-unknown score column instead of indexing `None`.
                scores = np.full(shape=selectedmask.shape,
                                 fill_value=np.nan)

            silhouette_var = None
            if self.add_scores:
                var = self.cluster_var_model[self.cluster_var_idx]
                silhouette_var = Orange.data.ContinuousVariable(
                    "Silhouette ({})".format(escape(var.name)))
                domain = Orange.data.Domain(
                    self.data.domain.attributes,
                    self.data.domain.class_vars,
                    self.data.domain.metas + (silhouette_var, ))
                data = self.data.transform(domain)
            else:
                domain = self.data.domain
                data = self.data

            if np.count_nonzero(selectedmask):
                selected = self.data.from_table(
                    domain, self.data, np.flatnonzero(selectedmask))

            if self.add_scores:
                if selected is not None:
                    selected[:, silhouette_var] = np.c_[scores[selectedmask]]
                data[:, silhouette_var] = np.c_[scores]

        self.Outputs.selected_data.send(selected)
        self.Outputs.annotated_data.send(create_annotated_table(data, indices))

    def send_report(self):
        """Add the plot and a descriptive caption to the report."""
        if not len(self.cluster_var_model):
            return

        self.report_plot()
        caption = "Silhouette plot ({} distance), clustered by '{}'".format(
            self.Distances[self.distance_idx][0],
            self.cluster_var_model[self.cluster_var_idx])
        # `_silplot` is None when no silhouette could be computed
        if self.annotation_var_idx and self._silplot is not None \
                and self._silplot.rowNamesVisible():
            caption += ", annotated with '{}'".format(
                self.annotation_var_model[self.annotation_var_idx])
        self.report_caption(caption)

    def onDeleteWidget(self):
        """Release the widget state before the widget is deleted."""
        self.clear()
        super().onDeleteWidget()
class OWDiscretize(widget.OWWidget):
    """
    Widget for discretizing the continuous variables of the input data,
    using a default method plus optional per-variable overrides.
    """
    name = "Discretize"
    description = "Discretize the numeric data features."
    icon = "icons/Discretize.svg"

    class Inputs:
        data = Input("Data", Orange.data.Table, doc="Input data table")

    class Outputs:
        data = Output("Data", Orange.data.Table,
                      doc="Table with discretized features")

    settingsHandler = settings.DomainContextHandler()
    saved_var_states = settings.ContextSetting({})

    default_method = settings.Setting(2)
    default_k = settings.Setting(3)
    autosend = settings.Setting(True)

    #: Discretization methods
    Default, Leave, MDL, EqualFreq, EqualWidth, Remove, Custom = range(7)

    want_main_area = False
    resizing_enabled = False

    def __init__(self):
        """Set up the widget state and build the control area."""
        super().__init__()

        #: input data
        self.data = None
        #: Current variable discretization state
        self.var_state = {}
        #: Saved variable discretization settings (context setting)
        self.saved_var_states = {}

        self.method = 0
        self.k = 5

        default_box = gui.vBox(
            self.controlArea, self.tr("Default Discretization"))
        self.default_bbox = gui.radioButtons(
            default_box, self, "default_method",
            callback=self._default_disc_changed)
        default_buttons = self.default_bbox
        columns = gui.hBox(default_buttons)
        self.left = gui.vBox(columns)
        right = gui.vBox(columns)
        columns.layout().setStretch(0, 1)
        columns.layout().setStretch(1, 1)
        self.options = [
            self.tr("Default"),
            self.tr("Leave numeric"),
            self.tr("Entropy-MDL discretization"),
            self.tr("Equal-frequency discretization"),
            self.tr("Equal-width discretization"),
            self.tr("Remove numeric variables")
        ]
        options = self.options

        for label in options[1:]:
            radio = gui.appendRadioButton(default_buttons, label)
            # The "Equal..." options go in the left column, the rest in the
            # right one; creation order is kept for backward compatibility
            # of saved schemata.
            column = self.left if label.startswith("Equal") else right
            column.layout().addWidget(radio)
        gui.separator(right, 18, 18)

        def _intbox(parent, attr, callback):
            # Indented spin box (2..10) bound to the widget attribute `attr`.
            indented = gui.indentedBox(parent)
            spin = gui.spin(
                indented, self, attr, minv=2, maxv=10,
                label="Num. of intervals:", callback=callback)
            spin.setMaximumWidth(60)
            spin.setAlignment(Qt.AlignRight)
            gui.rubber(spin.box)
            return indented.box

        self.k_general = _intbox(
            self.left, "default_k", self._default_disc_changed)
        self.k_general.layout().setContentsMargins(0, 0, 0, 0)
        hlayout = QHBoxLayout()
        individual_box = gui.widgetBox(
            self.controlArea, "Individual Attribute Settings",
            orientation=hlayout, spacing=8
        )

        # List view with all attributes
        self.varview = QListView(selectionMode=QListView.ExtendedSelection)
        self.varview.setItemDelegate(DiscDelegate())
        self.varmodel = itemmodels.VariableListModel()
        self.varview.setModel(self.varmodel)
        self.varview.selectionModel().selectionChanged.connect(
            self._var_selection_changed
        )
        hlayout.addWidget(self.varview)

        # Controls for individual attr settings
        self.bbox = gui.radioButtons(
            individual_box, self, "method",
            callback=self._disc_method_changed
        )
        controlbox = self.bbox
        hlayout.addWidget(controlbox)

        for label in options[:5]:
            gui.appendRadioButton(controlbox, label)

        self.k_specific = _intbox(controlbox, "k", self._disc_method_changed)

        gui.appendRadioButton(controlbox, "Remove attribute")

        gui.rubber(controlbox)
        controlbox.setEnabled(False)

        self.controlbox = controlbox

        apply_box = gui.auto_commit(
            self.controlArea, self, "autosend", "Apply",
            orientation=Qt.Horizontal,
            checkbox_label="Apply automatically")
        apply_box.layout().insertSpacing(0, 20)
        apply_box.layout().insertWidget(0, self.report_button)
        self._update_spin_positions()

    @Inputs.data
    def set_data(self, data):
        """Set the input data, restore saved per-variable states, commit."""
        self.closeContext()
        self.data = data
        if self.data is None:
            self._clear()
        else:
            self._initialize(data)
            self.openContext(data)
            # Restore the per variable discretization settings
            self._restore(self.saved_var_states)
            # Complete the induction of cut points
            self._update_points()
        self.unconditional_commit()

    def _initialize(self, data):
        # Initialize the default variable states for new data.
        domain = data.domain
        self.class_var = domain.class_var
        self.varmodel[:] = [var for var in domain if var.is_continuous]

        has_disc_class = domain.has_discrete_class

        self.default_bbox.buttons[self.MDL - 1].setEnabled(has_disc_class)
        self.bbox.buttons[self.MDL].setEnabled(has_disc_class)

        # If the newly disabled MDL button is checked then change it
        if not has_disc_class:
            if self.default_method == self.MDL - 1:
                self.default_method = 0
            if self.method == self.MDL:
                self.method = 0

        # Reset (initialize) the variable discretization states.
        self._reset()

    def _restore(self, saved_state):
        # Restore variable states from a saved_state dictionary.
        def_method = self._current_default_method()
        for row, var in enumerate(self.varmodel):
            key = variable_key(var)
            if key not in saved_state:
                continue
            state = saved_state[key]
            # A saved "Default" state follows the current default method
            if isinstance(state.method, Default):
                state = DState(Default(def_method), None, None)
            self._set_var_state(row, state)

    def _reset(self):
        # restore the individual variable settings back to defaults.
        def_method = self._current_default_method()
        self.var_state = {}
        for row, _ in enumerate(self.varmodel):
            self._set_var_state(
                row, DState(Default(def_method), None, None))

    def _set_var_state(self, index, state):
        # set the state of variable at `index` to `state`.
        self.var_state[index] = state
        model_index = self.varmodel.index(index)
        self.varmodel.setData(model_index, state, Qt.UserRole)

    def _clear(self):
        # Drop the data and all the variable states; re-enable MDL buttons.
        self.data = None
        self.varmodel[:] = []
        self.var_state = {}
        self.saved_var_states = {}
        self.default_bbox.buttons[self.MDL - 1].setEnabled(True)
        self.bbox.buttons[self.MDL].setEnabled(True)

    def _update_points(self):
        """
        Update the induced cut points.
        """
        if self.data is None or not len(self.data):
            return

        def induce_cuts(method, data, var):
            # Returns a `(points, discretized variable)` pair for `var`.
            dvar = _dispatch[type(method)](method, data, var)
            if dvar is None:
                # removed
                return [], None
            if dvar is var:
                # no transformation took place
                return None, var
            if is_discretized(dvar):
                return dvar.compute_value.points, dvar
            assert False

        for row, var in enumerate(self.varmodel):
            state = self.var_state[row]
            # Only states that have not been resolved yet are recomputed
            if state.points is not None or state.disc_var is not None:
                continue
            points, dvar = induce_cuts(state.method, self.data, var)
            self._set_var_state(
                row, state._replace(points=points, disc_var=dvar))

    def _method_index(self, method):
        return METHODS.index((type(method), ))

    def _current_default_method(self):
        # Build the method object for the selected default method; the
        # `default_method` setting is offset by one from the method ids.
        method = self.default_method + 1
        k = self.default_k
        if method == OWDiscretize.Leave:
            return Leave()
        if method == OWDiscretize.MDL:
            return MDL()
        if method == OWDiscretize.EqualFreq:
            return EqualFreq(k)
        if method == OWDiscretize.EqualWidth:
            return EqualWidth(k)
        if method == OWDiscretize.Remove:
            return Remove()
        assert False

    def _current_method(self):
        # Build the method object for the selected per-variable method.
        selected = self.method
        if selected == OWDiscretize.Default:
            return Default(self._current_default_method())
        if selected == OWDiscretize.Leave:
            return Leave()
        if selected == OWDiscretize.MDL:
            return MDL()
        if selected == OWDiscretize.EqualFreq:
            return EqualFreq(self.k)
        if selected == OWDiscretize.EqualWidth:
            return EqualWidth(self.k)
        if selected == OWDiscretize.Remove:
            return Remove()
        if selected == OWDiscretize.Custom:
            return Custom(self.cutpoints)
        assert False

    def _update_spin_positions(self):
        # Enable each interval spin box only for the equal frequency/width
        # methods and move it next to the selected radio button.
        general_pos = {2: 1, 3: 2}.get(self.default_method)
        self.k_general.setDisabled(general_pos is None)
        if general_pos is not None:
            self.left.layout().insertWidget(general_pos, self.k_general)

        specific_pos = {3: 4, 4: 5}.get(self.method)
        self.k_specific.setDisabled(specific_pos is None)
        if specific_pos is not None:
            self.bbox.layout().insertWidget(specific_pos, self.k_specific)

    def _default_disc_changed(self):
        # Apply the new default method to every variable using "Default".
        self._update_spin_positions()
        new_state = DState(
            Default(self._current_default_method()), None, None)
        for row in range(len(self.varmodel)):
            if isinstance(self.var_state[row].method, Default):
                self._set_var_state(row, new_state)
        self._update_points()
        self.commit()

    def _disc_method_changed(self):
        # Apply the selected method to all the selected variables.
        self._update_spin_positions()
        rows = self.selected_indices()
        new_state = DState(self._current_method(), None, None)
        for row in rows:
            self._set_var_state(row, new_state)
        self._update_points()
        self.commit()

    def _var_selection_changed(self, *args):
        # set of all methods for the current selection
        selected_methods = {
            self.var_state[row].method for row in self.selected_indices()
        }
        self.controlbox.setEnabled(len(selected_methods) > 0)
        if len(selected_methods) == 1:
            method = selected_methods.pop()
            self.method = self._method_index(method)
            if isinstance(method, (EqualFreq, EqualWidth)):
                self.k = method.k
            elif isinstance(method, Custom):
                self.cutpoints = method.points
        else:
            # deselect the current button
            self.method = -1
            button_group_reset(self.controlbox.group)
        self._update_spin_positions()

    def selected_indices(self):
        """Return the row indices selected in the variable list view."""
        selection = self.varview.selectionModel().selectedRows()
        return [model_index.row() for model_index in selection]

    def discretized_var(self, source):
        """
        Return the effective variable for `source`: `source` itself when it
        is left unchanged, its discretized variable, or None when removed.
        """
        row = list(self.varmodel).index(source)
        state = self.var_state[row]
        if state.disc_var is None:
            return None
        if state.disc_var is source:
            return source
        if state.points == []:
            return None
        return state.disc_var

    def discretized_domain(self):
        """
        Return the current effective discretized domain.
        """
        if self.data is None:
            return None

        def disc_var(source):
            # Only continuous variables are mapped; others pass through
            if source and source.is_continuous:
                return self.discretized_var(source)
            return source

        source_domain = self.data.domain
        mapped = (disc_var(var) for var in source_domain.attributes)
        attributes = [var for var in mapped if var is not None]
        class_var = disc_var(source_domain.class_var)

        return Orange.data.Domain(
            attributes, class_var, metas=source_domain.metas
        )

    def commit(self):
        """Send the transformed data, or None without (non-empty) data."""
        output = None
        if self.data is not None and len(self.data):
            output = self.data.transform(self.discretized_domain())
        self.Outputs.data.send(output)

    def storeSpecificSettings(self):
        """Store the variable states without the computed cut points."""
        super().storeSpecificSettings()
        stored = {}
        for row, var in enumerate(self.varmodel):
            stored[variable_key(var)] = \
                self.var_state[row]._replace(points=None, disc_var=None)
        self.saved_var_states = stored

    def send_report(self):
        """Report the default method and the per-variable thresholds."""
        default_name = self.options[self.default_method + 1]
        self.report_items((("Default method", default_name),))
        if self.varmodel:
            thresholds = [
                (var.name,
                 DiscDelegate.cutsText(self.var_state[row])
                 or "leave numeric")
                for row, var in enumerate(self.varmodel)
            ]
            self.report_items("Thresholds", thresholds)
class OWKEGGPathwayBrowser(widget.OWWidget): name = "KEGG Pathways" description = "Browse KEGG pathways that include an input set of genes." icon = "../widgets/icons/OWKEGGPathwayBrowser.svg" priority = 8 inputs = [("Data", Orange.data.Table, "SetData", widget.Default), ("Reference", Orange.data.Table, "SetRefData")] outputs = [("Selected Data", Orange.data.Table, widget.Default), ("Unselected Data", Orange.data.Table)] settingsHandler = settings.DomainContextHandler() organismIndex = settings.ContextSetting(0) geneAttrIndex = settings.ContextSetting(0) useAttrNames = settings.ContextSetting(False) autoCommit = settings.Setting(False) autoResize = settings.Setting(True) useReference = settings.Setting(False) showOrthology = settings.Setting(True) Ready, Initializing, Running = 0, 1, 2 def __init__(self, parent=None): super().__init__(parent) self.organismCodes = [] self._changedFlag = False self.__invalidated = False self.__runstate = OWKEGGPathwayBrowser.Initializing self.__in_setProgress = False self.controlArea.setMaximumWidth(250) box = gui.widgetBox(self.controlArea, "Info") self.infoLabel = gui.widgetLabel(box, "No data on input\n") # Organism selection. 
box = gui.widgetBox(self.controlArea, "Organism") self.organismComboBox = gui.comboBox( box, self, "organismIndex", items=[], callback=self.Update, addSpace=True, tooltip="Select the organism of the input genes") # Selection of genes attribute box = gui.widgetBox(self.controlArea, "Gene attribute") self.geneAttrCandidates = itemmodels.VariableListModel(parent=self) self.geneAttrCombo = gui.comboBox(box, self, "geneAttrIndex", callback=self.Update) self.geneAttrCombo.setModel(self.geneAttrCandidates) gui.checkBox(box, self, "useAttrNames", "Use variable names", disables=[(-1, self.geneAttrCombo)], callback=self.Update) self.geneAttrCombo.setDisabled(bool(self.useAttrNames)) gui.separator(self.controlArea) gui.checkBox(self.controlArea, self, "useReference", "From signal", box="Reference", callback=self.Update) gui.separator(self.controlArea) gui.checkBox(self.controlArea, self, "showOrthology", "Show pathways in full orthology", box="Orthology", callback=self.UpdateListView) gui.checkBox(self.controlArea, self, "autoResize", "Resize to fit", box="Image", callback=self.UpdatePathwayViewTransform) box = gui.widgetBox(self.controlArea, "Cache Control") gui.button(box, self, "Clear cache", callback=self.ClearCache, tooltip="Clear all locally cached KEGG data.", default=False, autoDefault=False) gui.separator(self.controlArea) gui.auto_commit(self.controlArea, self, "autoCommit", "Commit") gui.rubber(self.controlArea) spliter = QSplitter(Qt.Vertical, self.mainArea) self.pathwayView = PathwayView(self, spliter) self.pathwayView.scene().selectionChanged.connect( self._onSelectionChanged) self.mainArea.layout().addWidget(spliter) self.listView = QTreeWidget(allColumnsShowFocus=True, selectionMode=QTreeWidget.SingleSelection, sortingEnabled=True, maximumHeight=200) spliter.addWidget(self.listView) self.listView.setColumnCount(4) self.listView.setHeaderLabels( ["Pathway", "P value", "Genes", "Reference"]) self.listView.itemSelectionChanged.connect(self.UpdatePathwayView) 
select = QAction("Select All", self, shortcut=QKeySequence.SelectAll) select.triggered.connect(self.selectAll) self.addAction(select) self.data = None self.refData = None self._executor = concurrent.ThreadExecutor() self.setEnabled(False) self.setBlocking(True) progress = concurrent.methodinvoke(self, "setProgress", (float, )) def get_genome(): """Return a KEGGGenome with the common org entries precached.""" genome = kegg.KEGGGenome() essential = genome.essential_organisms() common = genome.common_organisms() # Remove duplicates of essential from common. # (essential + common list as defined here will be used in the # GUI.) common = [c for c in common if c not in essential] # TODO: Add option to specify additional organisms not # in the common list. keys = list(map(genome.org_code_to_entry_key, essential + common)) genome.pre_cache(keys, progress_callback=progress) return (keys, genome) self._genomeTask = task = concurrent.Task(function=get_genome) task.finished.connect(self.__initialize_finish) self.progressBarInit() self.infoLabel.setText("Fetching organism definitions\n") self._executor.submit(task) def __initialize_finish(self): if self.__runstate != OWKEGGPathwayBrowser.Initializing: return try: keys, genome = self._genomeTask.result() except Exception as err: self.error(0, str(err)) raise self.progressBarFinished() self.setEnabled(True) self.setBlocking(False) entries = [genome[key] for key in keys] items = [entry.definition for entry in entries] codes = [entry.organism_code for entry in entries] self.organismCodes = codes self.organismComboBox.clear() self.organismComboBox.addItems(items) self.organismComboBox.setCurrentIndex(self.organismIndex) self.infoLabel.setText("No data on input\n") def Clear(self): """ Clear the widget state. 
""" self.queryGenes = [] self.referenceGenes = [] self.genes = {} self.uniqueGenesDict = {} self.revUniqueGenesDict = {} self.pathways = {} self.org = None self.geneAttrCandidates[:] = [] self.infoLabel.setText("No data on input\n") self.listView.clear() self.pathwayView.SetPathway(None) self.send("Selected Data", None) self.send("Unselected Data", None) def SetData(self, data=None): if self.__runstate == OWKEGGPathwayBrowser.Initializing: self.__initialize_finish() self.data = data self.warning(0) self.error(0) self.information(0) if data is not None: vars = data.domain.variables + data.domain.metas vars = [ var for var in vars if isinstance(var, Orange.data.StringVariable) ] self.geneAttrCandidates[:] = vars # Try to guess the gene name variable if vars: names_lower = [v.name.lower() for v in vars] scores = [(name == "gene", "gene" in name) for name in names_lower] imax, _ = max(enumerate(scores), key=itemgetter(1)) else: imax = -1 self.geneAttrIndex = imax taxid = str(data.attributes.get(TAX_ID, '')) if taxid: try: code = kegg.from_taxid(taxid) self.organismIndex = self.organismCodes.index(code) except Exception as ex: print(ex, taxid) self.useAttrNames = data.attributes.get(GENE_AS_ATTRIBUTE_NAME, self.useAttrNames) if len(self.geneAttrCandidates) == 0: self.useAttrNames = True self.geneAttrIndex = -1 else: self.geneAttrIndex = min(self.geneAttrIndex, len(self.geneAttrCandidates) - 1) else: self.Clear() self.__invalidated = True def SetRefData(self, data=None): self.refData = data self.information(1) if data is not None and self.useReference: self.__invalidated = True def handleNewSignals(self): if self.__invalidated: self.Update() self.__invalidated = False def UpdateListView(self): self.bestPValueItem = None self.listView.clear() if not self.data: return allPathways = self.org.pathways() allRefPathways = kegg.pathways("map") items = [] kegg_pathways = kegg.KEGGPathways() org_code = self.organismCodes[min(self.organismIndex, len(self.organismCodes) - 1)] if 
self.showOrthology: self.koOrthology = kegg.KEGGBrite("ko00001") self.listView.setRootIsDecorated(True) path_ids = set([s[-5:] for s in self.pathways.keys()]) def _walkCollect(koEntry): num = koEntry.title[:5] if koEntry.title else None if num in path_ids: return ([koEntry] + reduce(lambda li, c: li + _walkCollect(c), [child for child in koEntry.entries], [])) else: c = reduce(lambda li, c: li + _walkCollect(c), [child for child in koEntry.entries], []) return c + (c and [koEntry] or []) allClasses = reduce(lambda li1, li2: li1 + li2, [_walkCollect(c) for c in self.koOrthology], []) def _walkCreate(koEntry, lvItem): item = QTreeWidgetItem(lvItem) id = "path:" + org_code + koEntry.title[:5] if koEntry.title[:5] in path_ids: p = kegg_pathways.get_entry(id) if p is None: # In case the genesets still have obsolete entries name = koEntry.title else: name = p.name genes, p_value, ref = self.pathways[id] item.setText(0, name) item.setText(1, "%.5f" % p_value) item.setText(2, "%i of %i" % (len(genes), len(self.genes))) item.setText(3, "%i of %i" % (ref, len(self.referenceGenes))) item.pathway_id = id if p is not None else None else: if id in allPathways: text = kegg_pathways.get_entry(id).name else: text = koEntry.title item.setText(0, text) if id in allPathways: item.pathway_id = id elif "path:map" + koEntry.title[:5] in allRefPathways: item.pathway_id = "path:map" + koEntry.title[:5] else: item.pathway_id = None for child in koEntry.entries: if child in allClasses: _walkCreate(child, item) for koEntry in self.koOrthology: if koEntry in allClasses: _walkCreate(koEntry, self.listView) self.listView.update() else: self.listView.setRootIsDecorated(False) pathways = self.pathways.items() pathways = sorted(pathways, key=lambda item: item[1][1]) for id, (genes, p_value, ref) in pathways: item = QTreeWidgetItem(self.listView) item.setText(0, kegg_pathways.get_entry(id).name) item.setText(1, "%.5f" % p_value) item.setText(2, "%i of %i" % (len(genes), len(self.genes))) 
item.setText(3, "%i of %i" % (ref, len(self.referenceGenes))) item.pathway_id = id items.append(item) self.bestPValueItem = items and items[0] or None self.listView.expandAll() for i in range(4): self.listView.resizeColumnToContents(i) if self.bestPValueItem: index = self.listView.indexFromItem(self.bestPValueItem) self.listView.selectionModel().select( index, QItemSelectionModel.ClearAndSelect) def UpdatePathwayView(self): items = self.listView.selectedItems() if len(items) > 0: item = items[0] else: item = None self.commit() item = item or self.bestPValueItem if not item or not item.pathway_id: self.pathwayView.SetPathway(None) return def get_kgml_and_image(pathway_id): """Return an initialized KEGGPathway with pre-cached data""" p = kegg.KEGGPathway(pathway_id) p._get_kgml() # makes sure the kgml file is downloaded p._get_image_filename() # makes sure the image is downloaded return (pathway_id, p) self.setEnabled(False) self._pathwayTask = concurrent.Task( function=lambda: get_kgml_and_image(item.pathway_id)) self._pathwayTask.finished.connect(self._onPathwayTaskFinshed) self._executor.submit(self._pathwayTask) def _onPathwayTaskFinshed(self): self.setEnabled(True) pathway_id, self.pathway = self._pathwayTask.result() self.pathwayView.SetPathway(self.pathway, self.pathways.get(pathway_id, [[]])[0]) def UpdatePathwayViewTransform(self): self.pathwayView.updateTransform() def Update(self): """ Update (recompute enriched pathways) the widget state. """ if not self.data: return self.error(0) self.information(0) # XXX: Check data in setData, do not even allow this to be executed if # data has no genes try: genes = self.GeneNamesFromData(self.data) except ValueError: self.error(0, "Cannot extract gene names from input.") genes = [] if not self.useAttrNames and any("," in gene for gene in genes): genes = reduce(add, (split_and_strip(gene, ",") for gene in genes), []) self.information( 0, "Separators detected in input gene names. 
" "Assuming multiple genes per instance.") self.queryGenes = genes self.information(1) reference = None if self.useReference and self.refData: reference = self.GeneNamesFromData(self.refData) if not self.useAttrNames \ and any("," in gene for gene in reference): reference = reduce(add, (split_and_strip(gene, ",") for gene in reference), []) self.information( 1, "Separators detected in reference gene " "names. Assuming multiple genes per " "instance.") org_code = self.SelectedOrganismCode() gm = GeneMatcher(kegg.to_taxid(org_code)) gm.genes = genes gm.run_matcher() mapped_genes = { gene: str(ncbi_id) for gene, ncbi_id in gm.map_input_to_ncbi().items() } def run_enrichment(org_code, genes, reference=None, progress=None): org = kegg.KEGGOrganism(org_code) if reference is None: reference = org.get_ncbi_ids() # This is here just to keep widget working without any major changes. # map not needed, geneMatcher will not work on widget level. unique_genes = genes unique_ref_genes = dict([(gene, gene) for gene in set(reference)]) taxid = kegg.to_taxid(org.org_code) # Map the taxid back to standard 'common' taxids # (as used by 'geneset') if applicable r_tax_map = dict( (v, k) for k, v in kegg.KEGGGenome.TAXID_MAP.items()) if taxid in r_tax_map: taxid = r_tax_map[taxid] # We use the kegg pathway gene sets provided by 'geneset' for # the enrichment calculation. 
kegg_api = kegg.api.CachedKeggApi() linkmap = kegg_api.link(org.org_code, "pathway") converted_ids = kegg_api.conv(org.org_code, 'ncbi-geneid') kegg_sets = relation_list_to_multimap( linkmap, dict((gene.upper(), ncbi.split(':')[-1]) for ncbi, gene in converted_ids)) kegg_sets = geneset.GeneSets(sets=[ geneset.GeneSet(gs_id=ddi, genes=set(genes)) for ddi, genes in kegg_sets.items() ]) pathways = pathway_enrichment(kegg_sets, unique_genes.values(), unique_ref_genes.keys(), callback=progress) # Ensure that pathway entries are pre-cached for later use in the # list/tree view kegg_pathways = kegg.KEGGPathways() kegg_pathways.pre_cache(pathways.keys(), progress_callback=progress) return pathways, org, unique_genes, unique_ref_genes self.progressBarInit() self.setEnabled(False) self.infoLabel.setText("Retrieving...\n") progress = concurrent.methodinvoke(self, "setProgress", (float, )) self._enrichTask = concurrent.Task(function=lambda: run_enrichment( org_code, mapped_genes, reference, progress)) self._enrichTask.finished.connect(self._onEnrichTaskFinished) self._executor.submit(self._enrichTask) def _onEnrichTaskFinished(self): self.setEnabled(True) self.setBlocking(False) try: pathways, org, unique_genes, unique_ref_genes = \ self._enrichTask.result() except Exception: raise self.progressBarFinished() self.org = org self.genes = unique_genes.keys() self.uniqueGenesDict = { ncbi_id: input_name for input_name, ncbi_id in unique_genes.items() } self.revUniqueGenesDict = dict([ (val, key) for key, val in self.uniqueGenesDict.items() ]) self.referenceGenes = unique_ref_genes.keys() self.pathways = pathways if not self.pathways: self.warning(0, "No enriched pathways found.") else: self.warning(0) count = len(set(self.queryGenes)) self.infoLabel.setText("%i unique gene names on input\n" "%i (%.1f%%) genes names matched" % (count, len(unique_genes), 100.0 * len(unique_genes) / count if count else 0.0)) self.UpdateListView() @Slot(float) def setProgress(self, value): if 
self.__in_setProgress: return self.__in_setProgress = True self.progressBarSet(value) self.__in_setProgress = False def GeneNamesFromData(self, data): """ Extract and return gene names from `data`. """ if self.useAttrNames: genes = [str(v.name).strip() for v in data.domain.attributes] elif self.geneAttrCandidates: assert 0 <= self.geneAttrIndex < len(self.geneAttrCandidates) geneAttr = self.geneAttrCandidates[self.geneAttrIndex] genes = [ str(e[geneAttr]) for e in data if not numpy.isnan(e[geneAttr]) ] else: raise ValueError("No gene names in data.") return genes def SelectedOrganismCode(self): """ Return the selected organism code. """ return self.organismCodes[min(self.organismIndex, len(self.organismCodes) - 1)] def selectAll(self): """ Select all items in the pathway view. """ changed = False scene = self.pathwayView.scene() with disconnected(scene.selectionChanged, self._onSelectionChanged): for item in scene.items(): if item.flags() & QGraphicsItem.ItemIsSelectable and \ not item.isSelected(): item.setSelected(True) changed = True if changed: self._onSelectionChanged() def _onSelectionChanged(self): # Item selection in the pathwayView/scene has changed self.commit() def commit(self): if self.data: selectedItems = self.pathwayView.scene().selectedItems() selectedGenes = reduce( set.union, [item.marked_objects for item in selectedItems], set()) if self.useAttrNames: selected = [ self.data.domain[self.uniqueGenesDict[gene]] for gene in selectedGenes ] # newDomain = Orange.data.Domain(selectedVars, 0) data = self.data[:, selected] # data = Orange.data.Table(newDomain, self.data) self.send("Selected Data", data) elif self.geneAttrCandidates: assert 0 <= self.geneAttrIndex < len(self.geneAttrCandidates) geneAttr = self.geneAttrCandidates[self.geneAttrIndex] selectedIndices = [] otherIndices = [] for i, ex in enumerate(self.data): names = [ self.revUniqueGenesDict.get(name, None) for name in split_and_strip(str(ex[geneAttr]), ",") ] if any(name and name in 
selectedGenes for name in names): selectedIndices.append(i) else: otherIndices.append(i) if selectedIndices: selected = self.data[selectedIndices] else: selected = None if otherIndices: other = self.data[otherIndices] else: other = None self.send("Selected Data", selected) self.send("Unselected Data", other) else: self.send("Selected Data", None) self.send("Unselected Data", None) def ClearCache(self): kegg.caching.clear_cache() def onDeleteWidget(self): """ Called before the widget is removed from the canvas. """ super().onDeleteWidget() self.org = None self._executor.shutdown(wait=False) gc.collect() # Force collection (WHY?) def sizeHint(self): return QSize(1024, 720)
class OWSVMClassification(widget.OWWidget):
    """Widget that builds a C-SVM or nu-SVM learner and, given data, a classifier.

    Outputs the configured learner, the classifier fitted on the input data
    (``None`` when there is no usable data) and the rows of the input data
    that were selected as support vectors.
    """

    name = "SVM"
    description = "Support vector machines classifier with standard " \
                  "selection of kernels."
    icon = "icons/SVM.svg"

    inputs = [("Data", Table, "set_data"),
              ("Preprocessor", Preprocess, "set_preprocessor")]
    outputs = [("Learner", SVMLearner, widget.Default),
               ("Classifier", SVMClassifier),
               ("Support vectors", Table)]

    want_main_area = False
    resizing_enabled = False

    learner_name = settings.Setting("SVM Learner")

    # 0: c_svc, 1: nu_svc
    svmtype = settings.Setting(0)
    C = settings.Setting(1.0)
    nu = settings.Setting(0.5)
    # 0: Linear, 1: Poly, 2: RBF, 3: Sigmoid
    kernel_type = settings.Setting(0)
    degree = settings.Setting(3)
    gamma = settings.Setting(0.0)
    coef0 = settings.Setting(0.0)
    # NOTE: a trailing comma here used to turn this attribute into a 1-tuple
    # holding the Setting instead of the Setting itself.
    shrinking = settings.Setting(True)
    # NOTE(review): `apply` always passes probability=True to the learner and
    # does not read this setting -- confirm whether that is intentional.
    probability = settings.Setting(False)
    tol = settings.Setting(0.001)
    max_iter = settings.Setting(100)
    limit_iter = settings.Setting(True)

    def __init__(self):
        super().__init__()

        self.data = None
        self.preprocessors = None

        box = gui.widgetBox(self.controlArea, self.tr("Name"))
        gui.lineEdit(box, self, "learner_name")

        # SVM type: two radio buttons, each with its parameter spin box,
        # laid out in a shared grid.
        form = QtGui.QGridLayout()
        typebox = gui.radioButtonsInBox(
            self.controlArea, self, "svmtype", [],
            box=self.tr("SVM Type"),
            orientation=form,
        )

        c_svm = gui.appendRadioButton(typebox, "C-SVM", addToLayout=False)
        form.addWidget(c_svm, 0, 0, Qt.AlignLeft)
        form.addWidget(QtGui.QLabel(self.tr("Cost (C)")), 0, 1, Qt.AlignRight)
        c_spin = gui.doubleSpin(typebox, self, "C", 1e-3, 1000.0, 0.1,
                                decimals=3, addToLayout=False)
        form.addWidget(c_spin, 0, 2)

        nu_svm = gui.appendRadioButton(typebox, "ν-SVM", addToLayout=False)
        form.addWidget(nu_svm, 1, 0, Qt.AlignLeft)
        form.addWidget(
            QtGui.QLabel(self.trUtf8("Complexity bound (\u03bd)")),
            1, 1, Qt.AlignRight)
        nu_spin = gui.doubleSpin(typebox, self, "nu", 0.05, 1.0, 0.05,
                                 decimals=2, addToLayout=False)
        form.addWidget(nu_spin, 1, 2)

        # Kernel selection and the kernel parameters g, c and d.
        box = gui.widgetBox(self.controlArea, self.tr("Kernel"))
        gui.radioButtonsInBox(
            box, self, "kernel_type",
            btnLabels=["Linear, x∙y",
                       "Polynomial, (g x∙y + c)^d",
                       "RBF, exp(-g|x-y|²)",
                       "Sigmoid, tanh(g x∙y + c)"],
            callback=self._on_kernel_changed)

        parambox = gui.widgetBox(box, orientation="horizontal")
        gamma = gui.doubleSpin(
            parambox, self, "gamma", 0.0, 10.0, 0.0001,
            label=" g: ", orientation="horizontal",
            alignment=Qt.AlignRight)
        coef0 = gui.doubleSpin(
            parambox, self, "coef0", 0.0, 10.0, 0.0001,
            label=" c: ", orientation="horizontal",
            alignment=Qt.AlignRight)
        degree = gui.doubleSpin(
            parambox, self, "degree", 0.0, 10.0, 0.5,
            label=" d: ", orientation="horizontal",
            alignment=Qt.AlignRight)
        # Order must match the columns of the table in `_on_kernel_changed`.
        self._kernel_params = [gamma, coef0, degree]

        box = gui.widgetBox(self.controlArea, "Optimization parameters")
        gui.doubleSpin(box, self, "tol", 1e-7, 1.0, 5e-7,
                       label="Numerical Tolerance")
        gui.spin(box, self, "max_iter", 0, 1e6, 100,
                 label="Iteration Limit", checked="limit_iter")

        gui.button(self.controlArea, self, "&Apply",
                   callback=self.apply, default=True)

        self._on_kernel_changed()
        self.apply()

    @check_sql_input
    def set_data(self, data):
        """Set the input train data set and refresh the outputs.

        `apply` is called for ``None`` as well, so that a classifier and
        support vectors computed from a previous input are withdrawn when
        the data is removed.
        """
        self.data = data
        self.apply()

    def set_preprocessor(self, preproc):
        """Set the preprocessor handed to the learner and refresh the outputs."""
        if preproc is None:
            self.preprocessors = None
        else:
            self.preprocessors = (preproc, )
        self.apply()

    def apply(self):
        """Build the learner from the current settings and send all outputs.

        The classifier and support vectors are sent as ``None`` when there is
        no data, when the learner rejects the data domain, or when the data
        has fewer than two distinct target values.
        """
        kernel = ["linear", "poly", "rbf", "sigmoid"][self.kernel_type]
        common_args = dict(
            kernel=kernel,
            degree=self.degree,
            gamma=self.gamma,
            coef0=self.coef0,
            tol=self.tol,
            # -1 is passed when the iteration limit is switched off.
            max_iter=self.max_iter if self.limit_iter else -1,
            probability=True,
            preprocessors=self.preprocessors
        )
        if self.svmtype == 0:
            learner = SVMLearner(C=self.C, **common_args)
        else:
            learner = NuSVMLearner(nu=self.nu, **common_args)
        learner.name = self.learner_name

        classifier = None
        sv = None
        # Clear both error slots up front so messages about a previous input
        # do not linger once the data is removed.
        self.error([0, 1])
        if self.data is not None:
            if not learner.check_learner_adequacy(self.data.domain):
                self.error(0, learner.learner_adequacy_err_msg)
            elif len(np.unique(self.data.Y)) < 2:
                self.error(1, "Data contains only one target value.")
            else:
                classifier = learner(self.data)
                classifier.name = self.learner_name
                sv = self.data[classifier.skl_model.support_]

        self.send("Learner", learner)
        self.send("Classifier", classifier)
        self.send("Support vectors", sv)

    def _on_kernel_changed(self):
        """Enable only the parameter spin boxes used by the selected kernel."""
        # Columns follow self._kernel_params: gamma, coef0, degree.
        enabled_by_kernel = [
            [False, False, False],  # linear
            [True, True, True],     # poly
            [True, False, False],   # rbf
            [True, True, False],    # sigmoid
        ]
        mask = enabled_by_kernel[self.kernel_type]
        for spin, is_enabled in zip(self._kernel_params, mask):
            spin.setEnabled(is_enabled)
class OWClusterAnalysis(OWWidget):
    """Widget that scores genes and gene sets for each cluster of the input data.

    Clusters are defined by one or more discrete "cluster indicator"
    variables. For every cluster the widget scores genes, runs gene set
    enrichment on them, shows the result in a table and outputs the selected
    clusters' data, gene scores and gene set scores.
    """

    name = "Cluster Analysis"
    description = (
        "The widget displays differentially expressed genes that characterize the cluster, "
        "and corresponding gene terms that describe differentially expressed genes"
    )
    icon = "../widgets/icons/OWClusterAnalysis.svg"
    priority = 110

    class Inputs:
        data_table = Input('Data', Table)
        custom_sets = Input('Custom Gene Sets', Table)

    class Outputs:
        selected_data = Output('Selected Data', Table)
        gene_scores = Output('Gene Scores', Table)
        gene_set_scores = Output('Gene Set Scores', Table)

    class Information(OWWidget.Information):
        pass

    class Warning(OWWidget.Warning):
        gene_enrichment = Msg('{}, {}.')
        no_selected_gene_sets = Msg(
            'No gene set selected, select them from Gene Sets box.')

    class Error(OWWidget.Error):
        no_cluster_indicator = Msg('No cluster indicator in the input data')
        gene_as_attributes = Msg(
            'Genes, in the input data, are expected as column names')
        organism_mismatch = Msg(
            'Organism in input data and custom gene sets does not match')
        cluster_batch_conflict = Msg(
            'Cluster and batch must not be the same variable')

    settingsHandler = ClusterAnalysisContextHandler()
    cluster_indicators = ContextSetting([])
    batch_indicator = ContextSetting(None)
    stored_gene_sets_selection = ContextSetting(())

    scoring_method_selection = ContextSetting(0)
    scoring_method_design = ContextSetting(0)
    scoring_test_type = ContextSetting(0)

    # genes filter
    max_gene_count = Setting(20)
    use_gene_count_filter = Setting(True)

    max_gene_p_value = Setting(0.1)
    use_gene_pval_filter = Setting(False)

    max_gene_fdr = Setting(0.1)
    use_gene_fdr_filter = Setting(True)

    # gene sets filter
    min_gs_count = Setting(5)
    use_gs_count_filter = Setting(True)

    max_gs_p_value = Setting(0.1)
    use_gs_pval_filter = Setting(False)

    max_gs_fdr = Setting(0.1)
    use_gs_max_fdr = Setting(True)

    # auto commit results
    auto_commit = settings.Setting(False)

    custom_gene_set_indicator = settings.Setting(None)

    def __init__(self):
        super().__init__()

        # widget attributes
        self.input_data = None
        self.store_input_domain = None
        self.input_genes_names = []
        self.input_genes_ids = []
        self.tax_id = None
        self.use_attr_names = None
        self.gene_id_attribute = None

        # custom gene set input
        self.feature_model = itemmodels.DomainModel(
            valid_types=(DiscreteVariable, StringVariable))
        self.custom_data = None
        self.custom_tax_id = None
        self.custom_use_attr_names = None
        self.custom_gene_id_attribute = None
        self.custom_gene_id_column = None
        self.num_of_custom_sets = None

        self.rows_by_cluster = None
        self.rows_by_batch = None
        self.clusters = []
        self.new_cluster_profile = []
        # Set here so that `__set_batch` can always read it.
        self.cluster_var = None

        # data model
        self.cluster_info_model = None

        # Info
        info_box = vBox(self.controlArea, 'Info')
        self.input_info = widgetLabel(info_box)

        # Cluster selection
        self.cluster_indicator_model = itemmodels.DomainModel(
            valid_types=(DiscreteVariable, ), separators=False)
        self.cluster_indicator_box = widgetBox(self.controlArea,
                                               'Cluster Indicator')
        self.cluster_indicator_view = listView(
            self.cluster_indicator_box,
            self,
            'cluster_indicators',
            model=self.cluster_indicator_model,
            selectionMode=QListWidget.MultiSelection,
            callback=self.invalidate,
            sizeHint=QSize(256, 70),
        )

        # Batch selection
        self.batch_indicator_model = itemmodels.DomainModel(
            valid_types=(DiscreteVariable, ), separators=False,
            placeholder="")
        box = widgetBox(self.controlArea, 'Batch Indicator')
        self.batch_indicator_combobox = comboBox(
            box,
            self,
            'batch_indicator',
            model=self.batch_indicator_model,
            sendSelectedValue=True,
            callback=self.batch_indicator_changed,
        )

        # Gene scoring
        box = widgetBox(self.controlArea, 'Gene Scoring')
        self.gene_scoring = GeneScoringWidget(box, self)
        self.gene_scoring.set_method_selection_area('scoring_method_selection')
        self.gene_scoring.set_method_design_area('scoring_method_design')
        self.gene_scoring.set_test_type('scoring_test_type')

        # Gene Sets widget
        gene_sets_box = widgetBox(self.controlArea, "Gene Sets")
        self.gs_widget = GeneSetsSelection(gene_sets_box, self,
                                           'stored_gene_sets_selection')
        self.gs_widget.hierarchy_tree_widget.itemClicked.connect(
            self.__gene_sets_enrichment)

        # custom gene sets area
        box = vBox(self.controlArea, "Custom Gene Sets")

        if self.custom_gene_set_indicator not in self.feature_model:
            self.custom_gene_set_indicator = None

        self.gs_label_combobox = comboBox(
            box,
            self,
            "custom_gene_set_indicator",
            sendSelectedValue=True,
            model=self.feature_model,
            callback=self.handle_custom_gene_sets,
        )
        self.gs_label_combobox.setDisabled(True)

        # main area
        splitter = QSplitter(Qt.Horizontal, self.mainArea)
        self.mainArea.layout().addWidget(splitter)

        genes_filter = widgetBox(splitter, 'Filter Genes',
                                 orientation=QHBoxLayout())
        spin(
            genes_filter,
            self,
            'max_gene_count',
            0,
            10000,
            label='Count',
            # The bound value is an upper limit (max_gene_count).
            tooltip='Maximum genes count',
            checked='use_gene_count_filter',
            callback=self.filter_genes,
            callbackOnReturn=True,
            checkCallback=self.filter_genes,
        )

        doubleSpin(
            genes_filter,
            self,
            'max_gene_p_value',
            0.0,
            1.0,
            0.0001,
            label='p-value',
            tooltip='Maximum p-value of the enrichment score',
            checked='use_gene_pval_filter',
            callback=self.filter_genes,
            callbackOnReturn=True,
            checkCallback=self.filter_genes,
        )

        doubleSpin(
            genes_filter,
            self,
            'max_gene_fdr',
            0.0,
            1.0,
            0.0001,
            label='FDR',
            tooltip='Maximum false discovery rate',
            checked='use_gene_fdr_filter',
            callback=self.filter_genes,
            callbackOnReturn=True,
            checkCallback=self.filter_genes,
        )

        gene_sets_filter = widgetBox(splitter, 'Filter Gene Sets',
                                     orientation=QHBoxLayout())
        spin(
            gene_sets_filter,
            self,
            'min_gs_count',
            0,
            DISPLAY_GENE_SETS_COUNT,
            label='Count',
            tooltip='Minimum genes count',
            checked='use_gs_count_filter',
            callback=self.filter_gene_sets,
            callbackOnReturn=True,
            checkCallback=self.filter_gene_sets,
        )

        doubleSpin(
            gene_sets_filter,
            self,
            'max_gs_p_value',
            0.0,
            1.0,
            0.0001,
            label='p-value',
            tooltip='Maximum p-value of the enrichment score',
            checked='use_gs_pval_filter',
            callback=self.filter_gene_sets,
            callbackOnReturn=True,
            checkCallback=self.filter_gene_sets,
        )

        doubleSpin(
            gene_sets_filter,
            self,
            'max_gs_fdr',
            0.0,
            1.0,
            0.0001,
            label='FDR',
            tooltip='Maximum false discovery rate',
            checked='use_gs_max_fdr',
            callback=self.filter_gene_sets,
            callbackOnReturn=True,
            checkCallback=self.filter_gene_sets,
        )

        self.cluster_info_view = QTableView()
        self.cluster_info_view.verticalHeader().setVisible(False)
        self.cluster_info_view.setItemDelegate(HTMLDelegate())
        self.cluster_info_view.horizontalHeader().hide()
        self.cluster_info_view.horizontalHeader().setSectionResizeMode(
            QHeaderView.Stretch)

        auto_commit(self.controlArea, self, "auto_commit", "&Commit",
                    box=False)

        self.mainArea.layout().addWidget(self.cluster_info_view)

    def sizeHint(self):
        """Return the preferred initial size of the widget."""
        return QSize(800, 600)

    def __update_info_box(self):
        """Refresh the 'Info' label from the current input state."""
        info_string = ''
        if self.input_genes_ids:
            info_string += '{} samples, {} clusters\n'.format(
                self.input_data.X.shape[0],
                len(self.clusters) if self.clusters else '?')
            info_string += '{:,d} unique genes\n'.format(
                len(self.input_genes_ids))
        else:
            info_string += 'No genes on input.\n'

        if self.custom_data:
            info_string += '{} marker genes in {} sets\n'.format(
                self.custom_data.X.shape[0], self.num_of_custom_sets)

        self.input_info.setText(info_string)

    def __set_cluster_info_model(self):
        """Create a fresh ClusterModel for `self.clusters` and show it."""
        self.cluster_info_view.setModel(None)

        self.cluster_info_model = ClusterModel(self)
        self.cluster_info_model.add_rows(self.clusters)

        # add model to the view
        self.cluster_info_view.setModel(self.cluster_info_model)
        # call sizeHint function
        self.cluster_info_view.resizeRowsToContents()
        self.cluster_info_view.selectionModel().selectionChanged.connect(
            self.commit)

    def __create_temp_class_var(self):
        """Combine all selected cluster indicators into one discrete variable.

        Every combination of indicator values becomes one value of the new
        variable, named by joining the value names with ``', '``. The value
        indices behind each combination are appended to
        `self.new_cluster_profile`. The new variable is added to the metas of
        `self.input_data`, which is replaced by the extended table.

        :return: the new combined variable
        """
        cluster_indicator_name = 'Cluster indicators'
        new_cluster_values = []

        # Carry each value's index within its own variable through the
        # product. A single name -> index dict shared by all variables would
        # return the wrong index when two variables have a value with the
        # same name at different positions.
        indexed_values = [
            list(enumerate(var.values)) for var in self.cluster_indicators
        ]
        for comb in itertools.product(*indexed_values):
            new_cluster_values.append(', '.join(val for _, val in comb))
            self.new_cluster_profile.append([idx for idx, _ in comb])

        row_profile_lookup = {
            tuple(profile): indx
            for indx, profile in enumerate(self.new_cluster_profile)
        }

        # One row per indicator variable, one column per data instance.
        row_profile = np.vstack([
            np.asarray(self.input_data.get_column_view(var)[0], dtype=int)
            for var in self.cluster_indicators
        ])

        ca_ind = DiscreteVariable.make(
            cluster_indicator_name,
            values=list(new_cluster_values),
            ordered=True)

        domain = Domain(
            self.input_data.domain.attributes,
            self.input_data.domain.class_vars,
            self.input_data.domain.metas + (ca_ind, ),
        )

        table = self.input_data.transform(domain)
        table[:, ca_ind] = np.array(
            [[row_profile_lookup[tuple(row_profile[:, i])]]
             for i in range(row_profile.shape[1])])
        self.input_data = table
        return ca_ind

    def __set_clusters(self):
        """Rebuild `self.clusters` from the selected cluster indicators."""
        self.clusters = []
        self.new_cluster_profile = []
        self.cluster_var = None

        if self.cluster_indicators and self.input_data:
            if isinstance(self.cluster_indicators, list) \
                    and len(self.cluster_indicators) > 1:
                self.cluster_var = self.__create_temp_class_var()
            else:
                self.cluster_var = self.cluster_indicators[0]

            self.rows_by_cluster = np.asarray(
                self.input_data.get_column_view(self.cluster_var)[0],
                dtype=int)
            for index, name in enumerate(self.cluster_var.values):
                cluster = Cluster(name, index)
                self.clusters.append(cluster)
                cluster.set_genes(self.input_genes_names,
                                  self.input_genes_ids)

    def __set_batch(self):
        """Store the batch index of every row, or flag a conflict.

        `self.rows_by_batch` stays ``None`` when no batch indicator is chosen
        or when it is the same variable as the cluster variable.
        """
        self.Error.cluster_batch_conflict.clear()
        self.rows_by_batch = None

        # Only a real batch variable can conflict; without this check
        # "no batch" and "no cluster" (both None) compared equal and raised
        # the error.
        if self.batch_indicator is not None \
                and self.batch_indicator == self.cluster_var:
            self.Error.cluster_batch_conflict()
            return

        if self.batch_indicator and self.input_data:
            self.rows_by_batch = np.asarray(
                self.input_data.get_column_view(self.batch_indicator)[0],
                dtype=int)

    def __set_genes(self):
        """Collect gene names and ids from the attributes of the input data."""
        self.input_genes_names = []
        self.input_genes_ids = []

        if self.use_attr_names:
            for variable in self.input_data.domain.attributes:
                self.input_genes_names.append(str(variable.name))
                self.input_genes_ids.append(
                    str(variable.attributes.get(self.gene_id_attribute,
                                                np.nan)))

    def filter_genes(self):
        """Apply the gene filters, then redo the enrichment and commit."""
        if self.cluster_info_model:
            # filter genes
            # note: after gene filter is applied, we need to recalculate gene set enrichment
            self.cluster_info_model.apply_gene_filters(
                self.max_gene_p_value if self.use_gene_pval_filter else None,
                self.max_gene_fdr if self.use_gene_fdr_filter else None,
                self.max_gene_count if self.use_gene_count_filter else None,
            )

            # recalculate gene set enrichment
            self.__gene_sets_enrichment()
            # call sizeHint function
            self.cluster_info_view.resizeRowsToContents()

            # commit changes after filter
            self.commit()

    def filter_gene_sets(self):
        """Apply the gene set filters to the cluster model."""
        if self.cluster_info_model:
            # filter gene sets
            self.cluster_info_model.apply_gene_sets_filters(
                self.max_gs_p_value if self.use_gs_pval_filter else None,
                self.max_gs_fdr if self.use_gs_max_fdr else None,
                self.min_gs_count if self.use_gs_count_filter else None,
            )

            # call sizeHint function
            self.cluster_info_view.resizeRowsToContents()

    def __gene_enrichment(self):
        """Score genes per cluster with the selected method, design and test.

        A ``ValueError`` raised during scoring (including the binary-data
        check for the hypergeometric test) is shown as a warning.
        """
        # if true cluster vs. cluster else cluster vs rest
        design = bool(self.gene_scoring.get_selected_desig())
        test_type = self.gene_scoring.get_selected_test_type()
        method = self.gene_scoring.get_selected_method()
        try:
            if method.score_function == score_hypergeometric_test:
                values = set(np.unique(self.input_data.X))
                if (0 not in values) or (len(values) != 2):
                    raise ValueError('Binary data expected (use Preprocess)')

            self.cluster_info_model.score_genes(
                design=design,
                table_x=self.input_data.X,
                rows_by_cluster=self.rows_by_cluster,
                rows_by_batch=self.rows_by_batch,
                method=method,
                alternative=test_type,
            )
        except ValueError as e:
            self.Warning.gene_enrichment(str(e), 'p-values are set to 1')

    def __gene_sets_enrichment(self):
        """Run gene set enrichment for the selected gene set hierarchies."""
        # The model does not exist yet when data arrived but `invalidate`
        # has not run (e.g. the input was rejected with an error).
        if not self.input_data or self.cluster_info_model is None:
            return

        self.Warning.no_selected_gene_sets.clear()
        all_sets = self.gs_widget.get_hierarchies()
        selected_sets = self.gs_widget.get_hierarchies(only_selected=True)

        if len(selected_sets) == 0 and len(all_sets) > 0:
            self.Warning.no_selected_gene_sets()

        # save setting on selected hierarchies
        self.stored_gene_sets_selection = tuple(selected_sets)
        ref_genes = set(self.input_genes_ids)

        # TODO: possible exceptions? For now they propagate to the caller.
        self.cluster_info_model.gene_sets_enrichment(
            self.gs_widget.gs_object, selected_sets, ref_genes)

        self.filter_gene_sets()

    def invalidate(self, cluster_init=True):
        """Recompute genes, clusters, batches and gene scores.

        :param cluster_init: when False, the existing clusters are kept
            (used when only the batch indicator changed)
        """
        if self.input_data is not None and self.tax_id is not None:
            self.Warning.gene_enrichment.clear()

            if self.cluster_info_model is not None:
                self.cluster_info_model.cancel()

            self.__set_genes()
            if cluster_init:
                self.__set_clusters()
            self.__set_batch()
            self.__set_cluster_info_model()

            # note: when calling self.__gene_enrichment we calculate gse automatically.
            #       No need to call self.__gene_sets_enrichment here
            self.__gene_enrichment()
            self.__update_info_box()

    def batch_indicator_changed(self):
        """Recompute the scores after the batch indicator changed."""
        self.invalidate(cluster_init=False)

    @Inputs.data_table
    def handle_input(self, data):
        """Handle the 'Data' input: reset the state and load the new table."""
        self.closeContext()
        self.Warning.clear()
        self.Error.clear()

        self.input_data = None
        self.store_input_domain = None
        self.stored_gene_sets_selection = ()
        self.input_genes_names = []
        self.input_genes_ids = []
        self.tax_id = None
        self.use_attr_names = None
        self.gene_id_attribute = None
        self.clusters = None

        self.gs_widget.clear()
        self.gs_widget.clear_gene_sets()
        self.cluster_info_view.setModel(None)

        self.cluster_indicators = []
        self.cluster_var = None
        self.batch_indicator = None
        self.cluster_indicator_model.set_domain(None)
        self.batch_indicator_model.set_domain(None)

        self.__update_info_box()

        if data:
            self.input_data = data

            self.cluster_indicator_model.set_domain(self.input_data.domain)
            self.batch_indicator_model.set_domain(self.input_data.domain)

            # For Cluster Indicator do not use categorical variables that contain only one value.
            self.cluster_indicator_model.wrap([
                item for item in self.cluster_indicator_model
                if len(item.values) > 1
            ])
            # First value in batch indicator model is a NoneType,
            # we can skip it when we validate categorical variables
            self.batch_indicator_model.wrap(self.batch_indicator_model[:1] + [
                item for item in self.batch_indicator_model[1:]
                if len(item.values) > 1
            ])

            self.tax_id = self.input_data.attributes.get(TAX_ID, None)
            self.use_attr_names = self.input_data.attributes.get(
                GENE_AS_ATTRIBUTE_NAME, None)
            self.gene_id_attribute = self.input_data.attributes.get(
                GENE_ID_ATTRIBUTE, None)

            if not self.cluster_indicator_model:
                self.Error.no_cluster_indicator()
                return
            elif not self.use_attr_names:
                self.Error.gene_as_attributes()
                return

            self.openContext(self.input_data.domain)

            self.gs_widget.load_gene_sets(self.tax_id)
            if self.cluster_indicator_model \
                    and len(self.cluster_indicators) < 1:
                self.cluster_indicators = [self.cluster_indicator_model[0]]
            if self.batch_indicator_model and self.batch_indicator is None:
                self.batch_indicator = self.batch_indicator_model[0]

            self.invalidate()

            if self.custom_data:
                self.refresh_custom_gene_sets()
                self._handle_future_model()
                self.handle_custom_gene_sets()

    @Inputs.custom_sets
    def handle_custom_input(self, data):
        """Handle the 'Custom Gene Sets' input."""
        self.Error.clear()
        self.Warning.clear()
        self.closeContext()
        self.custom_data = None
        self.custom_tax_id = None
        self.custom_use_attr_names = None
        self.custom_gene_id_attribute = None
        self.custom_gene_id_column = None
        self.num_of_custom_sets = None
        self.feature_model.set_domain(None)

        if data:
            self.custom_data = data
            self.feature_model.set_domain(self.custom_data.domain)
            # Keep a missing tax id as None; str(None) would yield 'None',
            # which the organism check treats as a real, different organism.
            custom_tax_id = self.custom_data.attributes.get(TAX_ID, None)
            self.custom_tax_id = (
                None if custom_tax_id is None else str(custom_tax_id))
            self.custom_use_attr_names = self.custom_data.attributes.get(
                GENE_AS_ATTRIBUTE_NAME, None)
            self.custom_gene_id_attribute = self.custom_data.attributes.get(
                GENE_ID_ATTRIBUTE, None)
            self.custom_gene_id_column = self.custom_data.attributes.get(
                GENE_ID_COLUMN, None)

            self._handle_future_model()

        if self.input_data:
            self.openContext(self.input_data.domain)

        self.gs_label_combobox.setDisabled(True)
        self.refresh_custom_gene_sets()
        self.handle_custom_gene_sets(select_customs_flag=True)

    def __check_organism_mismatch(self):
        """
        Check if organisms from different inputs match.

        :return: True if there is a mismatch
        """
        if self.tax_id is not None and self.custom_tax_id is not None:
            # custom_tax_id is stored as str; compare both sides as str so
            # that an int tax id on the data input does not count as a
            # mismatch.
            return str(self.tax_id) != str(self.custom_tax_id)
        return False

    def _handle_future_model(self):
        """Make `custom_gene_set_indicator` point at an item of the feature model.

        Falls back to the first item of the model, or ``None`` when the model
        is empty.
        """
        if self.custom_gene_set_indicator in self.feature_model:
            index = self.feature_model.indexOf(self.custom_gene_set_indicator)
            self.custom_gene_set_indicator = self.feature_model[index]
        elif self.feature_model:
            self.custom_gene_set_indicator = self.feature_model[0]
        else:
            self.custom_gene_set_indicator = None

    def handle_custom_gene_sets(self, select_customs_flag=False):
        """Register the custom gene sets and redo the gene set enrichment.

        :param select_customs_flag: forwarded to ``add_custom_sets``
        """
        if self.custom_gene_set_indicator:
            if self.custom_data is not None \
                    and self.custom_gene_id_column is not None:

                if self.__check_organism_mismatch():
                    self.gs_label_combobox.setDisabled(True)
                    self.Error.organism_mismatch()
                    self.gs_widget.update_gs_hierarchy()
                    self.__gene_sets_enrichment()
                    return

                if isinstance(self.custom_gene_set_indicator,
                              DiscreteVariable):
                    labels = self.custom_gene_set_indicator.values
                    gene_sets_names = [
                        labels[int(idx)]
                        for idx in self.custom_data.get_column_view(
                            self.custom_gene_set_indicator)[0]
                    ]
                else:
                    gene_sets_names, _ = self.custom_data.get_column_view(
                        self.custom_gene_set_indicator)

                self.num_of_custom_sets = len(set(gene_sets_names))
                gene_names, _ = self.custom_data.get_column_view(
                    self.custom_gene_id_column)
                hierarchy_title = (
                    self.custom_data.name
                    if self.custom_data.name else 'Custom sets', )
                try:
                    self.gs_widget.add_custom_sets(
                        gene_sets_names,
                        gene_names,
                        hierarchy_title=hierarchy_title,
                        select_customs_flag=select_customs_flag,
                    )
                except GeneSetException:
                    # Deliberately ignored: the widget carries on without
                    # the custom sets.
                    pass
                self.gs_label_combobox.setDisabled(False)
        else:
            self.gs_widget.update_gs_hierarchy()

        self.__gene_sets_enrichment()
        self.__update_info_box()

    def refresh_custom_gene_sets(self):
        """Remove previously added custom gene sets from the gene sets widget."""
        self.gs_widget.clear_custom_sets()

    def gene_scores_output(self, selected_clusters):
        """Send a table with the filtered genes of `selected_clusters`.

        Each row holds the cluster (class variable), the individual cluster
        indicator values when several indicators are combined, and the gene's
        name, id, rank, score, p-value and FDR.
        """
        metas = [
            StringVariable('Gene'),
            StringVariable(ENTREZ_ID),
            StringVariable('Rank'),
            ContinuousVariable('Statistic score'),
            ContinuousVariable('P-value'),
            ContinuousVariable('FDR'),
        ]

        if len(self.new_cluster_profile):
            # note: order is important
            metas = self.cluster_indicators + metas

        domain = Domain([], metas=metas, class_vars=self.cluster_var)

        data = []
        for cluster in selected_clusters:
            num_of_genes = len(cluster.filtered_genes)

            scores = [gene.score for gene in cluster.filtered_genes]
            p_vals = [gene.p_val for gene in cluster.filtered_genes]
            fdr_vals = [gene.fdr for gene in cluster.filtered_genes]
            gene_names = [
                gene.input_identifier for gene in cluster.filtered_genes
            ]
            gene_ids = [gene.gene_id for gene in cluster.filtered_genes]
            rank = rankdata(p_vals, method='min')

            # First column is the cluster index; with combined indicators it
            # is followed by one column per original indicator.
            profiles = [[cluster.index] * num_of_genes]
            if len(self.new_cluster_profile):
                profiles.extend(
                    [p] * num_of_genes
                    for p in self.new_cluster_profile[cluster.index])

            for row in zip(*profiles, gene_names, gene_ids, rank, scores,
                           p_vals, fdr_vals):
                data.append(list(row))

        out_data = Table(domain, data)
        out_data.attributes[TAX_ID] = self.tax_id
        out_data.attributes[GENE_AS_ATTRIBUTE_NAME] = False
        out_data.attributes[GENE_ID_COLUMN] = ENTREZ_ID
        self.Outputs.gene_scores.send(out_data)

    def gene_set_scores_output(self, selected_clusters):
        """Send a table with the filtered gene sets of `selected_clusters`.

        Rows are laid out like those of `gene_scores_output`, with the term
        name, term id, rank, p-value and FDR of each gene set.
        """
        metas = [
            StringVariable('Term'),
            StringVariable('Term ID'),
            StringVariable('Rank'),
            ContinuousVariable('P-value'),
            ContinuousVariable('FDR'),
        ]

        if len(self.new_cluster_profile):
            # note: order is important
            metas = self.cluster_indicators + metas

        domain = Domain([], metas=metas, class_vars=self.cluster_var)

        data = []
        for cluster in selected_clusters:
            num_of_sets = len(cluster.filtered_gene_sets)

            p_vals = [gs.p_val for gs in cluster.filtered_gene_sets]
            fdr_vals = [gs.fdr for gs in cluster.filtered_gene_sets]
            gs_names = [gs.name for gs in cluster.filtered_gene_sets]
            gs_ids = [gs.gs_id for gs in cluster.filtered_gene_sets]
            rank = rankdata(p_vals, method='min')

            profiles = [[cluster.index] * num_of_sets]
            if len(self.new_cluster_profile):
                profiles.extend(
                    [p] * num_of_sets
                    for p in self.new_cluster_profile[cluster.index])

            for row in zip(*profiles, gs_names, gs_ids, rank, p_vals,
                           fdr_vals):
                data.append(list(row))

        self.Outputs.gene_set_scores.send(Table(domain, data))

    def commit(self):
        """Send the outputs for the clusters selected in the table.

        All three outputs are cleared when there is no data or no selection.
        """
        # The view has no selection model while no model is set on it.
        selection_model = self.cluster_info_view.selectionModel()
        selected_rows = (
            selection_model.selectedRows()
            if selection_model is not None else [])

        if not self.input_data or not selected_rows:
            # Clear every output, not just the data, so that scores of a
            # previous selection are not left behind.
            self.Outputs.selected_data.send(None)
            self.Outputs.gene_scores.send(None)
            self.Outputs.gene_set_scores.send(None)
            return

        selected_clusters = []
        selected_cluster_indexes = set()
        selected_cluster_genes = set()

        for sel_row in selected_rows:
            cluster = sel_row.data()
            selected_clusters.append(cluster)
            selected_cluster_indexes.add(cluster.index)
            selected_cluster_genes.update(
                gene.gene_id for gene in cluster.filtered_genes)

        # get columns of selected clusters
        selected_columns = [
            column for column in self.input_data.domain.attributes
            if self.gene_id_attribute in column.attributes
            and str(column.attributes[self.gene_id_attribute])
            in selected_cluster_genes
        ]

        domain = Domain(selected_columns, self.input_data.domain.class_vars,
                        self.input_data.domain.metas)
        output_data = self.input_data.from_table(domain, self.input_data)

        # get rows of selected clusters
        selected_rows = [
            row_index
            for row_index, col_index in enumerate(self.rows_by_cluster)
            if col_index in selected_cluster_indexes
        ]

        # send to output signal
        self.Outputs.selected_data.send(output_data[selected_rows])
        self.gene_scores_output(selected_clusters)
        self.gene_set_scores_output(selected_clusters)
class OWSVMClassification(OWBaseSVM):
    """SVM classification widget with a choice between C-SVM and ν-SVM.

    Adds the SVM-type selector and an optional iteration limit on top of
    what the ``OWBaseSVM`` base class provides, and builds either an
    ``SVMLearner`` or a ``NuSVMLearner`` from the current settings.
    """
    name = "SVM"
    description = "Support Vector Machines map inputs to higher-dimensional " \
                  "feature spaces that best separate different classes. "
    icon = "icons/SVM.svg"
    priority = 50

    LEARNER = SVMLearner

    outputs = [("Support vectors", Table)]

    # 0: c_svc, 1: nu_svc
    svmtype = settings.Setting(0)
    C = settings.Setting(1.0)
    nu = settings.Setting(0.5)
    # No trailing comma here: with one, the attribute becomes a 1-tuple
    # holding the Setting instead of the Setting itself.
    shrinking = settings.Setting(True)
    probability = settings.Setting(False)
    max_iter = settings.Setting(100)
    limit_iter = settings.Setting(True)

    def _add_type_box(self):
        """Build the "SVM Type" box: one radio button + spin box per type."""
        form = QtGui.QGridLayout()
        self.type_box = box = gui.radioButtonsInBox(
            self.controlArea, self, "svmtype", [], box="SVM Type",
            orientation=form, callback=self.settings_changed)

        # Row 0: C-SVM and its cost parameter.
        form.addWidget(gui.appendRadioButton(box, "C-SVM", addToLayout=False),
                       0, 0, Qt.AlignLeft)
        form.addWidget(QtGui.QLabel("Cost (C):"),
                       0, 1, Qt.AlignRight)
        form.addWidget(gui.doubleSpin(box, self, "C", 1e-3, 1000.0, 0.1,
                                      decimals=3, alignment=Qt.AlignRight,
                                      controlWidth=80, addToLayout=False,
                                      callback=self.settings_changed),
                       0, 2)

        # Row 1: ν-SVM and its complexity parameter.
        form.addWidget(gui.appendRadioButton(box, "ν-SVM", addToLayout=False),
                       1, 0, Qt.AlignLeft)
        form.addWidget(QtGui.QLabel("Complexity (ν):"),
                       1, 1, Qt.AlignRight)
        form.addWidget(gui.doubleSpin(box, self, "nu", 0.05, 1.0, 0.05,
                                      decimals=2, alignment=Qt.AlignRight,
                                      controlWidth=80, addToLayout=False,
                                      callback=self.settings_changed),
                       1, 2)

    def _add_optimization_box(self):
        """Extend the inherited optimization box with the iteration limit."""
        super()._add_optimization_box()
        gui.spin(self.optimization_box, self, "max_iter", 50, 1e6, 50,
                 label="Iteration limit:", checked="limit_iter",
                 alignment=Qt.AlignRight, controlWidth=100,
                 callback=self.settings_changed)

    def create_learner(self):
        """Return an SVM learner configured from the current settings.

        ``svmtype == 0`` yields an ``SVMLearner`` (parameter ``C``); any other
        value yields a ``NuSVMLearner`` (parameter ``nu``).
        """
        kernel = ["linear", "poly", "rbf", "sigmoid"][self.kernel_type]
        common_args = dict(
            kernel=kernel,
            degree=self.degree,
            gamma=self.gamma,
            coef0=self.coef0,
            tol=self.tol,
            # -1 is passed when the iteration limit is switched off.
            max_iter=self.max_iter if self.limit_iter else -1,
            # NOTE: always True; the `probability` setting is not read here.
            probability=True,
            preprocessors=self.preprocessors
        )
        if self.svmtype == 0:
            return SVMLearner(C=self.C, **common_args)
        else:
            return NuSVMLearner(nu=self.nu, **common_args)

    def get_learner_parameters(self):
        """Return an ordered mapping of parameter labels to report values."""
        items = OrderedDict()
        if self.svmtype == 0:
            items["SVM type"] = "C-SVM, C={}".format(self.C)
        else:
            items["SVM type"] = "ν-SVM, ν={}".format(self.nu)
        self._report_kernel_parameters(items)
        items["Numerical tolerance"] = "{:.6}".format(self.tol)
        items["Iteration limit"] = \
            self.max_iter if self.limit_iter else "unlimited"
        return items
class OWBaseSVM(OWBaseLearner):
    """Common kernel and optimization controls for SVM learner widgets."""

    #: Kernel types
    Linear, Poly, RBF, Sigmoid = range(4)
    #: Selected kernel type
    kernel_type = settings.Setting(RBF)
    #: kernel degree
    degree = settings.Setting(3)
    #: gamma
    gamma = settings.Setting(1.0)
    #: coef0 (additive constant)
    coef0 = settings.Setting(0.0)
    #: numerical tolerance
    tol = settings.Setting(0.001)

    #: (label, equation) pairs, indexed by kernel type
    kernels = (("Linear", "x⋅y"),
               ("Polynomial", "(g x⋅y + c)<sup>d</sup>"),
               ("RBF", "exp(-g|x-y|²)"),
               ("Sigmoid", "tanh(g x⋅y + c)"))

    def _add_kernel_box(self):
        """Build the "Kernel" box: kernel radio buttons and g/c/d spin boxes."""
        # Initialize with the widest label to measure max width
        _, widest_equation = self.kernels[-1]
        self.kernel_eq = widest_equation

        kernel_box = gui.hBox(self.controlArea, "Kernel")

        labels = [label for label, _ in self.kernels]
        self.kernel_box = radio_box = gui.radioButtonsInBox(
            kernel_box, self, "kernel_type", btnLabels=labels,
            callback=self._on_kernel_changed, addSpace=20)
        radio_box.layout().setSpacing(10)
        gui.rubber(radio_box)

        param_box = gui.vBox(kernel_box)
        gui.label(param_box, self, "Kernel: %(kernel_eq)s")
        spin_options = {
            "orientation": Qt.Horizontal,
            "callback": self.settings_changed,
            "alignment": Qt.AlignRight,
            "controlWidth": 80,
        }
        spin_row = gui.hBox(param_box)
        gui.rubber(spin_row)
        spin_column = gui.vBox(spin_row)
        # (setting name, step, label), in the order gamma, coef0, degree;
        # _show_right_kernel relies on this order.
        spin_specs = (("gamma", 0.01, " g: "),
                      ("coef0", 0.01, " c: "),
                      ("degree", 0.5, " d: "))
        self._kernel_params = [
            gui.doubleSpin(spin_column, self, setting, 0.0, 10.0, step,
                           label=label, **spin_options)
            for setting, step, label in spin_specs
        ]
        gui.rubber(param_box)

        # This is the maximal height (all double spins are visible)
        # and the maximal width (the label is initialized to the widest one)
        kernel_box.layout().activate()
        size_hint_height = kernel_box.sizeHint().height()
        kernel_box.setFixedHeight(size_hint_height)
        size_hint_width = kernel_box.sizeHint().width()
        kernel_box.setMinimumWidth(size_hint_width)

    def _add_optimization_box(self):
        """Build the "Optimization Parameters" box with the tolerance spin."""
        self.optimization_box = gui.vBox(
            self.controlArea, "Optimization Parameters")
        gui.doubleSpin(
            self.optimization_box, self, "tol", 1e-6, 1.0, 1e-5,
            label="Numerical tolerance:",
            decimals=6, alignment=Qt.AlignRight, controlWidth=100,
            callback=self.settings_changed)

    def add_main_layout(self):
        """Assemble the type, kernel and optimization boxes, in that order."""
        for build in (self._add_type_box,
                      self._add_kernel_box,
                      self._add_optimization_box,
                      self._show_right_kernel):
            build()

    def _show_right_kernel(self):
        """Show the equation and the spin boxes of the selected kernel."""
        # Rows are indexed by kernel type, columns are (gamma, coef0, degree).
        visibility = ((False, False, False),  # linear
                      (True, True, True),     # poly
                      (True, False, False),   # rbf
                      (True, True, False))    # sigmoid

        self.kernel_eq = self.kernels[self.kernel_type][1]
        row = visibility[self.kernel_type]
        for spin, visible in zip(self._kernel_params, row):
            if visible:
                spin.box.show()
            else:
                spin.box.hide()

    def _on_kernel_changed(self):
        """React to a kernel change: refresh the controls, then notify."""
        self._show_right_kernel()
        self.settings_changed()

    def _report_kernel_parameters(self, items):
        """Store a description of the selected kernel in ``items["Kernel"]``."""
        kernel = self.kernel_type
        if kernel == 0:
            text = "Linear"
        elif kernel == 1:
            text = "Polynomial, ({g:.4} x⋅y + {c:.4})<sup>{d}</sup>".format(
                g=self.gamma, c=self.coef0, d=self.degree)
        elif kernel == 2:
            text = "RBF, exp(-{:.4}|x-y|²)".format(self.gamma)
        else:
            text = "Sigmoid, tanh({g:.4} x⋅y + {c:.4})".format(
                g=self.gamma, c=self.coef0)
        items["Kernel"] = text

    def update_model(self):
        """Update the model, then send its support vectors (or None)."""
        super().update_model()
        support_vectors = (
            self.data[self.model.skl_model.support_]
            if self.valid_data else None)
        self.send("Support vectors", support_vectors)
class OWLogisticRegression(OWBaseLearner):
    """Logistic regression widget with L1/L2 penalty and a strength slider."""

    name = "Logistic Regression"
    description = "The logistic regression classification algorithm with " \
                  "LASSO (L1) or ridge (L2) regularization."
    icon = "icons/LogisticRegression.svg"
    priority = 60

    LEARNER = LogisticRegressionLearner

    outputs = [("Coefficients", Table)]

    # Index into `penalty_types`
    penalty_type = settings.Setting(1)
    # Index into `C_s`
    C_index = settings.Setting(61)

    # Candidate values of C in descending order, from 1000 down to 0.001;
    # the slider position selects one of them.
    C_s = list(chain(range(1000, 200, -50),
                     range(200, 100, -10),
                     range(100, 20, -5),
                     range(20, 0, -1),
                     [x / 10 for x in range(9, 2, -1)],
                     [x / 100 for x in range(20, 2, -1)],
                     [x / 1000 for x in range(20, 0, -1)]))

    # Fixed learner arguments (not exposed in the GUI)
    dual = False
    tol = 0.0001
    fit_intercept = True
    intercept_scaling = 1.0

    penalty_types = ("Lasso (L1)", "Ridge (L2)")

    def add_main_layout(self):
        """Build the regularization type combo and the strength slider."""
        main_box = gui.widgetBox(self.controlArea, box=True)
        gui.comboBox(main_box, self, "penalty_type",
                     label="Regularization type: ",
                     items=self.penalty_types, orientation=Qt.Horizontal,
                     addSpace=4, callback=self.settings_changed)
        gui.widgetLabel(main_box, "Strength:")

        slider_row = gui.hBox(gui.indentedBox(main_box))
        weak_label = gui.widgetLabel(slider_row, "Weak")
        weak_label.setStyleSheet("margin-top:6px")
        last_index = len(self.C_s) - 1
        gui.hSlider(slider_row, self, "C_index",
                    minValue=0, maxValue=last_index,
                    callback=self.set_c, createLabel=False)
        strong_label = gui.widgetLabel(slider_row, "Strong")
        strong_label.setStyleSheet("margin-top:6px")

        label_row = gui.hBox(main_box)
        label_row.layout().setAlignment(Qt.AlignCenter)
        self.c_label = gui.widgetLabel(label_row)
        self.set_c()

    def set_c(self):
        """Read C from the slider position, update the label and notify."""
        self.C = self.C_s[self.C_index]
        # Values below 1 are shown with three decimals.
        if self.C >= 1:
            text = "C={}".format(self.C)
        else:
            text = "C={:.3f}".format(self.C)
        self.c_label.setText(text)
        self.settings_changed()

    def create_learner(self):
        """Return a ``LEARNER`` instance built from the current settings."""
        penalties = ["l1", "l2"]
        return self.LEARNER(
            penalty=penalties[self.penalty_type],
            dual=self.dual,
            tol=self.tol,
            C=self.C,
            fit_intercept=self.fit_intercept,
            intercept_scaling=self.intercept_scaling,
            preprocessors=self.preprocessors
        )

    def update_model(self):
        """Update the model, then send its coefficient table (or None)."""
        super().update_model()
        coef_table = create_coef_table(self.model) if self.valid_data else None
        self.send("Coefficients", coef_table)

    def get_learner_parameters(self):
        """Return the report items: the penalty type and the value of C."""
        penalty_name = self.penalty_types[self.penalty_type]
        strength = self.C_s[self.C_index]
        summary = "{}, C={}".format(penalty_name, strength)
        return (("Regularization", summary),)
class OWParallelCoordinates(widget.OWWidget):
    """Parallel-coordinates plot widget with interval-based selection.

    The plot shows the dimensions listed in ``selected_attrs`` (at most
    ``MAX_N_DIMS``), optionally colored by ``color_attr``. Rows falling inside
    every interval stored in ``constraint_range`` form the selection.
    """
    name = "Parallel Coordinates"
    description = "Parallel coordinates display of multi-dimensional data."
    icon = "icons/ParallelCoordinates.svg"
    priority = 900
    inputs = [("Data", Table, 'set_data', widget.Default),
              ("Features", widget.AttributeList, 'set_shown_attributes')]
    outputs = [("Selected Data", Table, widget.Default),
               ("Annotated Data", Table),
               ("Features", widget.AttributeList)]
    graph_name = 'graph'

    settingsHandler = settings.DomainContextHandler()

    # Declared once; the original repeated this declaration further down.
    autocommit = settings.Setting(True)
    selected_attrs = settings.ContextSetting([])
    color_attr = settings.ContextSetting('')
    constraint_range = settings.ContextSetting({})

    UserAdviceMessages = [
        widget.Message(
            'You can select subsets of data based on value intervals '
            'by dragging on the corresponding dimensions\' axes.\n\n'
            'You can reset the selection by clicking somewhere '
            'outside the selected interval on the axis.',
            'subset-selection')
    ]

    class Warning(widget.OWWidget.Warning):
        too_many_selected_dimensions = widget.Msg(
            'Too many dimensions selected ({}). Only first {} shown.')

    class Information(widget.OWWidget.Information):
        dataset_sampled = widget.Msg('Showing a random sample of your data.')

    # (min, max) number of selected dimensions for which optimize() runs
    OPTIMIZATION_N_DIMS = (3, 9)
    # Maximum number of dimensions drawn
    MAX_N_DIMS = 20

    def __init__(self):
        super().__init__()
        self.graph = ParallelCoordinates(self)
        self.mainArea.layout().addWidget(self.graph)

        self.model = DomainModel(separators=False,
                                 valid_types=DomainModel.PRIMITIVE)
        self.colormodel = DomainModel(valid_types=DomainModel.PRIMITIVE)

        box = gui.vBox(self.controlArea, 'Lines')
        combo = gui.comboBox(box, self, 'color_attr',
                             sendSelectedValue=True,
                             label='Color:', orientation=Qt.Horizontal,
                             callback=self.update_plot)
        combo.setModel(self.colormodel)

        box = gui.vBox(self.controlArea, 'Dimensions')
        view = gui.listView(box, self, 'selected_attrs', model=self.model,
                            callback=self.update_plot)
        view.setSelectionMode(view.ExtendedSelection)
        # Prevent drag selection. Otherwise, each new addition to the
        # selection that the mouse passes over triggers a webview redraw.
        # Sending lots of data around multiple times on large datasets
        # results in stalling and crashes.
        view.mouseMoveEvent = (
            lambda event: None if view.state() == view.DragSelectingState
            else super(view.__class__, view).mouseMoveEvent(event))

        self.optimize_button = gui.button(
            box, self, 'Optimize Selected Dimensions',
            callback=self.optimize,
            tooltip='Optimize visualized dimensions by maximizing cumulative '
                    'Kendall rank correlation coefficient.')

        gui.auto_commit(self.controlArea, self, 'autocommit', '&Apply')

    def set_data(self, data):
        """Set the input data; ``None`` (or an empty table) clears the widget.

        For non-empty data, at most ``N_SAMPLE`` random rows are kept for
        plotting, the first class variable (if any) becomes the color
        attribute, and the first ``OPTIMIZATION_N_DIMS[1]`` model entries are
        selected as dimensions.
        """
        self.data = data
        self.graph.clear()

        self.closeContext()

        model = self.model
        colormodel = self.colormodel

        self.sample = None
        # An empty list rather than None: update_plot() and commit() call
        # len() / AttributeList() on this value.
        self.selected_attrs = []
        self.color_attr = None

        N_SAMPLE = 2000

        if data is not None and len(data) and len(data.domain):
            self.sample = slice(None) if len(data) <= N_SAMPLE else \
                np.random.choice(np.arange(len(data)), N_SAMPLE,
                                 replace=False)
            model.set_domain(data.domain)
            colormodel.set_domain(data.domain)
            self.color_attr = try_(lambda: data.domain.class_vars[0].name,
                                   None)
            selected_attrs = (
                model.data(model.index(i, 0))
                for i in range(min(self.OPTIMIZATION_N_DIMS[1],
                                   model.rowCount())))
            # Keep only entries for which the model returns a string.
            self.selected_attrs = [attr for attr in selected_attrs
                                   if isinstance(attr, str)]
        else:
            model.set_domain(None)
            colormodel.set_domain(None)

        self.Information.dataset_sampled(
            shown=False if data is None else len(data) > N_SAMPLE)

        # There is no domain to open a context for when the input is removed.
        if data is not None:
            self.openContext(data.domain)

        self.update_plot()
        self.commit()

    def clear(self):
        """Clear the plot and re-send the outputs."""
        self.graph.clear()
        self.commit()

    def update_plot(self):
        """Rebuild the plot from the current data, dimensions and color."""
        data = self.data
        if data is None or not len(data):
            self.clear()
            return

        self.optimize_button.setDisabled(not self.is_optimization_valid())
        self.Warning.too_many_selected_dimensions(
            len(self.selected_attrs), self.MAX_N_DIMS,
            shown=len(self.selected_attrs) > self.MAX_N_DIMS)
        selected_attrs = self.selected_attrs[:self.MAX_N_DIMS]

        sample = self.sample

        dimensions = []
        for attr in selected_attrs:
            attr = data.domain[attr]
            values = data.get_column_view(attr)[0][sample]
            dim = dict(label=attr.name,
                       values=values,
                       constraintrange=self.constraint_range.get(attr.name))
            if attr.is_discrete:
                dim.update(tickvals=np.arange(len(attr.values)),
                           ticktext=attr.values)
            elif isinstance(attr, TimeVariable):
                # Label only the minimum, median and maximum of time axes.
                tickvals = [np.nanmin(values),
                            np.nanmedian(values),
                            np.nanmax(values)]
                ticktext = [attr.repr_val(i) for i in tickvals]
                dim.update(tickvals=tickvals, ticktext=ticktext)
            dimensions.append(dim)

        # Compute color legend
        line = dict()
        padding_right = 40
        if self.color_attr:
            attr = data.domain[self.color_attr]
            values = data.get_column_view(attr)[0][sample]
            line.update(color=values, showscale=True)
            title = '<br>'.join(textwrap.wrap(attr.name.strip(),
                                              width=7, max_lines=4,
                                              placeholder='…'))
            if attr.is_discrete:
                padding_right = 90
                colors = [color_to_hex(i) for i in attr.colors]
                values_short = [textwrap.fill(value, width=9, max_lines=1,
                                              placeholder='…')
                                for value in attr.values]
                self.graph.exposeObject(
                    'discrete_colorbar',
                    dict(colors=colors, title=title, values=attr.values,
                         values_short=values_short))
                line.update(showscale=False,
                            colorscale=list(zip(
                                np.linspace(0, 1, len(attr.values)),
                                colors)))
            else:
                padding_right = 0
                self.graph.exposeObject('discrete_colorbar', {})
                line.update(colorscale=list(zip(
                                (0, 1),
                                (color_to_hex(i)
                                 for i in attr.colors[:-1]))),
                            colorbar=dict(title=title))
                if isinstance(attr, TimeVariable):
                    tickvals = [np.nanmin(values),
                                np.nanmedian(values),
                                np.nanmax(values)]
                    ticktext = [attr.repr_val(i) for i in tickvals]
                    line.update(colorbar=dict(title=title,
                                              tickangle=-90,
                                              tickvals=tickvals,
                                              ticktext=ticktext))
        self.graph.plot([Parcoords(line=line, dimensions=dimensions)],
                        padding_right=padding_right)

    def set_shown_attributes(self, attrs):
        """Set the dimensions to show; ``None`` selects no dimensions."""
        # A None input would otherwise break len()/slicing in update_plot().
        self.selected_attrs = attrs if attrs is not None else []
        self.update_plot()

    def commit(self):
        """Send selected data, annotated data and the shown features."""
        selected_data, annotated_data = None, None
        data = self.data
        if data is not None and len(data):
            mask = np.ones(len(data), dtype=bool)
            # A row is selected when it lies inside every stored interval.
            for attr, (lower, upper) in self.constraint_range.items():
                values = data.get_column_view(attr)[0]
                mask &= (values >= lower) & (values <= upper)

            selected_data = data[mask]
            annotated_data = create_annotated_table(data, mask)

        self.send('Selected Data', selected_data)
        self.send('Annotated Data', annotated_data)
        self.send('Features', widget.AttributeList(self.selected_attrs or []))

    def is_optimization_valid(self):
        """Return True if the number of dimensions allows optimization."""
        return (self.OPTIMIZATION_N_DIMS[0] <=
                len(self.selected_attrs) <=
                self.OPTIMIZATION_N_DIMS[1])

    def optimize(self):
        """ Optimizes the order of selected dimensions. """
        data = self.data
        if data is None or not len(data):
            return

        if not self.is_optimization_valid():
            QMessageBox(
                QMessageBox.Warning,
                "Parallel Coordinates Optimization",
                "Can only optimize when the number of selected dimensions "
                "is between {} and {}. "
                "Sorry.".format(*self.OPTIMIZATION_N_DIMS),
                QMessageBox.Abort, self).exec()
            return

        self.optimize_button.blockSignals(True)
        # try/finally so that a failure in the correlation computation does
        # not leave the button's signals blocked.
        try:
            R = {}
            Rc = {}
            # Correlations are computed on at most 300 random rows.
            sample = slice(None) if len(data) < 300 else \
                np.random.choice(np.arange(len(data)), 300, replace=False)

            for attr1 in self.selected_attrs:
                if self.color_attr:
                    Rc[attr1] = kendalltau(
                        data.get_column_view(attr1)[0][sample],
                        data.get_column_view(self.color_attr)[0][sample],
                        nan_policy='omit')[0]
                for attr2 in self.selected_attrs:
                    if (attr1, attr2) in R or attr1 == attr2:
                        continue
                    R[(attr1, attr2)] = R[(attr2, attr1)] = \
                        kendalltau(data.get_column_view(attr1)[0][sample],
                                   data.get_column_view(attr2)[0][sample],
                                   nan_policy='omit')[0]

            # First dimension is the one with the highest correlation with
            # the color attribute; the last dimension the one with the lowest
            # correlation with the first dimension.
            # If there is no color attribute, first and last are the two
            # dimensions with the lowest correlation.
            # In either case, the rest are filled in in the order of maximal
            # cumulative correlation.
            if self.color_attr:
                head = max(Rc.items(), key=lambda i: i[1])[0]
                tail = min(((key, value) for key, value in R.items()
                            if key[0] == head),
                           key=lambda i: i[1])[0][1]
            else:
                head, tail = min(R.items(), key=lambda i: i[1])[0]

            def cumsum(permutation):
                return sum(R[(attr1, attr2)]
                           for attr1, attr2 in pairwise(
                               (head, ) + permutation + (tail, )))

            body = max(
                itertools.permutations(
                    set(self.selected_attrs) - {head, tail}),
                key=cumsum)

            # Stored as a list, like everywhere else in this class.
            self.selected_attrs = [head] + list(body) + [tail]
            self.update_plot()
        finally:
            self.optimize_button.blockSignals(False)

    def send_report(self):
        """Report the shown dimensions, the color attribute and the plot."""
        self.report_items((('Dimensions', list(self.selected_attrs)),
                           ('Color', self.color_attr)))
        self.report_plot()
class OWSilhouettePlot(widget.OWWidget):
    """Silhouette plot widget.

    Computes per-instance silhouette scores from a distance matrix over the
    input data and a chosen discrete cluster variable, draws them in a
    graphics scene, and outputs the selected and the remaining instances.
    """
    name = "Silhouette Plot"
    description = "Silhouette Plot"

    icon = "icons/Silhouette.svg"

    inputs = [("Data", Orange.data.Table, "set_data")]
    outputs = [("Selected Data", Orange.data.Table, widget.Default),
               ("Other Data", Orange.data.Table)]

    replaces = [
        "orangecontrib.prototypes.widgets.owsilhouetteplot.OWSilhouettePlot"]

    settingsHandler = settings.PerfectDomainContextHandler()

    #: Distance metric index
    distance_idx = settings.Setting(0)
    #: Group/cluster variable index
    cluster_var_idx = settings.ContextSetting(0)
    #: Annotation variable index
    annotation_var_idx = settings.ContextSetting(0)
    #: Group the silhouettes by cluster
    group_by_cluster = settings.Setting(True)
    #: A fixed size for an instance bar
    bar_size = settings.Setting(3)
    #: Add silhouette scores to output data
    add_scores = settings.Setting(False)
    auto_commit = settings.Setting(False)

    #: (display name, distance constructor) pairs, indexed by `distance_idx`
    Distances = [("Euclidean", Orange.distance.Euclidean),
                 ("Manhattan", Orange.distance.Manhattan)]

    def __init__(self):
        super().__init__()

        self.data = None
        self._effective_data = None
        self._matrix = None
        self._silhouette = None
        self._labels = None
        self._silplot = None

        box = gui.widgetBox(self.controlArea, "Settings",)
        gui.comboBox(box, self, "distance_idx", label="Distance",
                     items=[name for name, _ in OWSilhouettePlot.Distances],
                     callback=self._invalidate_distances)
        self.cluster_var_cb = gui.comboBox(
            box, self, "cluster_var_idx", label="Cluster",
            callback=self._invalidate_scores)
        self.cluster_var_model = itemmodels.VariableListModel(parent=self)
        self.cluster_var_cb.setModel(self.cluster_var_model)

        gui.spin(box, self, "bar_size", minv=1, maxv=10, label="Bar Size",
                 callback=self._update_bar_size)

        gui.checkBox(box, self, "group_by_cluster", "Group by cluster",
                     callback=self._replot)

        self.annotation_cb = gui.comboBox(
            box, self, "annotation_var_idx", label="Annotations",
            callback=self._update_annotations)
        self.annotation_var_model = itemmodels.VariableListModel(parent=self)
        # Index 0 is always the "no annotation" entry.
        self.annotation_var_model[:] = ["None"]
        self.annotation_cb.setModel(self.annotation_var_model)

        gui.rubber(self.controlArea)

        box = gui.widgetBox(self.controlArea, "Output")
        gui.checkBox(box, self, "add_scores", "Add silhouette scores",)
        gui.auto_commit(box, self, "auto_commit", "Commit", box=False)

        self.scene = QtGui.QGraphicsScene()
        self.view = QtGui.QGraphicsView(self.scene)
        self.view.setRenderHint(QtGui.QPainter.Antialiasing, True)
        self.view.setAlignment(Qt.AlignTop | Qt.AlignLeft)
        self.mainArea.layout().addWidget(self.view)

    def sizeHint(self):
        """Return the control area's size hint, at least 600 x 720."""
        sh = self.controlArea.sizeHint()
        return sh.expandedTo(QtCore.QSize(600, 720))

    @check_sql_input
    def set_data(self, data):
        """
        Set the input data set.

        The data is rejected (treated as None, with an error message) when it
        has no discrete variable with at least two values to serve as cluster
        labels, or no continuous attribute to compute distances from.
        """
        self.closeContext()
        self.clear()
        error_msg = ""
        warning_msg = ""
        candidatevars = []
        if data is not None:
            candidatevars = [
                v for v in data.domain.variables + data.domain.metas
                if v.is_discrete and len(v.values) >= 2]
            if not candidatevars:
                error_msg = "Input does not have any suitable cluster labels."
                data = None

        if data is not None:
            ncont = sum(v.is_continuous for v in data.domain.attributes)
            ndiscrete = len(data.domain.attributes) - ncont
            if ncont == 0:
                data = None
                error_msg = "No continuous columns"
            elif ncont < len(data.domain.attributes):
                warning_msg = "{0} discrete columns will not be used for " \
                              "distance computation".format(ndiscrete)

        self.data = data
        if data is not None:
            self.cluster_var_model[:] = candidatevars
            # Prefer the class variable as the default cluster variable.
            if data.domain.class_var in candidatevars:
                self.cluster_var_idx = \
                    candidatevars.index(data.domain.class_var)
            else:
                self.cluster_var_idx = 0

            annotvars = [var for var in data.domain.metas if var.is_string]
            self.annotation_var_model[:] = ["None"] + annotvars
            self.annotation_var_idx = 1 if len(annotvars) else 0
            self._effective_data = Orange.distance._preprocess(data)
            self.openContext(Orange.data.Domain(candidatevars))

        self.error(0, error_msg)
        self.warning(0, warning_msg)

    def handleNewSignals(self):
        """Recompute and redraw if there is data, then commit the outputs."""
        if self._effective_data is not None:
            self._update()
            self._replot()

        self.unconditional_commit()

    def clear(self):
        """
        Clear the widget state.
        """
        self.data = None
        self._effective_data = None
        self._matrix = None
        self._silhouette = None
        self._labels = None
        self.cluster_var_model[:] = []
        self.annotation_var_model[:] = ["None"]
        self._clear_scene()

    def _clear_scene(self):
        # Clear the graphics scene and associated objects
        self.scene.clear()
        self.scene.setSceneRect(QRectF())
        self._silplot = None

    def _invalidate_distances(self):
        # Invalidate the computed distance matrix and recompute the silhouette.
        self._matrix = None
        self._invalidate_scores()

    def _invalidate_scores(self):
        # Invalidate and recompute the current silhouette scores.
        self._labels = self._silhouette = None
        self._update()
        self._replot()
        if self.data is not None:
            self.commit()

    def _update(self):
        # Update/recompute the distances/scores as required
        if self.data is None:
            self._silhouette = None
            self._labels = None
            self._matrix = None
            self._clear_scene()
            return

        # The distance matrix is reused until explicitly invalidated.
        if self._matrix is None and self._effective_data is not None:
            _, metric = self.Distances[self.distance_idx]
            self._matrix = numpy.asarray(metric(self._effective_data))

        labelvar = self.cluster_var_model[self.cluster_var_idx]
        labels, _ = self.data.get_column_view(labelvar)
        labels = labels.astype(int)
        _, counts = numpy.unique(labels, return_counts=True)
        if numpy.count_nonzero(counts) >= 2:
            self.error(1, "")
            silhouette = sklearn.metrics.silhouette_samples(
                self._matrix, labels, metric="precomputed")
        else:
            # Scores stay None in this case; commit() must cope with that.
            self.error(1, "Need at least 2 clusters with non zero counts")
            labels = silhouette = None

        self._labels = labels
        self._silhouette = silhouette

    def _replot(self):
        # Clear and replot/initialize the scene
        self._clear_scene()
        if self._silhouette is not None and self._labels is not None:
            var = self.cluster_var_model[self.cluster_var_idx]
            silplot = SilhouettePlot()
            silplot.setBarHeight(self.bar_size)
            silplot.setRowNamesVisible(self.bar_size >= 5)

            if self.group_by_cluster:
                silplot.setScores(self._silhouette, self._labels, var.values)
            else:
                # Ungrouped: put every instance into one unnamed group.
                silplot.setScores(
                    self._silhouette,
                    numpy.zeros(len(self._silhouette), dtype=int),
                    [""]
                )

            self.scene.addItem(silplot)
            self._silplot = silplot
            self._update_annotations()

            silplot.resize(silplot.effectiveSizeHint(Qt.PreferredSize))
            silplot.selectionChanged.connect(self.commit)

            self.scene.setSceneRect(
                QRectF(QtCore.QPointF(0, 0),
                       self._silplot.effectiveSizeHint(Qt.PreferredSize)))

    def _update_bar_size(self):
        # Apply the bar size to the existing plot and refit the scene rect
        if self._silplot is not None:
            self._silplot.setBarHeight(self.bar_size)
            self._silplot.setRowNamesVisible(self.bar_size >= 5)
            self.scene.setSceneRect(
                QRectF(QtCore.QPointF(0, 0),
                       self._silplot.effectiveSizeHint(Qt.PreferredSize)))

    def _update_annotations(self):
        # Set the plot's row names from the selected annotation variable
        if 0 < self.annotation_var_idx < len(self.annotation_var_model):
            annot_var = self.annotation_var_model[self.annotation_var_idx]
        else:
            # Index 0 is the "None" entry; out-of-range indices count as None.
            annot_var = None

        if self._silplot is not None:
            if annot_var is not None:
                column, _ = self.data.get_column_view(annot_var)
                self._silplot.setRowNames(
                    [annot_var.str_val(value) for value in column])
            else:
                self._silplot.setRowNames(None)

    def commit(self):
        """
        Commit/send the current selection to the output.

        With `add_scores` enabled, the outputs get an extra meta column for
        the silhouette score. It is filled in only when scores are available.
        """
        selected = other = None
        if self.data is not None:
            selectedmask = numpy.full(len(self.data), False, dtype=bool)
            if self._silplot is not None:
                indices = self._silplot.selection()
                selectedmask[indices] = True
            scores = self._silhouette
            silhouette_var = None
            if self.add_scores:
                var = self.cluster_var_model[self.cluster_var_idx]
                silhouette_var = Orange.data.ContinuousVariable(
                    "Silhouette ({})".format(escape(var.name)))
                domain = Orange.data.Domain(
                    self.data.domain.attributes,
                    self.data.domain.class_vars,
                    self.data.domain.metas + (silhouette_var, ))
            else:
                domain = self.data.domain

            if numpy.count_nonzero(selectedmask):
                selected = self.data.from_table(
                    domain, self.data, numpy.flatnonzero(selectedmask))

            if numpy.count_nonzero(~selectedmask):
                other = self.data.from_table(
                    domain, self.data, numpy.flatnonzero(~selectedmask))

            # `scores` is None when fewer than two non-empty clusters exist;
            # indexing it would raise a TypeError.
            if self.add_scores and scores is not None:
                if selected is not None:
                    selected[:, silhouette_var] = \
                        numpy.c_[scores[selectedmask]]
                if other is not None:
                    other[:, silhouette_var] = \
                        numpy.c_[scores[~selectedmask]]

        self.send("Selected Data", selected)
        self.send("Other Data", other)

    def onDeleteWidget(self):
        """Release the widget state before the widget is deleted."""
        self.clear()
        super().onDeleteWidget()
class OWDistributions(OWWidget): name = "Distributions" description = "Display value distributions of a data feature in a graph." icon = "icons/Distribution.svg" priority = 120 keywords = [] class Inputs: data = Input("Data", Table, doc="Set the input dataset") class Outputs: selected_data = Output("Selected Data", Table, default=True) annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table) histogram_data = Output("Histogram Data", Table) class Error(OWWidget.Error): no_defined_values_var = \ Msg("Variable '{}' does not have any defined values") no_defined_values_pair = \ Msg("No data instances with '{}' and '{}' defined") class Warning(OWWidget.Warning): ignored_nans = Msg("Data instances with missing values are ignored") settingsHandler = settings.DomainContextHandler() var = settings.ContextSetting(None) cvar = settings.ContextSetting(None) selection = settings.ContextSetting(set(), schema_only=True) # number_of_bins must be a context setting because selection depends on it number_of_bins = settings.ContextSetting(5, schema_only=True) fitted_distribution = settings.Setting(0) hide_bars = settings.Setting(False) show_probs = settings.Setting(False) stacked_columns = settings.Setting(False) cumulative_distr = settings.Setting(False) kde_smoothing = settings.Setting(10) auto_apply = settings.Setting(True) graph_name = "plot" Fitters = (("None", None, (), ()), ("Normal", norm, ("loc", "scale"), ("μ", "σ²")), ("Beta", beta, ("a", "b", "loc", "scale"), ("α", "β", "-loc", "-scale")), ("Gamma", gamma, ("a", "loc", "scale"), ("α", "β", "-loc", "-scale")), ("Rayleigh", rayleigh, ("loc", "scale"), ("-loc", "σ²")), ("Pareto", pareto, ("b", "loc", "scale"), ("α", "-loc", "-scale")), ("Exponential", expon, ("loc", "scale"), ("-loc", "λ")), ("Kernel density", AshCurve, ("a", ), ("", ))) DragNone, DragAdd, DragRemove = range(3) def __init__(self): super().__init__() self.data = None self.valid_data = self.valid_group_data = None self.bar_items = [] self.curve_items = [] 
self.curve_descriptions = None self.binnings = [] self.last_click_idx = None self.drag_operation = self.DragNone self.key_operation = None self._user_var_bins = {} gui.listView(self.controlArea, self, "var", box="Variable", model=DomainModel(valid_types=DomainModel.PRIMITIVE, separators=False), callback=self._on_var_changed) box = self.continuous_box = gui.vBox(self.controlArea, "Distribution") slider = gui.hSlider(box, self, "number_of_bins", label="Bin width", orientation=Qt.Horizontal, minValue=0, maxValue=max(1, len(self.binnings) - 1), createLabel=False, callback=self._on_bins_changed) self.bin_width_label = gui.widgetLabel(slider.box) self.bin_width_label.setFixedWidth(35) self.bin_width_label.setAlignment(Qt.AlignRight) slider.sliderReleased.connect(self._on_bin_slider_released) gui.comboBox(box, self, "fitted_distribution", label="Fitted distribution", orientation=Qt.Horizontal, items=(name[0] for name in self.Fitters), callback=self._on_fitted_dist_changed) self.smoothing_box = gui.indentedBox(box, 40) gui.hSlider(self.smoothing_box, self, "kde_smoothing", label="Smoothing", orientation=Qt.Horizontal, minValue=2, maxValue=20, callback=self.replot) gui.checkBox(box, self, "hide_bars", "Hide bars", stateWhenDisabled=False, callback=self._on_hide_bars_changed, disabled=not self.fitted_distribution) box = gui.vBox(self.controlArea, "Columns") gui.comboBox(box, self, "cvar", label="Split by", orientation=Qt.Horizontal, model=DomainModel( placeholder="(None)", valid_types=(DiscreteVariable), ), callback=self._on_cvar_changed, contentsLength=18) gui.checkBox(box, self, "stacked_columns", "Stack columns", callback=self.replot) gui.checkBox(box, self, "show_probs", "Show probabilities", callback=self._on_show_probabilities_changed) gui.checkBox(box, self, "cumulative_distr", "Show cumulative distribution", callback=self.replot) gui.auto_apply(self.controlArea, self, commit=self.apply) self._set_smoothing_visibility() self._setup_plots() self._setup_legend() def 
_setup_plots(self): def add_new_plot(zvalue): plot = pg.ViewBox(enableMouse=False, enableMenu=False) self.ploti.scene().addItem(plot) pg.AxisItem("right").linkToView(plot) plot.setXLink(self.ploti) plot.setZValue(zvalue) return plot self.plotview = DistributionWidget(background=None) self.plotview.item_clicked.connect(self._on_item_clicked) self.plotview.blank_clicked.connect(self._on_blank_clicked) self.plotview.mouse_released.connect(self._on_end_selecting) self.plotview.setRenderHint(QPainter.Antialiasing) self.mainArea.layout().addWidget(self.plotview) self.ploti = pg.PlotItem( enableMenu=False, enableMouse=False, axisItems={"bottom": ElidedAxisNoUnits("bottom")}) self.plot = self.ploti.vb self.plot.setMouseEnabled(False, False) self.ploti.hideButtons() self.plotview.setCentralItem(self.ploti) self.plot_pdf = add_new_plot(10) self.plot_mark = add_new_plot(-10) self.plot_mark.setYRange(0, 1) self.ploti.vb.sigResized.connect(self.update_views) self.update_views() pen = QPen(self.palette().color(QPalette.Text)) self.ploti.getAxis("bottom").setPen(pen) left = self.ploti.getAxis("left") left.setPen(pen) left.setStyle(stopAxisAtTick=(True, True)) def _setup_legend(self): self._legend = LegendItem() self._legend.setParentItem(self.plot_pdf) self._legend.hide() self._legend.anchor((1, 0), (1, 0)) # ----------------------------- # Event and signal handlers def update_views(self): for plot in (self.plot_pdf, self.plot_mark): plot.setGeometry(self.plot.sceneBoundingRect()) plot.linkedViewChanged(self.plot, plot.XAxis) def onDeleteWidget(self): self.plot.clear() self.plot_pdf.clear() self.plot_mark.clear() super().onDeleteWidget() @Inputs.data def set_data(self, data): self.closeContext() self.var = self.cvar = None self.data = data domain = self.data.domain if self.data else None varmodel = self.controls.var.model() cvarmodel = self.controls.cvar.model() varmodel.set_domain(domain) cvarmodel.set_domain(domain) if varmodel: self.var = varmodel[min(len(domain.class_vars), 
len(varmodel) - 1)] if domain is not None and domain.has_discrete_class: self.cvar = domain.class_var self.reset_select() self._user_var_bins.clear() self.openContext(domain) self.set_valid_data() self.recompute_binnings() self.replot() self.apply() def _on_var_changed(self): self.reset_select() self.set_valid_data() self.recompute_binnings() self.replot() self.apply() def _on_cvar_changed(self): self.set_valid_data() self.replot() self.apply() def _on_bins_changed(self): self.reset_select() self._set_bin_width_slider_label() self.replot() # this is triggered when dragging, so don't call apply here; # apply is called on sliderReleased def _on_bin_slider_released(self): self._user_var_bins[self.var] = self.number_of_bins self.apply() def _on_fitted_dist_changed(self): self.controls.hide_bars.setDisabled(not self.fitted_distribution) self._set_smoothing_visibility() self.replot() def _on_hide_bars_changed(self): for bar in self.bar_items: # pylint: disable=blacklisted-name bar.setHidden(self.hide_bars) self._set_curve_brushes() self.plot.update() def _set_smoothing_visibility(self): self.smoothing_box.setVisible( self.Fitters[self.fitted_distribution][1] is AshCurve) def _set_bin_width_slider_label(self): if self.number_of_bins < len(self.binnings): text = reduce(lambda s, rep: s.replace(*rep), short_time_units.items(), self.binnings[self.number_of_bins].width_label) else: text = "" self.bin_width_label.setText(text) def _on_show_probabilities_changed(self): label = self.controls.fitted_distribution.label if self.show_probs: label.setText("Fitted probability") label.setToolTip( "Chosen distribution is used to compute Bayesian probabilities" ) else: label.setText("Fitted distribution") label.setToolTip("") self.replot() @property def is_valid(self): return self.valid_data is not None def set_valid_data(self): err_def_var = self.Error.no_defined_values_var err_def_pair = self.Error.no_defined_values_pair err_def_var.clear() err_def_pair.clear() 
self.Warning.ignored_nans.clear() self.valid_data = self.valid_group_data = None if self.var is None: return column = self.data.get_column_view(self.var)[0].astype(float) valid_mask = np.isfinite(column) if not np.any(valid_mask): self.Error.no_defined_values_var(self.var.name) return if self.cvar: ccolumn = self.data.get_column_view(self.cvar)[0].astype(float) valid_mask *= np.isfinite(ccolumn) if not np.any(valid_mask): self.Error.no_defined_values_pair(self.var.name, self.cvar.name) return self.valid_group_data = ccolumn[valid_mask] if not np.all(valid_mask): self.Warning.ignored_nans() self.valid_data = column[valid_mask] # ----------------------------- # Plotting def replot(self): self._clear_plot() if self.is_valid: self._set_axis_names() self._update_controls_state() self._call_plotting() self._display_legend() self.show_selection() def _clear_plot(self): self.plot.clear() self.plot_pdf.clear() self.plot_mark.clear() self.bar_items = [] self.curve_items = [] self._legend.clear() self._legend.hide() def _set_axis_names(self): assert self.is_valid # called only from replot, so assumes data is OK bottomaxis = self.ploti.getAxis("bottom") bottomaxis.setLabel(self.var and self.var.name) bottomaxis.setShowUnit(not (self.var and self.var.is_time)) leftaxis = self.ploti.getAxis("left") if self.show_probs and self.cvar: leftaxis.setLabel( f"Probability of '{self.cvar.name}' at given '{self.var.name}'" ) else: leftaxis.setLabel("Frequency") leftaxis.resizeEvent() def _update_controls_state(self): assert self.is_valid # called only from replot, so assumes data is OK self.continuous_box.setDisabled(self.var.is_discrete) self.controls.show_probs.setDisabled(self.cvar is None) self.controls.stacked_columns.setDisabled(self.cvar is None) def _call_plotting(self): assert self.is_valid # called only from replot, so assumes data is OK self.curve_descriptions = None if self.var.is_discrete: if self.cvar: self._disc_split_plot() else: self._disc_plot() else: if self.cvar: 
self._cont_split_plot() else: self._cont_plot() self.plot.autoRange() def _add_bar(self, x, width, padding, freqs, colors, stacked, expanded, tooltip, hidden=False): item = DistributionBarItem(x, width, padding, freqs, colors, stacked, expanded, tooltip, hidden) self.plot.addItem(item) self.bar_items.append(item) def _disc_plot(self): var = self.var self.ploti.getAxis("bottom").setTicks([list(enumerate(var.values))]) colors = [QColor(0, 128, 255)] dist = distribution.get_distribution(self.data, self.var) for i, freq in enumerate(dist): tooltip = \ "<p style='white-space:pre;'>" \ f"<b>{escape(var.values[i])}</b>: {int(freq)} " \ f"({100 * freq / len(self.valid_data):.2f} %) " self._add_bar(i - 0.5, 1, 0.1, [freq], colors, stacked=False, expanded=False, tooltip=tooltip) def _disc_split_plot(self): var = self.var self.ploti.getAxis("bottom").setTicks([list(enumerate(var.values))]) gcolors = [QColor(*col) for col in self.cvar.colors] gvalues = self.cvar.values conts = contingency.get_contingency(self.data, self.cvar, self.var) total = len(self.data) for i, freqs in enumerate(conts): self._add_bar(i - 0.5, 1, 0.1, freqs, gcolors, stacked=self.stacked_columns, expanded=self.show_probs, tooltip=self._split_tooltip(var.values[i], np.sum(freqs), total, gvalues, freqs)) def _cont_plot(self): self._set_cont_ticks() data = self.valid_data y, x = np.histogram(data, bins=self.binnings[self.number_of_bins].thresholds) total = len(data) colors = [QColor(0, 128, 255)] if self.fitted_distribution: colors[0] = colors[0].lighter(130) tot_freq = 0 lasti = len(y) - 1 for i, (x0, x1), freq in zip(count(), zip(x, x[1:]), y): tot_freq += freq tooltip = \ "<p style='white-space:pre;'>" \ f"<b>{escape(self.str_int(x0, x1, not i, i == lasti))}</b>: " \ f"{freq} ({100 * freq / total:.2f} %)</p>" self._add_bar(x0, x1 - x0, 0, [tot_freq if self.cumulative_distr else freq], colors, stacked=False, expanded=False, tooltip=tooltip, hidden=self.hide_bars) if self.fitted_distribution: 
self._plot_approximations(x[0], x[-1], [self._fit_approximation(data)], [QColor(0, 0, 0)], (1, )) def _cont_split_plot(self): self._set_cont_ticks() data = self.valid_data _, bins = np.histogram( data, bins=self.binnings[self.number_of_bins].thresholds) gvalues = self.cvar.values varcolors = [QColor(*col) for col in self.cvar.colors] if self.fitted_distribution: gcolors = [c.lighter(130) for c in varcolors] else: gcolors = varcolors nvalues = len(gvalues) ys = [] fitters = [] prior_sizes = [] for val_idx in range(nvalues): group_data = data[self.valid_group_data == val_idx] prior_sizes.append(len(group_data)) ys.append(np.histogram(group_data, bins)[0]) if self.fitted_distribution: fitters.append(self._fit_approximation(group_data)) total = len(data) prior_sizes = np.array(prior_sizes) tot_freqs = np.zeros(len(ys)) lasti = len(ys[0]) - 1 for i, x0, x1, freqs in zip(count(), bins, bins[1:], zip(*ys)): tot_freqs += freqs plotfreqs = tot_freqs.copy() if self.cumulative_distr else freqs self._add_bar(x0, x1 - x0, 0 if self.stacked_columns else 0.1, plotfreqs, gcolors, stacked=self.stacked_columns, expanded=self.show_probs, hidden=self.hide_bars, tooltip=self._split_tooltip( self.str_int(x0, x1, not i, i == lasti), np.sum(plotfreqs), total, gvalues, plotfreqs)) if fitters: self._plot_approximations(bins[0], bins[-1], fitters, varcolors, prior_sizes / len(data)) def _set_cont_ticks(self): axis = self.ploti.getAxis("bottom") if self.var and self.var.is_time: binning = self.binnings[self.number_of_bins] labels = np.array(binning.short_labels) thresholds = np.array(binning.thresholds) lengths = np.array([len(lab) for lab in labels]) slengths = set(lengths) if len(slengths) == 1: ticks = [ list(zip(thresholds[::2], labels[::2])), list(zip(thresholds[1::2], labels[1::2])) ] else: ticks = [] for length in sorted(slengths, reverse=True): idxs = lengths == length ticks.append(list(zip(thresholds[idxs], labels[idxs]))) axis.setTicks(ticks) else: axis.setTicks(None) def 
_fit_approximation(self, y): def join_pars(pairs): strv = self.var.str_val return ", ".join(f"{sname}={strv(val)}" for sname, val in pairs) def str_params(): s = join_pars((sname, val) for sname, val in zip(str_names, fitted) if sname and sname[0] != "-") par = join_pars((sname[1:], val) for sname, val in zip(str_names, fitted) if sname and sname[0] == "-") if par: s += f" ({par})" return s if not y.size: return None, None _, dist, names, str_names = self.Fitters[self.fitted_distribution] fitted = dist.fit(y) params = dict(zip(names, fitted)) return partial(dist.pdf, **params), str_params() def _plot_approximations(self, x0, x1, fitters, colors, prior_probs): x = np.linspace(x0, x1, 100) ys = np.zeros((len(fitters), 100)) self.curve_descriptions = [s for _, s in fitters] for y, (fitter, _) in zip(ys, fitters): if fitter is None: continue if self.Fitters[self.fitted_distribution][1] is AshCurve: y[:] = fitter(x, sigma=(22 - self.kde_smoothing) / 40) else: y[:] = fitter(x) if self.cumulative_distr: y[:] = np.cumsum(y) tots = np.sum(ys, axis=0) show_probs = self.show_probs and self.cvar is not None plot = self.ploti if show_probs else self.plot_pdf for y, prior_prob, color in zip(ys, prior_probs, colors): if not prior_prob: continue if show_probs: y_p = y * prior_prob tot = (y_p + (tots - y) * (1 - prior_prob)) tot[tot == 0] = 1 y = y_p / tot curve = pg.PlotCurveItem(x=x, y=y, fillLevel=0, pen=pg.mkPen(width=5, color=color), shadowPen=pg.mkPen( width=8, color=color.darker(120))) plot.addItem(curve) self.curve_items.append(curve) if not show_probs: self.plot_pdf.autoRange() self._set_curve_brushes() def _set_curve_brushes(self): for curve in self.curve_items: if self.hide_bars: color = curve.opts['pen'].color().lighter(160) color.setAlpha(128) curve.setBrush(pg.mkBrush(color)) else: curve.setBrush(None) @staticmethod def _split_tooltip(valname, tot_group, total, gvalues, freqs): div_group = tot_group or 1 cs = "white-space:pre; text-align: right;" s = f"style='{cs} 
padding-left: 1em'" snp = f"style='{cs}'" return f"<table style='border-collapse: collapse'>" \ f"<tr><th {s}>{escape(valname)}:</th>" \ f"<td {snp}><b>{int(tot_group)}</b></td>" \ "<td/>" \ f"<td {s}><b>{100 * tot_group / total:.2f} %</b></td></tr>" + \ f"<tr><td/><td/><td {s}>(in group)</td><td {s}>(overall)</td>" \ "</tr>" + \ "".join( "<tr>" f"<th {s}>{value}:</th>" f"<td {snp}><b>{int(freq)}</b></td>" f"<td {s}>{100 * freq / div_group:.2f} %</td>" f"<td {s}>{100 * freq / total:.2f} %</td>" "</tr>" for value, freq in zip(gvalues, freqs)) + \ "</table>" def _display_legend(self): assert self.is_valid # called only from replot, so assumes data is OK if self.cvar is None: if not self.curve_descriptions or not self.curve_descriptions[0]: self._legend.hide() return self._legend.addItem( pg.PlotCurveItem(pen=pg.mkPen(width=5, color=0.0)), self.curve_descriptions[0]) else: cvar_values = self.cvar.values colors = [QColor(*col) for col in self.cvar.colors] descriptions = self.curve_descriptions or repeat(None) for color, name, desc in zip(colors, cvar_values, descriptions): self._legend.addItem( ScatterPlotItem(pen=color, brush=color, size=10, shape="s"), escape(name + (f" ({desc})" if desc else ""))) self._legend.show() # ----------------------------- # Bins def recompute_binnings(self): if self.is_valid and self.var.is_continuous: # binning is computed on valid var data, ignoring any cvar nans column = self.data.get_column_view(self.var)[0].astype(float) if np.any(np.isfinite(column)): if self.var.is_time: self.binnings = time_binnings(column, min_unique=5) self.bin_width_label.setFixedWidth(45) else: self.binnings = decimal_binnings( column, min_width=self.min_var_resolution(self.var), add_unique=10, min_unique=5) self.bin_width_label.setFixedWidth(35) max_bins = len(self.binnings) - 1 else: self.binnings = [] max_bins = 0 self.controls.number_of_bins.setMaximum(max_bins) self.number_of_bins = min( max_bins, self._user_var_bins.get(self.var, self.number_of_bins)) 
self._set_bin_width_slider_label() @staticmethod def min_var_resolution(var): # pylint: disable=unidiomatic-typecheck if type(var) is not ContinuousVariable: return 0 return 10**-var.number_of_decimals def str_int(self, x0, x1, first, last): var = self.var sx0, sx1 = var.repr_val(x0), var.repr_val(x1) if self.cumulative_distr: return f"{var.name} < {sx1}" elif first and last: return f"{var.name} = {sx0}" elif first: return f"{var.name} < {sx1}" elif last: return f"{var.name} ≥ {sx0}" elif sx0 == sx1 or x1 - x0 <= self.min_var_resolution(var): return f"{var.name} = {sx0}" else: return f"{sx0} ≤ {var.name} < {sx1}" # ----------------------------- # Selection def _on_item_clicked(self, item, modifiers, drag): def add_or_remove(idx, add): self.drag_operation = [self.DragRemove, self.DragAdd][add] if add: self.selection.add(idx) else: if idx in self.selection: # This can be False when removing with dragging and the # mouse crosses unselected items self.selection.remove(idx) def add_range(add): if self.last_click_idx is None: add = True idx_range = {idx} else: from_idx, to_idx = sorted((self.last_click_idx, idx)) idx_range = set(range(from_idx, to_idx + 1)) self.drag_operation = [self.DragRemove, self.DragAdd][add] if add: self.selection |= idx_range else: self.selection -= idx_range self.key_operation = None if item is None: self.reset_select() return idx = self.bar_items.index(item) if drag: # Dragging has to add a range, otherwise fast dragging skips bars add_range(self.drag_operation == self.DragAdd) else: if modifiers & Qt.ShiftModifier: add_range(self.drag_operation == self.DragAdd) elif modifiers & Qt.ControlModifier: add_or_remove(idx, add=idx not in self.selection) else: if self.selection == {idx}: # Clicking on a single selected bar deselects it, # but dragging from here will select add_or_remove(idx, add=False) self.drag_operation = self.DragAdd else: self.selection.clear() add_or_remove(idx, add=True) self.last_click_idx = idx self.show_selection() def 
_on_blank_clicked(self): self.reset_select() def reset_select(self): self.selection.clear() self.last_click_idx = None self.drag_operation = None self.key_operation = None self.show_selection() def _on_end_selecting(self): self.apply() def show_selection(self): self.plot_mark.clear() if not self.is_valid: # though if it's not, selection is empty anyway return blue = QColor(Qt.blue) pen = QPen(QBrush(blue), 3) pen.setCosmetic(True) brush = QBrush(blue.lighter(190)) for group in self.grouped_selection(): group = list(group) left_idx, right_idx = group[0], group[-1] left_pad, right_pad = self._determine_padding(left_idx, right_idx) x0 = self.bar_items[left_idx].x0 - left_pad x1 = self.bar_items[right_idx].x1 + right_pad item = QGraphicsRectItem(x0, 0, x1 - x0, 1) item.setPen(pen) item.setBrush(brush) if self.var.is_continuous: valname = self.str_int(x0, x1, not left_idx, right_idx == len(self.bar_items) - 1) inside = sum(np.sum(self.bar_items[i].freqs) for i in group) total = len(self.valid_data) item.setToolTip("<p style='white-space:pre;'>" f"<b>{escape(valname)}</b>: " f"{inside} ({100 * inside / total:.2f} %)") self.plot_mark.addItem(item) def _determine_padding(self, left_idx, right_idx): def _padding(i): return (self.bar_items[i + 1].x0 - self.bar_items[i].x1) / 2 if len(self.bar_items) == 1: return 6, 6 if left_idx == 0 and right_idx == len(self.bar_items) - 1: return (_padding(0), ) * 2 if left_idx > 0: left_pad = _padding(left_idx - 1) if right_idx < len(self.bar_items) - 1: right_pad = _padding(right_idx) else: right_pad = left_pad if left_idx == 0: left_pad = right_pad return left_pad, right_pad def grouped_selection(self): return [[g[1] for g in group] for _, group in groupby(enumerate(sorted(self.selection)), key=lambda x: x[1] - x[0])] def keyPressEvent(self, e): def on_nothing_selected(): if e.key() == Qt.Key_Left: self.last_click_idx = len(self.bar_items) - 1 else: self.last_click_idx = 0 self.selection.add(self.last_click_idx) def on_key_left(): if 
e.modifiers() & Qt.ShiftModifier: if self.key_operation == Qt.Key_Right and first != last: self.selection.remove(last) self.last_click_idx = last - 1 elif first: self.key_operation = Qt.Key_Left self.selection.add(first - 1) self.last_click_idx = first - 1 else: self.selection.clear() self.last_click_idx = max(first - 1, 0) self.selection.add(self.last_click_idx) def on_key_right(): if e.modifiers() & Qt.ShiftModifier: if self.key_operation == Qt.Key_Left and first != last: self.selection.remove(first) self.last_click_idx = first + 1 elif not self._is_last_bar(last): self.key_operation = Qt.Key_Right self.selection.add(last + 1) self.last_click_idx = last + 1 else: self.selection.clear() self.last_click_idx = min(last + 1, len(self.bar_items) - 1) self.selection.add(self.last_click_idx) if not self.is_valid or not self.bar_items \ or e.key() not in (Qt.Key_Left, Qt.Key_Right): super().keyPressEvent(e) return prev_selection = self.selection.copy() if not self.selection: on_nothing_selected() else: first, last = min(self.selection), max(self.selection) if e.key() == Qt.Key_Left: on_key_left() else: on_key_right() if self.selection != prev_selection: self.drag_operation = self.DragAdd self.show_selection() self.apply() def keyReleaseEvent(self, ev): if ev.key() == Qt.Key_Shift: self.key_operation = None super().keyReleaseEvent(ev) # ----------------------------- # Output def apply(self): data = self.data selected_data = annotated_data = histogram_data = None if self.is_valid: if self.var.is_discrete: group_indices, values = self._get_output_indices_disc() else: group_indices, values = self._get_output_indices_cont() hist_indices, hist_values = self._get_histogram_indices() histogram_data = create_groups_table(data, hist_indices, values=hist_values) selected = np.nonzero(group_indices)[0] if selected.size: selected_data = create_groups_table(data, group_indices, include_unselected=False, values=values) annotated_data = create_annotated_table(data, selected) 
self.Outputs.selected_data.send(selected_data) self.Outputs.annotated_data.send(annotated_data) self.Outputs.histogram_data.send(histogram_data) def _get_output_indices_disc(self): group_indices = np.zeros(len(self.data), dtype=np.int32) col = self.data.get_column_view(self.var)[0].astype(float) for group_idx, val_idx in enumerate(self.selection, start=1): group_indices[col == val_idx] = group_idx values = [self.var.values[i] for i in self.selection] return group_indices, values def _get_output_indices_cont(self): group_indices = np.zeros(len(self.data), dtype=np.int32) col = self.data.get_column_view(self.var)[0].astype(float) values = [] for group_idx, group in enumerate(self.grouped_selection(), start=1): x0 = x1 = None for bar_idx in group: minx, maxx, mask = self._get_cont_baritem_indices(col, bar_idx) if x0 is None: x0 = minx x1 = maxx group_indices[mask] = group_idx # pylint: disable=undefined-loop-variable values.append( self.str_int(x0, x1, not bar_idx, self._is_last_bar(bar_idx))) return group_indices, values def _get_histogram_indices(self): group_indices = np.zeros(len(self.data), dtype=np.int32) col = self.data.get_column_view(self.var)[0].astype(float) values = [] for bar_idx in range(len(self.bar_items)): x0, x1, mask = self._get_cont_baritem_indices(col, bar_idx) group_indices[mask] = bar_idx + 1 values.append( self.str_int(x0, x1, not bar_idx, self._is_last_bar(bar_idx))) return group_indices, values def _get_cont_baritem_indices(self, col, bar_idx): bar_item = self.bar_items[bar_idx] minx = bar_item.x0 maxx = bar_item.x1 + (bar_idx == len(self.bar_items) - 1) with np.errstate(invalid="ignore"): return minx, maxx, (col >= minx) * (col < maxx) def _is_last_bar(self, idx): return idx == len(self.bar_items) - 1 # ----------------------------- # Report def get_widget_name_extension(self): return self.var def send_report(self): self.plotview.scene().setSceneRect(self.plotview.sceneRect()) if not self.is_valid: return self.report_plot() if 
self.cumulative_distr: text = f"Cummulative distribution of '{self.var.name}'" else: text = f"Distribution of '{self.var.name}'" if self.cvar: text += f" with columns split by '{self.cvar.name}'" self.report_caption(text)
class OWPIPAx(widget.OWWidget):
    """Browse PIPA RNA-Seq experiments and output the selected ones as a table.

    The widget lists the experiments reported by ``dicty.PIPAx`` in a tree
    view, lets the user select rows (and store/restore named selections),
    and on commit downloads the selected experiments, optionally
    log-transforming values, joining replicates and sorting the output
    columns by the chosen labels.
    """

    name = "PIPAx"
    description = "Access data from PIPA RNA-Seq database."
    icon = "../widgets/icons/PIPA.svg"
    priority = 35

    inputs = []
    outputs = [("Data", Orange.data.Table)]

    username = settings.Setting("")
    password = settings.Setting("")

    log2 = settings.Setting(False)
    rtypei = settings.Setting(5)  # hardcoded rpkm mapability polya
    excludeconstant = settings.Setting(False)
    joinreplicates = settings.Setting(False)

    #: The stored current selection (in experiments view)
    #: SelectionByKey | None
    currentSelection = settings.Setting(None)

    #: Stored selections (presets)
    #: list of SelectionByKey
    storedSelections = settings.Setting([])

    #: Stored column sort keys (from Sort view)
    #: list of strings
    storedSortingOrder = settings.Setting(
        ["Strain", "Experiment", "Genotype", "Timepoint"])

    #: Header label -> "section is hidden" flag for the experiments view.
    experimentsHeaderState = settings.Setting(
        {name: False for _, name in HEADER[:ID_INDEX + 1]})

    def __init__(self, parent=None, signalManager=None, name="PIPAx"):
        """Build the GUI, restore stored state and schedule the first update.

        ``signalManager`` and ``name`` are accepted for backward
        compatibility only; they are not used here.
        """
        super().__init__(parent)

        self.selectedExperiments = []
        self.buffer = dicty.CacheSQLite(bufferfile)

        self.searchString = ""

        self.result_types = []
        self.mappings = {}

        # Populated by UpdateExperiments / UpdateResultsList. Initialised
        # here so that callbacks firing before the first (deferred) update
        # do not hit an AttributeError.
        self.items = []
        self.results_list = {}
        self.wantbufver = None

        self.controlArea.setMaximumWidth(250)
        self.controlArea.setMinimumWidth(250)

        gui.button(self.controlArea, self, "Reload",
                   callback=self.Reload)
        gui.button(self.controlArea, self, "Clear cache",
                   callback=self.clear_cache)

        b = gui.widgetBox(self.controlArea, "Experiment Sets")
        self.selectionSetsWidget = SelectionSetsWidget(self)
        self.selectionSetsWidget.setSizePolicy(QSizePolicy.Preferred,
                                               QSizePolicy.Maximum)

        def store_selections(modified):
            # Persist the presets only once the set is no longer modified.
            if not modified:
                self.storedSelections = self.selectionSetsWidget.selections

        self.selectionSetsWidget.selectionModified.connect(store_selections)
        b.layout().addWidget(self.selectionSetsWidget)

        gui.separator(self.controlArea)

        b = gui.widgetBox(self.controlArea, "Sort output columns")
        self.columnsSortingWidget = SortedListWidget(self)
        self.columnsSortingWidget.setSizePolicy(QSizePolicy.Preferred,
                                                QSizePolicy.Maximum)

        def store_sort_order():
            self.storedSortingOrder = self.columnsSortingWidget.sortingOrder

        self.columnsSortingWidget.sortingOrderChanged.connect(store_sort_order)
        b.layout().addWidget(self.columnsSortingWidget)
        sorting_model = QStringListModel(SORTING_MODEL_LIST)
        self.columnsSortingWidget.setModel(sorting_model)

        gui.separator(self.controlArea)

        box = gui.widgetBox(self.controlArea, 'Expression Type')
        self.expressionTypesCB = gui.comboBox(
            box, self, "rtypei", items=[], callback=self.UpdateResultsList)

        gui.checkBox(self.controlArea, self, "excludeconstant",
                     "Exclude labels with constant values")

        gui.checkBox(self.controlArea, self, "joinreplicates",
                     "Average replicates (use median)")

        gui.checkBox(self.controlArea, self, "log2",
                     "Logarithmic (base 2) transformation")

        self.commit_button = gui.button(self.controlArea, self, "&Commit",
                                        callback=self.Commit)
        self.commit_button.setDisabled(True)

        gui.rubber(self.controlArea)

        box = gui.widgetBox(self.controlArea, "Authentication")

        # NOTE(review): the three line edits below are a reconstruction.
        # In the checked-in text this span was mangled by credential
        # redaction ('"Username:"******"password", ...'), which is not valid
        # Python and left ``self.passf`` (read by AuthSet) unassigned.
        # Only the arguments that survived ("username"/"Username:",
        # "password"/"Password:", "searchString"/"Search", callbackOnType,
        # callback=self.SearchUpdate) are certain; the AuthChanged callbacks,
        # the echo mode and the search box's parent widget are assumptions
        # - confirm against version control history.
        gui.lineEdit(box, self, "username", "Username:",
                     callback=self.AuthChanged)
        self.passf = gui.lineEdit(box, self, "password", "Password:",
                                  callback=self.AuthChanged)
        # presumably gui.lineEdit returns a QLineEdit (sub)class - verify
        self.passf.setEchoMode(self.passf.Password)

        gui.lineEdit(self.mainArea, self, "searchString", "Search",
                     callbackOnType=True,
                     callback=self.SearchUpdate)

        self.headerLabels = [t[1] for t in HEADER]

        self.experimentsWidget = QTreeWidget()
        self.experimentsWidget.setHeaderLabels(self.headerLabels)
        self.experimentsWidget.setSelectionMode(QTreeWidget.ExtendedSelection)
        self.experimentsWidget.setRootIsDecorated(False)
        self.experimentsWidget.setSortingEnabled(True)

        contextEventFilter = gui.VisibleHeaderSectionContextEventFilter(
            self.experimentsWidget, self.experimentsWidget)

        self.experimentsWidget.header().installEventFilter(contextEventFilter)
        self.experimentsWidget.setItemDelegateForColumn(
            0, gui.IndicatorItemDelegate(self, role=Qt.DisplayRole))

        self.experimentsWidget.setAlternatingRowColors(True)

        self.experimentsWidget.selectionModel().selectionChanged.connect(
            self.onSelectionChanged)

        self.selectionSetsWidget.setSelectionModel(
            self.experimentsWidget.selectionModel())

        self.mainArea.layout().addWidget(self.experimentsWidget)

        # Restore the selection states from the stored settings
        self.selectionSetsWidget.selections = self.storedSelections
        self.columnsSortingWidget.sortingOrder = self.storedSortingOrder

        self.restoreHeaderState()

        self.experimentsWidget.header().geometriesChanged.connect(
            self.saveHeaderState)

        self.dbc = None

        self.AuthSet()

        # Defer the first database query until the event loop is running.
        QTimer.singleShot(100, self.UpdateExperiments)

    def sizeHint(self):
        """Return the preferred initial widget size."""
        return QSize(800, 600)

    def AuthSet(self):
        """Enable the password field only when a username is entered."""
        self.passf.setDisabled(not len(self.username))

    def AuthChanged(self):
        """React to edited credentials: update the GUI and reconnect."""
        self.AuthSet()
        self.ConnectAndUpdate()

    def ConnectAndUpdate(self):
        """Reconnect and reload the experiment list from the server."""
        self.Connect()
        self.UpdateExperiments(reload=True)

    def Connect(self):
        """Create the database connection and, if a username is set, verify it.

        On an authentication failure, or when neither the server nor the
        cache can be reached, ``self.dbc`` is reset to ``None`` and an error
        (id 1) is shown. When only the cache is reachable a warning (id 1)
        is shown instead.
        """
        self.error(1)
        self.warning(1)

        # An empty username means anonymous access.
        username = self.username if len(self.username) else None

        self.dbc = dicty.PIPAx(cache=self.buffer,
                               username=username,
                               password=self.password)

        # check password
        if username is not None:
            try:
                self.dbc.mappings(reload=True)
            except dicty.AuthenticationError:
                self.error(1, "Wrong username or password")
                self.dbc = None
            except Exception as ex:
                print("Error when contacting the PIPA database", ex)
                sys.excepthook(*sys.exc_info())
                try:  # maybe cached?
                    self.dbc.mappings()
                    self.warning(
                        1, "Can not access database - using cached data.")
                except Exception:
                    self.dbc = None
                    self.error(1, "Can not access database.")

    def Reload(self):
        """Reload the experiment list, bypassing cached listings."""
        self.UpdateExperiments(reload=True)

    def clear_cache(self):
        """Empty the local cache and reload."""
        self.buffer.clear()
        self.Reload()

    def rtype(self):
        """Return selected result template type """
        if self.result_types:
            return self.result_types[self.rtypei][0]
        else:
            return "-1"

    def UpdateExperimentTypes(self):
        """Refill the expression-type combo box from ``self.result_types``."""
        self.expressionTypesCB.clear()
        items = [desc for _, desc in self.result_types]
        self.expressionTypesCB.addItems(items)
        # Clamp the stored index into the range of available types.
        self.rtypei = max(0, min(self.rtypei, len(self.result_types) - 1))

    def UpdateExperiments(self, reload=False):
        """Fetch mappings and result types and rebuild the experiments view.

        :param reload: passed to the database calls as their ``reload``
            argument; if those calls fail, they are retried without it.
        """
        self.experimentsWidget.clear()
        self.items = []

        self.progressBarInit()

        if not self.dbc:
            self.Connect()

        mappings = {}
        result_types = []

        # Clear stale messages *before* the attempt. Clearing them after a
        # successful cached fallback (as the code used to do) removed the
        # "using cached data" warning in the same call that had set it.
        self.warning(0)
        self.error(0)

        try:
            mappings = self.dbc.mappings(reload=reload)
            result_types = self.dbc.result_types(reload=reload)
        except Exception:
            # This also covers self.dbc being None after a failed Connect().
            try:
                mappings = self.dbc.mappings()
                result_types = self.dbc.result_types()
                self.warning(
                    0, "Can not access database - using cached data.")
            except Exception:
                self.error(0, "Can not access database.")

        self.mappings = mappings
        self.result_types = result_types

        self.UpdateExperimentTypes()
        self.UpdateResultsList(reload=reload)

        self.progressBarFinished()

        if self.currentSelection:
            self.currentSelection.select(
                self.experimentsWidget.selectionModel())

        self.handle_commit_button()

    def UpdateResultsList(self, reload=False):
        """Synchronise the tree view with the results of the selected type.

        Rows that are no longer available are removed and new ones are
        added in place, so rows that stay keep their selection state.
        """
        results_list = {}
        try:
            results_list = self.dbc.results_list(self.rtype(), reload=reload)
        except Exception:
            try:
                results_list = self.dbc.results_list(self.rtype())
            except Exception:
                self.error(0, "Can not access database.")

        self.results_list = results_list
        mappings_key_dict = dict(((m["data_id"], m["id"]), key)
                                 for key, m in self.mappings.items())

        def mapping_unique_id(annot):
            """Map annotations dict from results_list to unique
            `mappings` ids.
            """
            data_id, mappings_id = annot["data_id"], annot["mappings_id"]
            return mappings_key_dict[data_id, mappings_id]

        # softly change the view so that the selection stays the same

        # Column 10 holds the unique mapping id of a row.
        # NOTE(review): presumably 10 == ID_INDEX - confirm and unify.
        items_shown = {}
        for i, item in enumerate(self.items):
            items_shown[str(item.text(10))] = i

        items_to_show = dict((mapping_unique_id(annot), annot)
                             for annot in self.results_list.values())

        add_items = set(items_to_show) - set(items_shown)
        delete_items = set(items_shown) - set(items_to_show)

        # Remove stale rows; only advance the index when nothing was taken
        # because taking an item shifts the following ones down.
        i = 0
        while i < self.experimentsWidget.topLevelItemCount():
            it = self.experimentsWidget.topLevelItem(i)
            if str(it.text(10)) in delete_items:
                self.experimentsWidget.takeTopLevelItem(i)
            else:
                i += 1

        delete_ind = {items_shown[key] for key in delete_items}
        self.items = [it for i, it in enumerate(self.items)
                      if i not in delete_ind]

        for r_annot in [items_to_show[key] for key in add_items]:
            d = defaultdict(lambda: "?", r_annot)
            row_items = [""] + [d.get(key, "?") for key, _ in HEADER[1:]]
            try:
                time_dict = literal_eval(row_items[DATE_INDEX])
                date_rna = date(time_dict["fullYearUTC"],
                                time_dict["monthUTC"] + 1,  # Why is month 0 based?
                                time_dict["dateUTC"])
                row_items[DATE_INDEX] = date_rna.strftime("%x")
            except Exception:
                # Best effort: an unparsable date is shown as empty.
                row_items[DATE_INDEX] = ''

            row_items[ID_INDEX] = mapping_unique_id(r_annot)
            ci = MyTreeWidgetItem(self.experimentsWidget, row_items)

            self.items.append(ci)

        for i in range(len(self.headerLabels)):
            self.experimentsWidget.resizeColumnToContents(i)

        # which is the ok buffer version
        # FIXME: what attribute to use for version? Until that is decided
        # every result is expected to be at the constant version "0".
        self.wantbufver = lambda x: "0"

        self.UpdateCached()

    def UpdateCached(self):
        """Mark, in column 0, the rows whose data is already in the buffer."""
        if self.wantbufver and self.dbc:
            fn = self.dbc.download_key_function()
            result_id_key = dict(((m["data_id"], m["mappings_id"]), key)
                                 for key, m in self.results_list.items())

            for item in self.items:
                c = str(item.text(10))
                mapping = self.mappings[c]
                data_id, mappings_id = mapping["data_id"], mapping["id"]
                r_id = result_id_key[data_id, mappings_id]
                # Get the buffered version
                buffered = self.dbc.inBuffer(fn(r_id))
                value = " " if buffered == self.wantbufver(r_id) else ""
                item.setData(0, Qt.DisplayRole, value)

    def SearchUpdate(self, string=""):
        """Hide the rows that do not contain every search term.

        ``string`` is unused; the terms are read from ``self.searchString``.
        Matching is delegated to the item's ``in`` operator.
        """
        terms = self.searchString.split()
        for item in self.items:
            item.setHidden(not all(s in item for s in terms))

    @staticmethod
    def _sorted_attributes(attributes, sort_order, label_keys):
        """Return ``attributes`` sorted by their labels named in ``sort_order``.

        ``label_keys`` maps a header name to the key used in each
        attribute's ``attributes`` dict. A label column whose non-empty
        values all parse as floats is compared numerically; other columns
        are compared as they are.
        """
        def label(attr, name):
            return attr.attributes.get(label_keys[name], "")

        def is_number(value):
            try:
                float(value)
            except (TypeError, ValueError):
                return False
            return True

        numeric = {
            name: all(is_number(value)
                      for value in (label(attr, name) for attr in attributes)
                      if value)
            for name in sort_order
        }

        def key_part(value, name):
            if not numeric[name]:
                return value
            # Tag numeric values so that a missing label ("") never gets
            # compared with a float, which raises TypeError on Python 3.
            # Missing labels sort before all numbers.
            if value == "":
                return (0, 0.0)
            return (1, float(value))

        def sorting_key(attr):
            return tuple(key_part(label(attr, name), name)
                         for name in sort_order)

        return sorted(attributes, key=sorting_key)

    def Commit(self):
        """Download the selected experiments and send them to "Data".

        Nothing is sent when no experiment is selected or when no database
        connection could be established.
        """
        if not self.dbc:
            self.Connect()
        if not self.dbc:
            # Connect() has already reported the reason as an error.
            return

        pb = gui.ProgressBar(self, iterations=100)
        try:
            ids = []
            for item in self.experimentsWidget.selectedItems():
                unique_id = str(item.text(10))
                annots = self.mappings[unique_id]
                ids.append((annots["data_id"], annots["id"]))

            transfn = None
            if self.log2:
                transfn = lambda x: math.log(x + 1.0, 2)

            reverse_header_dict = dict((name, key) for key, name in HEADER)

            hview = self.experimentsWidget.header()
            # Column 0 is skipped; only labels of visible columns are kept.
            shown_headers = [label
                             for i, label in enumerate(self.headerLabels)
                             if i > 0 and not hview.isSectionHidden(i)]

            allowed_labels = [reverse_header_dict.get(label, label)
                              for label in shown_headers]

            if self.joinreplicates and "id" not in allowed_labels:
                # need 'id' labels in join_replicates for attribute names
                allowed_labels.append("id")

            if ids:
                table = self.dbc.get_data(
                    ids=ids, result_type=self.rtype(),
                    callback=pb.advance,
                    exclude_constant_labels=self.excludeconstant,
                    transform=transfn,
                    allowed_labels=allowed_labels)

                if self.joinreplicates:
                    table = dicty.join_replicates(
                        table,
                        ignorenames=["replicate", "data_id", "mappings_id",
                                     "data_name", "id", "unique_id"],
                        namefn=None,
                        avg=dicty.median)

                # Sort attributes
                attributes = self._sorted_attributes(
                    table.domain.attributes,
                    self.columnsSortingWidget.sortingOrder,
                    reverse_header_dict)

                domain = Orange.data.Domain(
                    attributes, table.domain.class_var, table.domain.metas)
                table = table.from_table(domain, table)

                data_hints.set_hint(table, "taxid", "352472")
                data_hints.set_hint(table, "genesinrows", False)

                self.send("Data", table)

                self.UpdateCached()
        finally:
            # Always close the progress bar, also when the download fails.
            pb.finish()

    def onSelectionChanged(self, selected, deselected):
        """Slot for selection changes in the experiments view."""
        self.handle_commit_button()

    def handle_commit_button(self):
        """Store the current selection; enable Commit iff it is non-empty."""
        self.currentSelection = \
            SelectionByKey(self.experimentsWidget.selectionModel().selection(),
                           key=(1, 2, 3, 10))
        self.commit_button.setDisabled(not len(self.currentSelection))

    def saveHeaderState(self):
        """Record which header sections are currently hidden."""
        hview = self.experimentsWidget.header()
        for i, label in enumerate(self.headerLabels):
            self.experimentsHeaderState[label] = hview.isSectionHidden(i)

    def restoreHeaderState(self):
        """Apply the stored hidden flags; unknown labels are hidden."""
        hview = self.experimentsWidget.header()
        state = self.experimentsHeaderState
        for i, label in enumerate(self.headerLabels):
            hview.setSectionHidden(i, state.get(label, True))
            self.experimentsWidget.resizeColumnToContents(i)
class OWConfusionMatrix(widget.OWWidget):
    """Show the confusion matrix of evaluated classifiers.

    The widget receives evaluation results, renders the confusion matrix of
    the selected learner in a table view and outputs the data instances that
    fall into the currently selected cells.

    Table layout (model coordinates):

    * rows/columns 0 and 1 hold the "Predicted"/"Actual" captions and the
      class-value headers,
    * the matrix itself starts at cell (2, 2); rows are actual classes and
      columns are predicted classes,
    * the last row/column hold the column/row sums.
    """

    name = "Confusion Matrix"
    description = "Display confusion matrix constructed from results " \
                  "of evaluation of classifiers."
    icon = "icons/ConfusionMatrix.svg"
    priority = 1001

    inputs = [("Evaluation Results", Orange.evaluation.Results, "set_results")]
    outputs = [("Selected Data", Orange.data.Table)]

    # Labels of the "Show" combo box; the index is stored in
    # `selected_quantity`.
    quantities = ["Number of instances",
                  "Proportion of predicted",
                  "Proportion of actual"]

    selected_learner = settings.Setting([])
    selected_quantity = settings.Setting(0)
    append_predictions = settings.Setting(True)
    append_probabilities = settings.Setting(False)
    autocommit = settings.Setting(True)

    UserAdviceMessages = [
        widget.Message(
            "Clicking on cells or in headers outputs the corresponding "
            "data instances",
            "click_cell")]

    def __init__(self):
        """Build the control area (learner list, options) and the table."""
        super().__init__()

        self.data = None
        self.results = None
        self.learners = []
        self.headers = []

        box = gui.widgetBox(self.controlArea, "Learners")

        self.learners_box = gui.listBox(
            box, self, "selected_learner", "learners",
            callback=self._learner_changed
        )
        box = gui.widgetBox(self.controlArea, "Show")

        gui.comboBox(box, self, "selected_quantity", items=self.quantities,
                     callback=self._update)

        box = gui.widgetBox(self.controlArea, "Select")

        gui.button(box, self, "Correct",
                   callback=self.select_correct, autoDefault=False)
        gui.button(box, self, "Misclassified",
                   callback=self.select_wrong, autoDefault=False)
        gui.button(box, self, "None",
                   callback=self.select_none, autoDefault=False)

        self.outputbox = box = gui.widgetBox(self.controlArea, "Output")
        gui.checkBox(box, self, "append_predictions",
                     "Predictions", callback=self._invalidate)
        gui.checkBox(box, self, "append_probabilities",
                     "Probabilities",
                     callback=self._invalidate)

        gui.auto_commit(self.controlArea, self, "autocommit",
                        "Send Data", "Auto send is on")

        grid = QGridLayout()

        self.tablemodel = QStandardItemModel(self)
        view = self.tableview = QTableView(
            editTriggers=QTableView.NoEditTriggers)
        view.setModel(self.tablemodel)
        view.horizontalHeader().hide()
        view.verticalHeader().hide()
        view.horizontalHeader().setMinimumSectionSize(60)
        view.selectionModel().selectionChanged.connect(self._invalidate)
        view.setShowGrid(False)
        view.clicked.connect(self.cell_clicked)
        grid.addWidget(view, 0, 0)
        self.mainArea.layout().addLayout(grid)

    def sizeHint(self):
        """Return the preferred initial size of the widget."""
        return QSize(750, 490)

    def _item(self, i, j):
        """Return the model item at (i, j), or a new item if there is none."""
        return self.tablemodel.item(i, j) or QStandardItem()

    def _set_item(self, i, j, item):
        """Store `item` at model position (i, j)."""
        self.tablemodel.setItem(i, j, item)

    def set_results(self, results):
        """Set the input results.

        Results whose data does not have a discrete class are rejected with
        a warning. Results without attached data raise NotImplementedError.
        """
        self.clear()
        self.warning([0, 1])

        data = None
        if results is not None:
            if results.data is not None:
                data = results.data

        if data is not None and not data.domain.has_discrete_class:
            data = None
            results = None
            self.warning(
                0, "Confusion Matrix cannot be used for regression results.")

        self.results = results
        self.data = data

        if data is not None:
            class_values = data.domain.class_var.values
        elif results is not None:
            raise NotImplementedError

        if results is not None:
            nmodels, ntests = results.predicted.shape
            self.headers = class_values + \
                           [unicodedata.lookup("N-ARY SUMMATION")]

            # NOTE: The 'learner_names' is set in 'Test Learners' widget.
            if hasattr(results, "learner_names"):
                self.learners = results.learner_names
            else:
                self.learners = ["Learner #%i" % (i + 1)
                                 for i in range(nmodels)]

            # Caption above the predicted-class headers.
            item = self._item(0, 2)
            item.setData("Predicted", Qt.DisplayRole)
            item.setTextAlignment(Qt.AlignCenter)
            item.setFlags(Qt.NoItemFlags)

            self._set_item(0, 2, item)
            # Caption left of the actual-class headers (drawn vertically).
            item = self._item(2, 0)
            item.setData("Actual", Qt.DisplayRole)
            item.setTextAlignment(Qt.AlignHCenter | Qt.AlignBottom)
            item.setFlags(Qt.NoItemFlags)
            self.tableview.setItemDelegateForColumn(
                0, gui.VerticalItemDelegate())
            self._set_item(2, 0, item)
            self.tableview.setSpan(0, 2, 1, len(class_values))
            self.tableview.setSpan(2, 0, len(class_values), 1)

            # The top-left 2x2 corner is inert.
            for i in (0, 1):
                for j in (0, 1):
                    item = self._item(i, j)
                    item.setFlags(Qt.NoItemFlags)
                    self._set_item(i, j, item)

            # Class-value headers in row 1 and in column 1.
            for p, label in enumerate(self.headers):
                for i, j in ((1, p + 2), (p + 2, 1)):
                    item = self._item(i, j)
                    item.setData(label, Qt.DisplayRole)
                    item.setData(QBrush(QColor(208, 208, 208)),
                                 Qt.BackgroundColorRole)
                    item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                    item.setFlags(Qt.ItemIsEnabled)
                    self._set_item(i, j, item)

            hor_header = self.tableview.horizontalHeader()
            if len(' '.join(self.headers)) < 120:
                hor_header.setResizeMode(QHeaderView.ResizeToContents)
            else:
                hor_header.setDefaultSectionSize(60)
            self.tablemodel.setRowCount(len(class_values) + 3)
            self.tablemodel.setColumnCount(len(class_values) + 3)
            self.selected_learner = [0]
            self._update()

    def clear(self):
        """Reset the widget to the state without any input."""
        self.results = None
        self.data = None
        self.tablemodel.clear()
        self.headers = []
        # Clear learners last. This action will invoke `_learner_changed`
        # method
        self.learners = []

    def select_correct(self):
        """Select the diagonal cells of the table."""
        selection = QItemSelection()
        n = self.tablemodel.rowCount()
        for i in range(2, n):
            index = self.tablemodel.index(i, i)
            selection.select(index, index)

        self.tableview.selectionModel().select(
            selection, QItemSelectionModel.ClearAndSelect
        )

    def select_wrong(self):
        """Select all off-diagonal cells of the table."""
        selection = QItemSelection()
        n = self.tablemodel.rowCount()

        for i in range(2, n):
            for j in range(i + 1, n):
                index = self.tablemodel.index(i, j)
                selection.select(index, index)
                index = self.tablemodel.index(j, i)
                selection.select(index, index)

        self.tableview.selectionModel().select(
            selection, QItemSelectionModel.ClearAndSelect
        )

    def select_none(self):
        """Clear the selection."""
        self.tableview.selectionModel().clear()

    def cell_clicked(self, model_index):
        """Select a whole row, column or the whole matrix on header clicks.

        Clicks in row/column 0 are ignored. A click on a header or sum cell
        selects the corresponding row or column; a click on the header
        corner or on the total selects the entire matrix.
        """
        i, j = model_index.row(), model_index.column()
        if not i or not j:
            return
        n = self.tablemodel.rowCount()
        index = self.tablemodel.index
        selection = None
        if i == j == 1 or i == j == n - 1:
            selection = QItemSelection(index(2, 2), index(n - 1, n - 1))
        elif i in (1, n - 1):
            selection = QItemSelection(index(2, j), index(n - 1, j))
        elif j in (1, n - 1):
            selection = QItemSelection(index(i, 2), index(i, n - 1))

        if selection is not None:
            self.tableview.selectionModel().select(
                selection, QItemSelectionModel.ClearAndSelect
            )

    def commit(self):
        """Send the instances that belong to the selected matrix cells.

        Predictions and probabilities of the selected learner are appended
        as meta attributes when the corresponding options are enabled.
        `None` is sent when there are no results, data or selected learner.
        """
        if self.results is not None and self.data is not None \
                and self.selected_learner:
            indices = self.tableview.selectedIndexes()
            # Translate table coordinates into (actual, predicted) pairs.
            indices = {(ind.row() - 2, ind.column() - 2) for ind in indices}
            actual = self.results.actual
            selected_learner = self.selected_learner[0]
            learner_name = self.learners[selected_learner]
            predicted = self.results.predicted[selected_learner]
            selected = [i for i, t in enumerate(zip(actual, predicted))
                        if t in indices]
            row_indices = self.results.row_indices[selected]

            extra = []
            class_var = self.data.domain.class_var
            metas = self.data.domain.metas

            if self.append_predictions:
                predicted = numpy.array(predicted[selected], dtype=object)
                extra.append(predicted.reshape(-1, 1))
                var = Orange.data.DiscreteVariable(
                    "{}({})".format(class_var.name, learner_name),
                    class_var.values
                )
                metas = metas + (var,)

            if self.append_probabilities and \
                    self.results.probabilities is not None:
                probs = self.results.probabilities[selected_learner, selected]
                extra.append(numpy.array(probs, dtype=object))
                pvars = [Orange.data.ContinuousVariable("p({})".format(value))
                         for value in class_var.values]
                metas = metas + tuple(pvars)

            X = self.data.X[row_indices]
            Y = self.data.Y[row_indices]
            M = self.data.metas[row_indices]
            row_ids = self.data.ids[row_indices]

            M = numpy.hstack((M,) + tuple(extra))
            domain = Orange.data.Domain(
                self.data.domain.attributes,
                self.data.domain.class_vars,
                metas
            )
            data = Orange.data.Table.from_numpy(domain, X, Y, M)
            data.ids = row_ids
            data.name = learner_name

        else:
            data = None

        self.send("Selected Data", data)

    def _invalidate(self):
        """Recompute the output after a selection or option change."""
        self.commit()

    def _learner_changed(self):
        """Redraw the matrix for the newly selected learner.

        The cell selection is captured before the redraw and re-applied
        afterwards.
        """
        # The selected learner has changed
        indices = self.tableview.selectedIndexes()
        self._update()
        selection = QItemSelection()
        for sel in indices:
            selection.select(sel, sel)
        self.tableview.selectionModel().select(
            selection, QItemSelectionModel.ClearAndSelect
        )
        self.commit()

    def _update(self):
        """Update the displayed confusion matrix.

        Cell (i, j) describes instances of actual class i predicted as
        class j. Depending on `selected_quantity` a cell shows the count,
        the share of all instances predicted as j (normalised by the sum of
        column j) or the share of all instances of actual class i
        (normalised by the sum of row i). Empty columns/rows show "N/A".
        """
        if self.results is not None and self.selected_learner:
            index = self.selected_learner[0]
            cmatrix = confusion_matrix(self.results, index)
            colsum = cmatrix.sum(axis=0)
            rowsum = cmatrix.sum(axis=1)
            total = rowsum.sum()

            def percent(part, whole):
                # Guard on the same denominator that is used for division.
                return ("{:2.1f} %".format(100 * part / whole)
                        if whole else "N/A")

            if self.selected_quantity == 0:
                value = lambda i, j: int(cmatrix[i, j])
            elif self.selected_quantity == 1:
                # Proportion of predicted: normalise by the predicted
                # class' column total (index j, not i).
                value = lambda i, j: percent(cmatrix[i, j], colsum[j])
            elif self.selected_quantity == 2:
                # Proportion of actual: normalise by the actual class' row
                # total.
                value = lambda i, j: percent(cmatrix[i, j], rowsum[i])
            else:
                assert False

            for i, row in enumerate(cmatrix):
                for j, _ in enumerate(row):
                    item = self._item(i + 2, j + 2)
                    item.setData(value(i, j), Qt.DisplayRole)
                    item.setToolTip("actual: {}\npredicted: {}".format(
                        self.headers[i], self.headers[j]))
                    item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                    item.setFlags(Qt.ItemIsEnabled | Qt.ItemIsSelectable)
                    self._set_item(i + 2, j + 2, item)

            model = self.tablemodel
            font = model.invisibleRootItem().font()
            bold_font = QFont(font)
            bold_font.setBold(True)

            def sum_item(value):
                # Bold, non-selectable cell used for row/column totals.
                item = QStandardItem()
                item.setData(value, Qt.DisplayRole)
                item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                item.setFlags(Qt.ItemIsEnabled)
                item.setFont(bold_font)
                return item

            N = len(colsum)
            for i in range(N):
                model.setItem(N + 2, i + 2, sum_item(int(colsum[i])))
                model.setItem(i + 2, N + 2, sum_item(int(rowsum[i])))

            model.setItem(N + 2, N + 2, sum_item(int(total)))
class OWDuplicates(widget.OWWidget):
    """Detect duplicate documents by clustering a distance matrix.

    Documents are merged following the hierarchical-clustering linkage for
    as long as the merge distance does not exceed `threshold`. Every
    resulting cluster is treated as one group of duplicates.
    """

    name = '重复文档检测'
    description = '检测和删除语料库中的重复文档'
    icon = 'icons/Duplicates.svg'
    priority = 700

    class Inputs:
        distances = Input("Distances", DistMatrix)

    class Outputs:
        corpus_without_duplicates = Output("Corpus Without Duplicates", Corpus)
        duplicates = Output("Duplicates Cluster", Corpus)
        corpus = Output("Corpus", Corpus)

    resizing_enabled = True

    class Error(OWWidget.Error):
        dist_matrix_invalid_shape = Msg('Duplicate detection only supports '
                                        'distances calculated between rows.')
        too_little_documents = Msg('More than one document is required.')

    LINKAGE = ['Single', 'Average', 'Complete', 'Weighted', 'Ward']
    linkage_method = settings.Setting(1)

    threshold = settings.Setting(.0)

    # Cluster variable domain role
    AttributeRole, ClassRole, MetaRole = 0, 1, 2
    CLUSTER_ROLES = ["Attributes", "Class", "Metas"]
    cluster_role = settings.Setting(2)

    def __init__(self):
        """Create the info box, histogram, cluster table and controls."""
        super().__init__()
        self.corpus = None              # corpus taken from distances
        self.linkage = None             # hierarchical clustering linkage as returned by Orange
        self.distances = None           # DistMatrix on input
        self.clustering_mask = None     # 1D array of clusters for self.corpus
        self.threshold_spin = None

        # Info
        self.n_documents = ''
        self.n_unique = ''
        self.n_duplicates = ''
        info_box = gui.widgetBox(self.controlArea, box='Info')
        gui.label(info_box, self, 'Documents: %(n_documents)s')
        gui.label(info_box, self, ' ◦ unique: %(n_unique)s')
        gui.label(info_box, self, ' ◦ duplicates: %(n_duplicates)s')

        # Threshold Histogram & Cluster View
        self.histogram = Histogram(self)
        self.table_view = gui.TableView(
            selectionMode=QListView.SingleSelection)
        self.table_model = PyTableModel()
        self.table_model.setHorizontalHeaderLabels(['Cluster', 'Size'])
        self.table_view.setModel(self.table_model)
        self.table_view.selectionModel().selectionChanged.connect(
            self.send_duplicates)

        # Add to main area
        height = 300
        main_area = gui.hBox(self.mainArea)
        self.histogram.setMinimumWidth(500)
        self.histogram.setMinimumHeight(height)
        self.table_view.setFixedWidth(140)
        main_area.layout().addWidget(self.histogram)
        main_area.layout().addWidget(self.table_view)

        # Controls
        gui.comboBox(self.controlArea, self, 'linkage_method',
                     items=self.LINKAGE, box='Linkage',
                     callback=self.recalculate_linkage,
                     orientation=Qt.Horizontal)
        self.threshold_spin = gui.doubleSpin(
            self.controlArea, self, 'threshold',
            0, float('inf'), 0.01, decimals=2,
            label='Distance threshold', box='Distances',
            callback=self.threshold_changed,
            keyboardTracking=False, controlWidth=60)
        self.histogram.region.sigRegionChangeFinished.connect(
            self.threshold_from_histogram_region)
        self.threshold_spin.setEnabled(False)
        gui.rubber(self.controlArea)

        # Output
        gui.comboBox(self.controlArea, self, "cluster_role", box='Output',
                     label='Append Cluster IDs to:',
                     callback=self.send_corpus,
                     items=self.CLUSTER_ROLES)

    def reset(self):
        """Drop all input-derived state and empty the views."""
        self.corpus = None
        self.linkage = None
        self.distances = None
        self.clustering_mask = None
        self.n_documents = ''
        self.n_unique = ''
        self.n_duplicates = ''
        self.threshold = 0
        self.threshold_spin.setEnabled(False)
        self.table_model.clear()
        self.histogram.setValues([])

    @Inputs.distances
    def set_distances(self, distances):
        """Accept a new distance matrix and recompute the clustering.

        The input is rejected (and the widget reset) when it refers to
        fewer than two documents or when its shape is not
        (n_documents, n_documents).
        """
        self.Error.clear()
        self.distances = distances
        if distances is None:
            self.reset()
            return

        self.corpus = self.distances.row_items
        self.n_documents = len(self.corpus)
        if self.n_documents < 2:
            self.Error.too_little_documents()
            self.reset()
            return
        if distances.shape != (self.n_documents, self.n_documents):
            self.Error.dist_matrix_invalid_shape()
            self.reset()
            return
        self.threshold_spin.setEnabled(True)
        self.recalculate_linkage()

    def threshold_from_histogram_region(self):
        """Take the threshold from the upper edge of the histogram region."""
        _, self.threshold = self.histogram.getRegion()
        self.threshold_changed()

    def threshold_changed(self):
        """Clip the threshold to the histogram range and re-cluster."""
        self.threshold = np.clip(self.threshold, *self.histogram.boundary())
        self.histogram.setRegion(0, self.threshold)
        self.detect_duplicates()

    def recalculate_linkage(self):
        """Recompute the linkage for the selected method and refresh all."""
        if self.distances is not None:
            self.linkage = dist_matrix_linkage(
                self.distances, self.LINKAGE[self.linkage_method].lower())

            # Magnitude of the spinbox's step is data-dependent
            vals = sorted(self.linkage[:, 2])
            low, up = vals[0], vals[-1]
            step = (up - low) / 20

            self.threshold_spin.setSingleStep(step)
            self.threshold = np.clip(self.threshold, low, up)
            self.histogram.setValues([])    # without this range breaks when changing linkages
            self.histogram.setValues(vals)
            self.histogram.setRegion(0, self.threshold)

            self.detect_duplicates()

    def detect_duplicates(self):
        """Cluster the documents and update the outputs and cluster table."""
        if self.distances is not None:
            self.cluster_linkage()
            self.send_corpus()
            self.send_corpus_without_duplicates()
            self.fill_cluster_view()

    def cluster_linkage(self):
        """Cut the linkage at `threshold` and store a cluster id per document.

        Linkage rows are applied in order until the first row whose distance
        exceeds the threshold. Row i merges clusters c1 and c2 into a new
        cluster with id n + i.
        """
        # cluster documents
        n = int(self.n_documents)
        clusters = {j: [j] for j in range(n)}
        for i, (c1, c2, dist, size) in enumerate(self.linkage):
            if dist > self.threshold:
                break
            clusters[n + i] = clusters[c1] + clusters[c2]
            del clusters[c1]
            del clusters[c2]

        self.n_unique = len(clusters)
        self.n_duplicates = n - self.n_unique

        # create mask
        self.clustering_mask = np.empty(n, dtype=int)
        for i, c in enumerate(clusters.values()):
            self.clustering_mask[c] = i

    def fill_cluster_view(self):
        """List every cluster with its size, largest first, and select it."""
        self.table_model.clear()
        c = Counter(self.clustering_mask)
        for id_, count in c.items():
            self.table_model.append([Cluster(id_), count])
        self.table_view.sortByColumn(1, Qt.DescendingOrder)
        self.table_view.selectRow(0)

    def send_corpus(self):
        """Send the corpus with the cluster id appended in the chosen role."""
        if self.clustering_mask is not None:
            # The column stores the cluster ids themselves, so the value at
            # position i must be the label of cluster i. np.unique returns
            # the ids in ascending order, which guarantees that mapping
            # without relying on set iteration order.
            cluster_var = DiscreteVariable(
                'Duplicates Cluster',
                values=[str(Cluster(v))
                        for v in np.unique(self.clustering_mask)]
            )
            corpus, domain = self.corpus, self.corpus.domain
            attrs = domain.attributes
            class_ = domain.class_vars
            metas = domain.metas

            if self.cluster_role == self.AttributeRole:
                attrs = attrs + (cluster_var,)
            elif self.cluster_role == self.ClassRole:
                class_ = class_ + (cluster_var,)
            elif self.cluster_role == self.MetaRole:
                metas = metas + (cluster_var,)

            domain = Domain(attrs, class_, metas)
            corpus = corpus.from_table(domain, corpus)
            corpus.get_column_view(cluster_var)[0][:] = self.clustering_mask
            self.Outputs.corpus.send(corpus)
        else:
            self.Outputs.corpus.send(None)

    def send_corpus_without_duplicates(self):
        """Send the corpus reduced to one document per cluster."""
        if self.clustering_mask is not None:
            # TODO make this more general, currently we just take the first document
            # Index of the first document of every cluster, found in a
            # single pass instead of one scan per cluster.
            _, mask = np.unique(self.clustering_mask, return_index=True)
            c = self.corpus[mask]
            c.name = '{} (Without Duplicates)'.format(self.corpus.name)
            self.Outputs.corpus_without_duplicates.send(c)
        else:
            self.Outputs.corpus_without_duplicates.send(None)

    def send_duplicates(self):
        """Send the documents of the cluster selected in the table view."""
        c = None
        indices = self.table_view.selectionModel().selectedIndexes()
        if indices:
            cluster = self.table_view.model().data(indices[0], Qt.EditRole)
            mask = np.flatnonzero(self.clustering_mask == cluster.id)
            c = self.corpus[mask]
            c.name = '{} {}'.format(self.Outputs.duplicates.name, cluster)
        self.Outputs.duplicates.send(c)

    def send_report(self):
        """Report the linkage, threshold and document counts."""
        self.report_items([
            ('Linkage', self.LINKAGE[self.linkage_method]),
            ('Distance threshold', '{:.2f}'.format(self.threshold)),
            ('Documents', self.n_documents),
            ('Unique', self.n_unique),
            ('Duplicates', self.n_duplicates),
        ])
class OWPCA(widget.OWWidget):
    """Principal component analysis widget with a scree plot.

    The number of output components can be chosen either directly
    (`ncomponents`) or through the share of explained variance
    (`variance_covered`); the two settings and the cut line in the plot are
    kept in sync by the `_update_selection_*` / `_on_cut_changed` callbacks.
    """

    name = "PCA"
    description = "Principal component analysis with a scree-diagram."
    icon = "icons/PCA.svg"
    priority = 3050
    keywords = ["principal component analysis", "linear transformation"]

    class Inputs:
        data = Input("Data", Table)

    class Outputs:
        transformed_data = Output("Transformed Data", Table, replaces=["Transformed data"])
        data = Output("Data", Table, default=True)
        components = Output("Components", Table)
        pca = Output("PCA", PCA, dynamic=False)

    settingsHandler = settings.DomainContextHandler()

    # Number of selected components; 0 is the spin box's special "All" value.
    ncomponents = settings.Setting(2)
    # Explained variance of the selection, in percent.
    variance_covered = settings.Setting(100)
    auto_commit = settings.Setting(True)
    normalize = settings.ContextSetting(True)
    # Maximal number of components shown in the plot.
    maxp = settings.Setting(20)
    # Used by `_update_axis` to space the tick labels of the bottom axis.
    axis_labels = settings.Setting(10)

    graph_name = "plot.plotItem"

    class Warning(widget.OWWidget.Warning):
        trivial_components = widget.Msg(
            "All components of the PCA are trivial (explain 0 variance). "
            "Input data is constant (or near constant).")

    class Error(widget.OWWidget.Error):
        no_features = widget.Msg("At least 1 feature is required")
        no_instances = widget.Msg("At least 1 data instance is required")

    def __init__(self):
        """Create the projector, the selection/option controls and the plot."""
        super().__init__()
        self.data = None

        # Fitted model and values derived from it; reset by `clear`.
        self._pca = None
        self._transformed = None
        self._variance_ratio = None
        self._cumulative = None
        self._init_projector()

        # Components Selection
        box = gui.vBox(self.controlArea, "Components Selection")
        form = QFormLayout()
        box.layout().addLayout(form)

        self.components_spin = gui.spin(
            box, self, "ncomponents", 1, MAX_COMPONENTS,
            callback=self._update_selection_component_spin,
            keyboardTracking=False
        )
        self.components_spin.setSpecialValueText("All")

        self.variance_spin = gui.spin(
            box, self, "variance_covered", 1, 100,
            callback=self._update_selection_variance_spin,
            keyboardTracking=False
        )
        self.variance_spin.setSuffix("%")

        form.addRow("Components:", self.components_spin)
        form.addRow("Explained variance:", self.variance_spin)

        # Options
        self.options_box = gui.vBox(self.controlArea, "Options")
        self.normalize_box = gui.checkBox(
            self.options_box, self, "normalize",
            "Normalize variables", callback=self._update_normalize
        )

        self.maxp_spin = gui.spin(
            self.options_box, self, "maxp", 1, MAX_COMPONENTS,
            label="Show only first", callback=self._setup_plot,
            keyboardTracking=False
        )

        self.controlArea.layout().addStretch()

        gui.auto_apply(self.controlArea, self, "auto_commit")

        self.plot = SliderGraph(
            "Principal Components", "Proportion of variance",
            self._on_cut_changed)

        self.mainArea.layout().addWidget(self.plot)
        self._update_normalize()

    @Inputs.data
    def set_data(self, data):
        """Set the input data, validate it and fit the PCA.

        An `SqlTable` is converted to a `Table`; when its approximate length
        is not below `AUTO_DL_LIMIT` only a downloaded sample is used. Data
        without attributes or without instances raises a widget error and
        clears the outputs.
        """
        self.closeContext()
        self.clear_messages()
        self.clear()
        self.information()
        self.data = None
        if isinstance(data, SqlTable):
            if data.approx_len() < AUTO_DL_LIMIT:
                data = Table(data)
            else:
                self.information("Data has been sampled")
                data_sample = data.sample_time(1, no_cache=True)
                data_sample.download_data(2000, partial=True)
                data = Table(data_sample)
        if isinstance(data, Table):
            if not data.domain.attributes:
                self.Error.no_features()
                self.clear_outputs()
                return
            if not data:
                self.Error.no_instances()
                self.clear_outputs()
                return

        self.openContext(data)
        self._init_projector()

        self.data = data
        self.fit()

    def fit(self):
        """Fit the projector to the current data and commit the result.

        The model is stored only when the cumulative explained variance is
        finite; otherwise the `trivial_components` warning is shown.
        """
        self.clear()
        self.Warning.trivial_components.clear()
        if self.data is None:
            return

        data = self.data

        if self.normalize:
            self._pca_projector.preprocessors = \
                self._pca_preprocessors + [preprocess.Normalize(center=False)]
        else:
            self._pca_projector.preprocessors = self._pca_preprocessors

        if not isinstance(data, SqlTable):
            pca = self._pca_projector(data)
            variance_ratio = pca.explained_variance_ratio_
            cumulative = numpy.cumsum(variance_ratio)

            if numpy.isfinite(cumulative[-1]):
                # 0 becomes selectable here: it is the "All" special value.
                self.components_spin.setRange(0, len(cumulative))
                self._pca = pca
                self._variance_ratio = variance_ratio
                self._cumulative = cumulative
                self._setup_plot()
            else:
                self.Warning.trivial_components()

            self.unconditional_commit()

    def clear(self):
        """Forget the fitted model and clear the plot."""
        self._pca = None
        self._transformed = None
        self._variance_ratio = None
        self._cumulative = None
        self.plot.clear_plot()

    def clear_outputs(self):
        """Send `None` on the table outputs and the (unfitted) projector."""
        self.Outputs.transformed_data.send(None)
        self.Outputs.data.send(None)
        self.Outputs.components.send(None)
        self.Outputs.pca.send(self._pca_projector)

    def _setup_plot(self):
        """Plot per-component and cumulative variance for the first `maxp`."""
        if self._pca is None:
            self.plot.clear_plot()
            return

        explained_ratio = self._variance_ratio
        explained = self._cumulative
        cutpos = self._nselected_components()
        p = min(len(self._variance_ratio), self.maxp)

        self.plot.update(
            numpy.arange(1, p + 1), [explained_ratio[:p], explained[:p]],
            [Qt.red, Qt.darkYellow], cutpoint_x=cutpos, names=LINE_NAMES)

        self._update_axis()

    def _on_cut_changed(self, components):
        """Handle a cut position reported by the plot.

        Nothing happens when the value equals the current selection, when
        "All" (0) is selected, or when a model is fitted and the cut lies
        at the last component.
        """
        if components == self.ncomponents \
                or self.ncomponents == 0 \
                or self._pca is not None \
                and components == len(self._variance_ratio):
            return

        self.ncomponents = components
        if self._pca is not None:
            var = self._cumulative[components - 1]
            if numpy.isfinite(var):
                self.variance_covered = int(var * 100)

        self._invalidate_selection()

    def _update_selection_component_spin(self):
        """Sync variance and plot cut after the components spin changed."""
        # cut changed by "ncomponents" spin.
        if self._pca is None:
            self._invalidate_selection()
            return

        if self.ncomponents == 0:
            # Special "All" value
            cut = len(self._variance_ratio)
        else:
            cut = self.ncomponents

        var = self._cumulative[cut - 1]
        if numpy.isfinite(var):
            self.variance_covered = int(var * 100)

        self.plot.set_cut_point(cut)
        self._invalidate_selection()

    def _update_selection_variance_spin(self):
        """Sync components and plot cut after the variance spin changed."""
        # cut changed by "max variance" spin.
        if self._pca is None:
            return

        # Position of the requested variance in the cumulative curve,
        # converted to a 1-based component count and capped at the number
        # of components.
        cut = numpy.searchsorted(self._cumulative,
                                 self.variance_covered / 100.0) + 1
        cut = min(cut, len(self._cumulative))
        self.ncomponents = cut
        self.plot.set_cut_point(cut)
        self._invalidate_selection()

    def _update_normalize(self):
        """Refit after the normalization option changed."""
        self.fit()
        if self.data is None:
            self._invalidate_selection()

    def _init_projector(self):
        """Create a fresh PCA projector and remember its preprocessors."""
        self._pca_projector = PCA(n_components=MAX_COMPONENTS, random_state=0)
        self._pca_projector.component = self.ncomponents
        self._pca_preprocessors = PCA.preprocessors

    def _nselected_components(self):
        """Return the number of selected components."""
        if self._pca is None:
            return 0

        if self.ncomponents == 0:
            # Special "All" value
            max_comp = len(self._variance_ratio)
        else:
            max_comp = self.ncomponents

        var_max = self._cumulative[max_comp - 1]
        # NOTE(review): the comparison is against floor(variance_covered/100),
        # i.e. 0.0 or 1.0 for the spin's 1..100 range - confirm this is the
        # intended test before changing anything here.
        if var_max != numpy.floor(self.variance_covered / 100.0):
            cut = max_comp
            assert numpy.isfinite(var_max)
            self.variance_covered = int(var_max * 100)
        else:
            self.ncomponents = cut = numpy.searchsorted(
                self._cumulative, self.variance_covered / 100.0) + 1
        return cut

    def _invalidate_selection(self):
        """Commit after the component selection changed."""
        self.commit()

    def _update_axis(self):
        """Set the bottom-axis ticks, spaced according to `axis_labels`."""
        p = min(len(self._variance_ratio), self.maxp)
        axis = self.plot.getAxis("bottom")
        d = max((p - 1) // (self.axis_labels - 1), 1)
        axis.setTicks([[(i, str(i)) for i in range(1, p + 1, d)]])

    def commit(self):
        """Send transformed data, components, augmented data and projector.

        Without a fitted model the three table outputs are `None`; the
        projector is sent in either case.
        """
        transformed = data = components = None
        if self._pca is not None:
            if self._transformed is None:
                # Compute the full transform (MAX_COMPONENTS components) once.
                self._transformed = self._pca(self.data)
            transformed = self._transformed

            # Keep only the first `ncomponents` projected attributes.
            domain = Domain(
                transformed.domain.attributes[:self.ncomponents],
                self.data.domain.class_vars,
                self.data.domain.metas
            )
            transformed = transformed.from_table(domain, transformed)
            # prevent caching new features by defining compute_value
            dom = Domain(
                [ContinuousVariable(a.name, compute_value=lambda _: None)
                 for a in self._pca.orig_domain.attributes],
                metas=[StringVariable(name='component')])
            metas = numpy.array([['PC{}'.format(i + 1)
                                  for i in range(self.ncomponents)]],
                                dtype=object).T
            components = Table(dom, self._pca.components_[:self.ncomponents],
                               metas=metas)
            components.name = 'components'

            # Original data with the selected components appended as metas.
            data_dom = Domain(
                self.data.domain.attributes,
                self.data.domain.class_vars,
                self.data.domain.metas + domain.attributes)
            data = Table.from_numpy(
                data_dom, self.data.X, self.data.Y,
                numpy.hstack((self.data.metas, transformed.X)))

        self._pca_projector.component = self.ncomponents
        self.Outputs.transformed_data.send(transformed)
        self.Outputs.components.send(components)
        self.Outputs.data.send(data)
        self.Outputs.pca.send(self._pca_projector)

    def send_report(self):
        """Report the settings and the plot; nothing is reported without data."""
        if self.data is None:
            return
        self.report_items((
            ("Normalize data", str(self.normalize)),
            ("Selected components", self.ncomponents),
            ("Explained variance", "{:.3f} %".format(self.variance_covered))
        ))
        self.report_plot()

    @classmethod
    def migrate_settings(cls, settings, version):
        """Repair or drop stored settings written by older widget versions.

        `version` is not consulted; every migration is applied whenever the
        corresponding key is present.
        """
        if "variance_covered" in settings:
            # Due to the error in gh-1896 the variance_covered was persisted
            # as a NaN value, causing a TypeError in the widgets `__init__`.
            vc = settings["variance_covered"]
            if isinstance(vc, numbers.Real):
                if numpy.isfinite(vc):
                    vc = int(vc)
                else:
                    vc = 100
                settings["variance_covered"] = vc
        if settings.get("ncomponents", 0) > MAX_COMPONENTS:
            settings["ncomponents"] = MAX_COMPONENTS

        # Remove old `decomposition_idx` when SVD was still included
        settings.pop("decomposition_idx", None)

        # Remove RemotePCA settings
        settings.pop("batch_size", None)
        settings.pop("address", None)
        settings.pop("auto_update", None)
class OWFiltering(widget.OWWidget):
    """Apply a digital filter to every audio clip of the input table.

    The filtered clips are written as WAV files into a "filtered-<id>"
    directory next to the source clips and a table that refers to the new
    files is sent on the output. The created directories are removed when
    the widget is deleted.
    """

    name = "Filtering"
    description = "Filter audio clips"
    priority = 2
    icon = "icons/filtering.png"

    inputs = [("Data", Orange.data.Table, "set_data")]

    filter_design_id = settings.Setting(0)
    band_type_id = settings.Setting(0)
    first_cutoff = settings.Setting(1000)
    second_cutoff = settings.Setting(1000)
    filter_order = settings.Setting(10)
    maximum_ripple = settings.Setting(10)
    minimum_attenuation = settings.Setting(10)

    outputs = [("Filtered data", Orange.data.Table)]

    want_main_area = False

    data = None

    def __init__(self):
        """Build the info label, the design/band combos and the spin boxes."""
        super().__init__()

        # Suffix of the output directory name, taken from the current time.
        self.tmp_dir_id = str(time.time()).split(".")[-1]
        # Directories created by this widget; removed in `onDeleteWidget`.
        self.new_tmp_dirs = []

        info_box = gui.widgetBox(self.controlArea, "Info")
        self.info = gui.widgetLabel(
            info_box, 'No data on input yet, waiting to get something.')

        self.filter_designs_combo = gui.comboBox(
            self.controlArea,
            self,
            "filter_design_id",
            box="Filter designs",
            items=list(filter_designs),
        )
        self.filter_designs_combo.activated.connect(self.onDesignChange)

        self.band_types_combo = gui.comboBox(
            self.controlArea,
            self,
            "band_type_id",
            box="Band types",
            items=list(band_types),
        )
        self.band_types_combo.activated.connect(self.onTypeChange)

        parameters_box = gui.widgetBox(self.controlArea, 'Parameters')

        self.first_cutoff_spin = gui.spin(
            parameters_box,
            self,
            "first_cutoff",
            minv=1,
            maxv=10000,
            controlWidth=80,
            alignment=Qt.AlignRight,
            label="First cutoff frequency [Hz]: ",
            spinType=float,
            decimals=2)

        self.second_cutoff_spin = gui.spin(
            parameters_box,
            self,
            "second_cutoff",
            minv=1,
            maxv=10000,
            controlWidth=80,
            alignment=Qt.AlignRight,
            label="Second cutoff frequency [Hz]: ",
            spinType=float,
            decimals=2)

        self.filter_order_spin = gui.spin(
            parameters_box,
            self,
            "filter_order",
            minv=1,
            maxv=10000,
            controlWidth=80,
            alignment=Qt.AlignRight,
            label="Order: ")

        self.maximum_ripple_spin = gui.spin(
            parameters_box,
            self,
            "maximum_ripple",
            minv=1,
            maxv=10000,
            controlWidth=80,
            alignment=Qt.AlignRight,
            label="Maximum ripple [dB]: ",
            spinType=float,
            decimals=2)

        self.minimum_attenuation_spin = gui.spin(
            parameters_box,
            self,
            "minimum_attenuation",
            minv=1,
            maxv=10000,
            controlWidth=80,
            alignment=Qt.AlignRight,
            label="Minimum attenuation [dB]: ",
            spinType=float,
            decimals=2)

        self.filter_button = gui.button(
            self.controlArea,
            self,
            "Filter",
            callback=lambda: self.call_filter(
                self.filter_designs_combo.currentText(),
                self.band_types_combo.currentText(),
                self.first_cutoff,
                self.second_cutoff,
                self.filter_order,
                self.maximum_ripple,
                self.minimum_attenuation))

        self.onDesignChange()

    def set_data(self, dataset):
        """
        Set data from input

        When the input is removed, the stored data is dropped and ``None``
        is sent on the output.

        :param dataset: input data
        :return: Void
        """
        if dataset is not None:
            self.info.setText('%d instances in input data set' % len(dataset))
            self.data = dataset
        else:
            # Forget the previous table so "Filter" cannot run on stale data.
            self.data = None
            self.info.setText(
                'No data on input yet, waiting to get something.')
            self.send("Filtered data", None)

    def allSpinHandle(self, handle):
        """
        Helper function which enables or disables all spin boxes at once

        :param handle: handle parameter (true -> enable, false -> disable)
        :return: Void
        """
        self.first_cutoff_spin.setEnabled(handle)
        self.second_cutoff_spin.setEnabled(handle)
        self.filter_order_spin.setEnabled(handle)
        self.maximum_ripple_spin.setEnabled(handle)
        self.minimum_attenuation_spin.setEnabled(handle)

    def onDesignChange(self):
        """
        When the design changes, enable only the parameters it uses

        :return: Void
        """
        self.allSpinHandle(True)

        if self.filter_design_id in (0, 1, 5):
            self.second_cutoff_spin.setEnabled(False)
            self.maximum_ripple_spin.setEnabled(False)
            self.minimum_attenuation_spin.setEnabled(False)
        elif self.filter_design_id == 2:
            self.second_cutoff_spin.setEnabled(False)
            self.minimum_attenuation_spin.setEnabled(False)
        elif self.filter_design_id == 3:
            self.second_cutoff_spin.setEnabled(False)
            self.maximum_ripple_spin.setEnabled(False)

        # The band type has the final say on the second cutoff spin box.
        self.onTypeChange()

    def onTypeChange(self):
        """
        When the band type changes, enable the second cutoff only for
        band types 2 and 3

        :return: Void
        """
        self.second_cutoff_spin.setEnabled(self.band_type_id in (2, 3))

    def call_filter(
            self,
            filter_type,
            filter_band,
            first_cutoff,
            second_cutoff,
            order,
            max_ripple,
            min_attenuation):
        """
        Call specified filter function on all audio clips

        Any exception raised while filtering is shown in the info label
        instead of being propagated; in that case nothing is sent.

        :param filter_type: type of filter
        :param filter_band: band of filter
        :param first_cutoff: first cutoff frequency
        :param second_cutoff: second cutoff frequency
        :param order: filter order
        :param max_ripple: the maximum ripple
        :param min_attenuation: the minimum attenuation
        :return: Void
        """
        if self.data is None:
            return

        filterBand = (''.join(c for c in filter_band if c not in "-")).lower()
        filterType = self.convertTypeToStr(filter_type)

        # Low/high-pass filters take one cutoff, the others a pair.
        if filterBand in ("lowpass", "highpass"):
            frequency = first_cutoff
        else:
            frequency = [first_cutoff, second_cutoff]

        # Ripple (rp) / attenuation (rs) are passed only to the designs
        # that received them before: cheby1 -> rp, cheby2 -> rs, any other
        # non-plain design -> both.
        design_kwargs = {}
        if filterType not in ("FIR", "butter", "bessel"):
            if filterType != "cheby2":
                design_kwargs["rp"] = max_ripple
            if filterType != "cheby1":
                design_kwargs["rs"] = min_attenuation

        error = None
        self.X = []
        self.metas = []

        try:
            for i, meta in enumerate(self.data.metas):
                # Use in-memory samples when the table has any; otherwise
                # read the clip from the file named in the metas. `.size`
                # avoids comparing an array with a list, which is not a
                # well-defined truth test.
                if self.data.X.size:
                    input_data = self.data.X[i]
                else:
                    input_data = read(meta[1])[1]
                    if len(input_data.shape) > 1:
                        input_data = input_data[:, 0]

                filtered = st.filter_signal(
                    input_data,
                    ftype=filterType,
                    band=filterBand,
                    order=order,
                    frequency=frequency,
                    sampling_rate=meta[-1],
                    **design_kwargs)

                self.new_tmp_dir = os.path.dirname(
                    meta[1]) + os.sep + "filtered-" + self.tmp_dir_id + os.sep

                if not os.path.exists(self.new_tmp_dir):
                    os.makedirs(self.new_tmp_dir)
                    self.new_tmp_dirs.append(self.new_tmp_dir)

                filename = self.new_tmp_dir + meta[0] + ".wav"

                self.metas.append([meta[0], filename,
                                   meta[2], meta[3], meta[4]])

                # Normalise to full scale and store as 16-bit samples.
                data = filtered["signal"]
                data = data / data.max()
                data = data * (2 ** 15 - 1)
                data = data.astype(numpy.int16)
                write(filename, meta[-1], data)

        except Exception as ex:
            # UI boundary: report the failure to the user below.
            error = ex

        if error is None:
            self.info.setStyleSheet(success_green)
            self.info.setText(
                filter_type + " " + filter_band + " " + "filter successful!")
            orange_table = Orange.data.Table.from_numpy(
                self.data.domain,
                numpy.empty((len(self.data.Y), 0), dtype=float),
                self.data.Y,
                self.metas
            )
            self.send("Filtered data", orange_table)
        else:
            self.info.setStyleSheet(error_red)
            self.info.setText("An error occurred:\n{}".format(error))

    def convertTypeToStr(self, filter_type):
        """
        Helper function which converts the displayed filter type into the
        coded string passed to the filter function

        :param filter_type: type of filter
        :return: coded type of filter ("bessel" for any unlisted type)
        """
        codes = {
            "Finite Impulse Response": "FIR",
            "Butterworth": "butter",
            "Chebyshev 1": "cheby1",
            "Chebyshev 2": "cheby2",
            "Elliptic": "ellip",
        }
        return codes.get(filter_type, "bessel")

    def onDeleteWidget(self):
        """
        Delete temporarily written audio clips

        :return: Void
        """
        if self.new_tmp_dirs:
            import shutil
            for path in self.new_tmp_dirs:
                shutil.rmtree(path)
class OWSVDPlusPlus(OWBaseLearner):
    """Learner widget exposing the SVD++ matrix factorization model.

    Besides the learner/model handling inherited from ``OWBaseLearner``,
    the widget accepts an optional "Feedback information" table and sends
    the ``P``, ``Q`` and ``Y`` tables obtained from the fitted model.
    """

    # Widget needs a name, or it is considered an abstract widget
    # and not shown in the menu.
    name = "SVD++"
    description = 'Matrix factorization model which makes use of implicit ' \
                  'feedback information'
    icon = "icons/svdplusplus.svg"
    priority = 80

    LEARNER = SVDPlusPlusLearner

    inputs = [("Feedback information", Table, "set_feedback")]

    outputs = [("P", Table),
               ("Q", Table),
               ("Y", Table)]

    # Parameters (general)
    num_factors = settings.Setting(10)
    num_iter = settings.Setting(15)
    learning_rate = settings.Setting(0.01)
    bias_learning_rate = settings.Setting(0.01)
    lmbda = settings.Setting(0.1)
    bias_lmbda = settings.Setting(0.1)
    feedback = None

    # Seed (Random state)
    RND_SEED, FIXED_SEED = range(2)
    seed_type = settings.Setting(RND_SEED)
    random_seed = settings.Setting(42)

    # SGD optimizers
    class _Optimizer:
        SGD, MOMENTUM, NAG, ADAGRAD, RMSPROP, ADADELTA, ADAM, ADAMAX = range(8)
        names = [
            'Vanilla SGD',
            'Momentum',
            "Nesterov momentum",
            'AdaGrad',
            'RMSprop',
            'AdaDelta',
            'Adam',
            'Adamax'
        ]

    opt_type = settings.Setting(_Optimizer.SGD)
    momentum = settings.Setting(0.9)
    rho = settings.Setting(0.9)
    beta1 = settings.Setting(0.9)
    beta2 = settings.Setting(0.999)

    def add_main_layout(self):
        """Build the three control groups: parameters, optimizer, seed."""
        # First groupbox (Common parameters)
        box = gui.widgetBox(self.controlArea, "Parameters")

        gui.spin(box, self, "num_factors", 1, 10000,
                 label="Number of latent factors:",
                 alignment=Qt.AlignRight, callback=self.settings_changed)

        gui.spin(box, self, "num_iter", 1, 10000,
                 label="Number of iterations:",
                 alignment=Qt.AlignRight, callback=self.settings_changed)

        gui.doubleSpin(box, self, "learning_rate", minv=1e-5, maxv=1e+5,
                       step=1e-5, label="Learning rate:", decimals=5,
                       alignment=Qt.AlignRight, controlWidth=90,
                       callback=self.settings_changed)

        gui.doubleSpin(box, self, "bias_learning_rate", minv=1e-5, maxv=1e+5,
                       step=1e-5, label=" Bias learning rate:", decimals=5,
                       alignment=Qt.AlignRight, controlWidth=90,
                       callback=self.settings_changed)

        gui.doubleSpin(box, self, "lmbda", minv=1e-4, maxv=1e+4, step=1e-4,
                       label="Regularization:", decimals=4,
                       alignment=Qt.AlignRight, controlWidth=90,
                       callback=self.settings_changed)

        gui.doubleSpin(box, self, "bias_lmbda", minv=1e-4, maxv=1e+4,
                       step=1e-4, label=" Bias regularization:", decimals=4,
                       alignment=Qt.AlignRight, controlWidth=90,
                       callback=self.settings_changed)

        # Second groupbox (SGD optimizers)
        box = gui.widgetBox(self.controlArea, "SGD optimizers")

        gui.comboBox(box, self, "opt_type", label="SGD optimizer: ",
                     items=self._Optimizer.names, orientation=Qt.Horizontal,
                     addSpace=4, callback=self._opt_changed)

        _m_comp = gui.doubleSpin(box, self, "momentum", minv=1e-4, maxv=1e+4,
                                 step=1e-4, label="Momentum:", decimals=4,
                                 alignment=Qt.AlignRight, controlWidth=90,
                                 callback=self.settings_changed)

        _r_comp = gui.doubleSpin(box, self, "rho", minv=1e-4, maxv=1e+4,
                                 step=1e-4, label="Rho:", decimals=4,
                                 alignment=Qt.AlignRight, controlWidth=90,
                                 callback=self.settings_changed)

        _b1_comp = gui.doubleSpin(box, self, "beta1", minv=1e-5, maxv=1e+5,
                                  step=1e-4, label="Beta 1:", decimals=5,
                                  alignment=Qt.AlignRight, controlWidth=90,
                                  callback=self.settings_changed)

        _b2_comp = gui.doubleSpin(box, self, "beta2", minv=1e-5, maxv=1e+5,
                                  step=1e-4, label="Beta 2:", decimals=5,
                                  alignment=Qt.AlignRight, controlWidth=90,
                                  callback=self.settings_changed)
        gui.rubber(box)

        # Order must match the columns of the table in _show_right_optimizer.
        self._opt_params = [_m_comp, _r_comp, _b1_comp, _b2_comp]
        self._show_right_optimizer()

        # Third groupbox (Random state)
        box = gui.widgetBox(self.controlArea, "Random state")
        rndstate = gui.radioButtons(box, self, "seed_type",
                                    callback=self.settings_changed)
        gui.appendRadioButton(rndstate, "Random seed")
        gui.appendRadioButton(rndstate, "Fixed seed")
        ibox = gui.indentedBox(rndstate)
        self.spin_rnd_seed = gui.spin(ibox, self, "random_seed", -1e5, 1e5,
                                      label="Seed:", alignment=Qt.AlignRight,
                                      callback=self.settings_changed)
        self.settings_changed()  # Update (extra) settings

    def settings_changed(self):
        """Sync the seed spin box with the seed type, then notify the base."""
        # Enable/Disable Fixed seed control
        self.spin_rnd_seed.setEnabled(self.seed_type == self.FIXED_SEED)
        super().settings_changed()

    def _show_right_optimizer(self):
        """Show only the parameter spin boxes used by the chosen optimizer."""
        # Rows follow _Optimizer order; columns follow self._opt_params
        # (momentum, rho, beta1, beta2).
        visibility = [
            [False, False, False, False],  # SGD
            [True, False, False, False],  # Momentum
            [True, False, False, False],  # NAG
            [False, False, False, False],  # AdaGrad
            [False, True, False, False],  # RMSprop
            [False, True, False, False],  # AdaDelta
            [False, False, True, True],  # Adam
            [False, False, True, True],  # Adamax
        ]

        for spin, visible in zip(self._opt_params, visibility[self.opt_type]):
            if visible:
                spin.box.show()
            else:
                spin.box.hide()

    def _opt_changed(self):
        """React to a change of the optimizer combo box."""
        self._show_right_optimizer()
        self.settings_changed()

    def select_optimizer(self):
        """Return an optimizer instance matching ``opt_type``.

        Any value that is not one of the known non-default optimizers falls
        back to plain SGD.
        """
        if self.opt_type == self._Optimizer.MOMENTUM:
            return opt.Momentum(momentum=self.momentum)

        elif self.opt_type == self._Optimizer.NAG:
            return opt.NesterovMomentum(momentum=self.momentum)

        elif self.opt_type == self._Optimizer.ADAGRAD:
            return opt.AdaGrad()

        elif self.opt_type == self._Optimizer.RMSPROP:
            return opt.RMSProp(rho=self.rho)

        elif self.opt_type == self._Optimizer.ADADELTA:
            return opt.AdaDelta(rho=self.rho)

        elif self.opt_type == self._Optimizer.ADAM:
            return opt.Adam(beta1=self.beta1, beta2=self.beta2)

        elif self.opt_type == self._Optimizer.ADAMAX:
            return opt.Adamax(beta1=self.beta1, beta2=self.beta2)

        else:
            return opt.SGD()

    def create_learner(self):
        """Instantiate ``LEARNER`` from the current widget settings."""
        # Set random state: a seed is passed only in "Fixed seed" mode.
        if self.seed_type == self.FIXED_SEED:
            seed = self.random_seed
        else:
            seed = None

        return self.LEARNER(
            num_factors=self.num_factors,
            num_iter=self.num_iter,
            learning_rate=self.learning_rate,
            bias_learning_rate=self.bias_learning_rate,
            lmbda=self.lmbda,
            bias_lmbda=self.bias_lmbda,
            feedback=self.feedback,
            optimizer=self.select_optimizer(),
            random_state=seed,
            callback=self.progress_callback
        )

    def get_learner_parameters(self):
        """Return (label, value) pairs describing the learner settings."""
        return (("Number of latent factors", self.num_factors),
                ("Number of iterations", self.num_iter),
                ("Learning rate", self.learning_rate),
                ("Bias learning rate", self.bias_learning_rate),
                ("Regularization", self.lmbda),
                ("Bias regularization", self.bias_lmbda),
                ("SGD optimizer", self._Optimizer.names[self.opt_type]))

    def _check_data(self):
        """Validate ``self.data`` as rating data and record the outcome.

        Sets and returns ``self.valid_data``; it is ``False`` when there is
        no data or when ``format_data.check_data`` rejects it or raises.
        """
        self.valid_data = False

        if self.data is not None:
            try:  # Check ratings data
                valid_ratings = format_data.check_data(self.data)
            except Exception as e:
                valid_ratings = False
                print('Error checking rating data: ' + str(e))

            if not valid_ratings:  # Check if it's valid
                self.Error.data_error("Data not valid for rating models.")
            else:
                self.valid_data = True

        return self.valid_data

    def update_learner(self):
        """Rebuild the learner unless the connected data is invalid."""
        self._check_data()

        # If our method returns 'False', it could be because there is no data.
        # But when cross-validating, a learner is required, as the data is in
        # the widget Test&Score
        if self.valid_data or self.data is None:
            super().update_learner()

    def update_model(self):
        """Refit the model and send its P, Q and Y tables (or ``None``)."""
        self._check_data()
        super().update_model()

        P = None
        Q = None
        Y = None
        # The base class may leave ``self.model`` unset/None (e.g. when
        # fitting fails); guard against dereferencing it in that case.
        model = getattr(self, "model", None)
        if self.valid_data and model is not None:
            P = model.getPTable()
            Q = model.getQTable()
            Y = model.getYTable()

        self.send("P", P)
        self.send("Q", Q)
        self.send("Y", Y)

    def progress_callback(self, *args, **kwargs):
        """Drive the progress bar; ``args[0]`` is the current iteration."""
        iteration = args[0]

        # Start/Finish progress bar
        if iteration == 1:  # Start it
            self.progressBarInit()

        if iteration == self.num_iter:  # Finish
            self.progressBarFinished()
            return

        if self.num_iter > 0:
            self.progressBarSet(int(iteration / self.num_iter * 100))

    def set_feedback(self, feedback):
        """Input handler: store the feedback table and refresh the learner."""
        self.feedback = feedback
        self.update_learner()
class OWSVMRegression(OWBaseSVM):
    """Widget constructing ε-SVR or ν-SVR learners from GUI settings."""

    name = "SVM Regression"
    description = "Support Vector Machines map inputs to higher-dimensional " \
                  "feature spaces that best map instances to a linear function. "
    icon = "icons/SVMRegression.svg"
    priority = 50

    LEARNER = SVRLearner

    outputs = [("Support vectors", Table, widget.Explicit)]

    #: SVR types
    Epsilon_SVR, Nu_SVR = 0, 1
    #: Selected SVR type
    svrtype = settings.Setting(Epsilon_SVR)
    #: C parameter for Epsilon SVR
    epsilon_C = settings.Setting(1.0)
    #: epsilon parameter for Epsilon SVR
    epsilon = settings.Setting(0.1)
    #: C parameter for Nu SVR
    nu_C = settings.Setting(1.0)
    #: Nu parameter for Nu SVR
    nu = settings.Setting(0.5)

    def _add_type_box(self):
        """Create the "SVR Type" box: one radio button plus two spin boxes
        per SVR variant, arranged in a grid."""
        layout = QGridLayout()
        self.type_box = type_box = gui.radioButtonsInBox(
            self.controlArea, self, "svrtype", [], box="SVR Type",
            orientation=layout)

        def make_spin(setting, low, high, step):
            # Spin boxes are placed in the grid manually, hence
            # addToLayout=False.
            return gui.doubleSpin(type_box, self, setting, low, high, step,
                                  decimals=2, addToLayout=False)

        # ε-SVR occupies grid rows 0 and 1.
        self.epsilon_radio = gui.appendRadioButton(
            type_box, "ε-SVR", addToLayout=False)
        self.epsilon_C_spin = make_spin("epsilon_C", 0.1, 512.0, 0.1)
        self.epsilon_spin = make_spin("epsilon", 0.1, 512.0, 0.1)
        layout.addWidget(self.epsilon_radio, 0, 0, Qt.AlignLeft)
        layout.addWidget(QLabel("Cost (C):"), 0, 1, Qt.AlignRight)
        layout.addWidget(self.epsilon_C_spin, 0, 2)
        layout.addWidget(QLabel("Loss epsilon (ε):"), 1, 1, Qt.AlignRight)
        layout.addWidget(self.epsilon_spin, 1, 2)

        # ν-SVR occupies grid rows 2 and 3.
        self.nu_radio = gui.appendRadioButton(
            type_box, "ν-SVR", addToLayout=False)
        self.nu_C_spin = make_spin("nu_C", 0.1, 512.0, 0.1)
        self.nu_spin = make_spin("nu", 0.05, 1.0, 0.05)
        layout.addWidget(self.nu_radio, 2, 0, Qt.AlignLeft)
        layout.addWidget(QLabel("Cost (C):"), 2, 1, Qt.AlignRight)
        layout.addWidget(self.nu_C_spin, 2, 2)
        layout.addWidget(QLabel("Complexity bound (ν):"), 3, 1, Qt.AlignRight)
        layout.addWidget(self.nu_spin, 3, 2)

    def create_learner(self):
        """Return an ``SVRLearner`` or ``NuSVRLearner`` for the current
        settings, depending on the selected SVR type."""
        kernel_names = ("linear", "poly", "rbf", "sigmoid")
        shared = {
            "kernel": kernel_names[self.kernel_type],
            "degree": self.degree,
            # A falsy gamma means "use the default".
            "gamma": self.gamma or self._default_gamma,
            "coef0": self.coef0,
            "tol": self.tol,
            "preprocessors": self.preprocessors,
        }
        if self.svrtype != OWSVMRegression.Epsilon_SVR:
            return NuSVRLearner(C=self.nu_C, nu=self.nu, **shared)
        return SVRLearner(C=self.epsilon_C, epsilon=self.epsilon, **shared)

    def get_learner_parameters(self):
        """Return an ordered mapping of report labels to parameter text."""
        if self.svrtype == OWSVMRegression.Epsilon_SVR:
            svm_type = "ε-SVR, C={}, ε={}".format(self.epsilon_C,
                                                 self.epsilon)
        else:
            svm_type = "ν-SVR, C={}, ν={}".format(self.nu_C, self.nu)

        report = OrderedDict()
        report["SVM type"] = svm_type
        self._report_kernel_parameters(report)
        report["Numerical tolerance"] = "{:.6}".format(self.tol)
        return report
class OWPredictions(OWWidget):
    """Widget showing the predictions of one or more models on a data set.

    The main area holds two side-by-side table views (predictions and the
    input data) whose vertical scrolling and row order are kept in sync.
    """

    name = "Predictions"
    icon = "icons/Predictions.svg"
    priority = 200
    description = "Display the predictions of models for an input data set."
    inputs = [("Data", Orange.data.Table, "set_data"),
              ("Predictors", Model, "set_predictor", widget.Multiple)]
    outputs = [("Predictions", Orange.data.Table),
               ("Evaluation Results", Orange.evaluation.Results)]

    class Warning(OWWidget.Warning):
        empty_data = Msg("Empty data set")

    class Error(OWWidget.Error):
        predictor_failed = \
            Msg("One or more predictors failed (see more...)\n{}")

    settingsHandler = settings.ClassValuesContextHandler()
    #: Display the full input dataset or only the target variable columns (if
    #: available)
    show_attrs = settings.Setting(True)
    #: Show predicted values (for discrete target variable)
    show_predictions = settings.Setting(True)
    #: Show predictions probabilities (for discrete target variable)
    show_probabilities = settings.Setting(True)
    #: List of selected class value indices in the "Show probabilities" list
    selected_classes = settings.ContextSetting([])
    #: Draw colored distribution bars
    draw_dist = settings.Setting(True)

    output_attrs = settings.Setting(True)
    output_predictions = settings.Setting(True)
    output_probabilities = settings.Setting(True)

    def __init__(self):
        super().__init__()

        #: Input data table
        self.data = None  # type: Optional[Orange.data.Table]
        #: A dict mapping input ids to PredictorSlot
        self.predictors = OrderedDict()  # type: Dict[object, PredictorSlot]
        #: A class variable (prediction target)
        self.class_var = None  # type: Optional[Orange.data.Variable]
        #: List of (discrete) class variable's values
        self.class_values = []  # type: List[str]

        box = gui.vBox(self.controlArea, "Info")
        self.infolabel = gui.widgetLabel(
            box, "No data on input.\nPredictors: 0\nTask: N/A")
        self.infolabel.setMinimumWidth(150)
        gui.button(box, self, "Restore Original Order",
                   callback=self._reset_order,
                   tooltip="Show rows in the original order")

        self.classification_options = box = gui.vBox(
            self.controlArea, "Show", spacing=-1, addSpace=False)

        gui.checkBox(box, self, "show_predictions", "Predicted class",
                     callback=self._update_prediction_delegate)
        b = gui.checkBox(box, self, "show_probabilities",
                         "Predicted probabilities for:",
                         callback=self._update_prediction_delegate)
        ibox = gui.indentedBox(box, sep=gui.checkButtonOffsetHint(b),
                               addSpace=False)
        gui.listBox(ibox, self, "selected_classes", "class_values",
                    callback=self._update_prediction_delegate,
                    selectionMode=QListWidget.MultiSelection,
                    addSpace=False)
        gui.checkBox(box, self, "draw_dist", "Draw distribution bars",
                     callback=self._update_prediction_delegate)

        box = gui.vBox(self.controlArea, "Data View")
        gui.checkBox(box, self, "show_attrs", "Show full data set",
                     callback=self._update_column_visibility)

        box = gui.vBox(self.controlArea, "Output", spacing=-1)
        # The "Original data" checkbox is never referenced again; it used to
        # be bound to ``checkbox_class`` and immediately overwritten below.
        gui.checkBox(
            box, self, "output_attrs", "Original data",
            callback=self.commit)
        self.checkbox_class = gui.checkBox(
            box, self, "output_predictions", "Predictions",
            callback=self.commit)
        self.checkbox_prob = gui.checkBox(
            box, self, "output_probabilities", "Probabilities",
            callback=self.commit)

        gui.rubber(self.controlArea)

        self.splitter = QSplitter(
            orientation=Qt.Horizontal,
            childrenCollapsible=False,
            handleWidth=2,
        )
        self.dataview = TableView(
            verticalScrollBarPolicy=Qt.ScrollBarAlwaysOn,
            horizontalScrollBarPolicy=Qt.ScrollBarAlwaysOn,
            horizontalScrollMode=QTableView.ScrollPerPixel,
            selectionMode=QTableView.NoSelection,
            focusPolicy=Qt.StrongFocus
        )
        self.predictionsview = TableView(
            verticalScrollBarPolicy=Qt.ScrollBarAlwaysOff,
            horizontalScrollBarPolicy=Qt.ScrollBarAlwaysOn,
            horizontalScrollMode=QTableView.ScrollPerPixel,
            selectionMode=QTableView.NoSelection,
            focusPolicy=Qt.StrongFocus,
            sortingEnabled=True,
        )

        self.predictionsview.setItemDelegate(PredictionsItemDelegate())
        self.dataview.verticalHeader().hide()

        # Keep the vertical scroll position of both views in lockstep.
        dsbar = self.dataview.verticalScrollBar()
        psbar = self.predictionsview.verticalScrollBar()

        psbar.valueChanged.connect(dsbar.setValue)
        dsbar.valueChanged.connect(psbar.setValue)

        self.dataview.verticalHeader().setDefaultSectionSize(22)
        self.predictionsview.verticalHeader().setDefaultSectionSize(22)
        self.dataview.verticalHeader().sectionResized.connect(
            lambda index, _, size:
            self.predictionsview.verticalHeader().resizeSection(index, size)
        )

        self.splitter.addWidget(self.predictionsview)
        self.splitter.addWidget(self.dataview)

        self.mainArea.layout().addWidget(self.splitter)

    @check_sql_input
    def set_data(self, data):
        """Set the input data set"""
        # An empty table is treated like no data, with a warning shown.
        if data is not None and not len(data):
            data = None
            self.Warning.empty_data()
        else:
            self.Warning.empty_data.clear()

        self.data = data
        if data is None:
            self.class_var = class_var = None
            self.dataview.setModel(None)
            self.predictionsview.setModel(None)
            self.predictionsview.setItemDelegate(PredictionsItemDelegate())
        else:
            # force full reset of the view's HeaderView state
            self.class_var = class_var = data.domain.class_var
            self.dataview.setModel(None)
            model = TableModel(data, parent=None)
            modelproxy = TableSortProxyModel()
            modelproxy.setSourceModel(model)
            self.dataview.setModel(modelproxy)
            self._update_column_visibility()

        discrete_class = class_var is not None and class_var.is_discrete
        self.classification_options.setVisible(discrete_class)

        self.closeContext()
        if discrete_class:
            self.class_values = list(class_var.values)
            self.selected_classes = list(range(len(self.class_values)))
            self.openContext(self.class_var)
        else:
            self.class_values = []
            self.selected_classes = []

        # Cached results refer to the previous data set.
        self._invalidate_predictions()

    def set_predictor(self, predictor=None, id=None):
        """Add, replace or (when ``predictor`` is None) remove the predictor
        registered under input ``id``."""
        if id in self.predictors:
            if predictor is not None:
                self.predictors[id] = self.predictors[id]._replace(
                    predictor=predictor, name=predictor.name, results=None)
            else:
                del self.predictors[id]
        elif predictor is not None:
            self.predictors[id] = \
                PredictorSlot(predictor, predictor.name, None)

    def handleNewSignals(self):
        """Recompute predictions and refresh views, messages and outputs."""
        if self.data is not None:
            self._call_predictors()
        self._update_predictions_model()
        self._update_prediction_delegate()
        self._set_errors()
        self._update_info()
        self.commit()

    def _call_predictors(self):
        """Run every predictor that has no cached result on ``self.data``.

        A slot's ``results`` becomes either the prediction result or a
        string describing why the predictor could not be used.
        """
        for inputid, pred in self.predictors.items():
            if pred.results is None:
                try:
                    predictor_class = pred.predictor.domain.class_var
                    if predictor_class != self.class_var:
                        # ``predictor_class`` may be None; avoid an
                        # AttributeError that the handler below would miss.
                        results = "{}: mismatching target ({})".format(
                            pred.predictor.name,
                            getattr(predictor_class, "name", None))
                    else:
                        results = self.predict(pred.predictor, self.data)
                except ValueError as err:
                    results = "{}: {}".format(pred.predictor.name, err)
                self.predictors[inputid] = pred._replace(results=results)

    def _set_errors(self):
        """Show the collected predictor failure messages, or clear them."""
        errors = "\n".join(p.results for p in self.predictors.values()
                           if isinstance(p.results, str))
        if errors:
            self.Error.predictor_failed(errors)
        else:
            self.Error.predictor_failed.clear()

    def _update_info(self):
        """Refresh the info label and enable/disable output checkboxes."""
        info = []
        if self.data is not None:
            info.append("Data: {} instances.".format(len(self.data)))
        else:
            info.append("Data: N/A")

        n_predictors = len(self.predictors)
        n_valid = len(self._valid_predictors())
        if n_valid != n_predictors:
            info.append("Predictors: {} (+ {} failed)".format(
                n_valid, n_predictors - n_valid))
        else:
            info.append("Predictors: {}".format(n_predictors or "N/A"))

        if self.class_var is None:
            info.append("Task: N/A")
        elif self.class_var.is_discrete:
            info.append("Task: Classification")
            self.checkbox_class.setEnabled(True)
            self.checkbox_prob.setEnabled(True)
        else:
            info.append("Task: Regression")
            self.checkbox_class.setEnabled(False)
            self.checkbox_prob.setEnabled(False)

        self.infolabel.setText("\n".join(info))

    def _invalidate_predictions(self):
        """Drop the cached results of all predictor slots."""
        for inputid, pred in list(self.predictors.items()):
            self.predictors[inputid] = pred._replace(results=None)

    def _valid_predictors(self):
        """Return the slots holding actual results (not None, not an error
        string)."""
        return [p for p in self.predictors.values()
                if p.results is not None and not isinstance(p.results, str)]

    def _update_predictions_model(self):
        """Update the prediction view model."""
        if self.data is not None:
            slots = self._valid_predictors()
            results = []
            class_var = self.class_var
            for p in slots:
                values, prob = p.results
                if self.class_var.is_discrete:
                    values = [Value(class_var, v) for v in values]
                results.append((values, prob))
            # Transpose from per-predictor (values, probs) to per-row tuples
            # of (value, prob) pairs, one pair per predictor.
            results = list(zip(*(zip(*res) for res in results)))
            headers = [p.name for p in slots]
            model = PredictionsModel(results, headers)
        else:
            model = None

        predmodel = PredictionsSortProxyModel()
        predmodel.setSourceModel(model)
        predmodel.setDynamicSortFilter(True)
        self.predictionsview.setItemDelegate(PredictionsItemDelegate())
        self.predictionsview.setModel(predmodel)
        hheader = self.predictionsview.horizontalHeader()
        hheader.setSortIndicatorShown(False)
        # SortFilterProxyModel is slow due to large abstraction overhead
        # (every comparison triggers multiple `model.index(...)`,
        # model.rowCount(...), `model.parent`, ... calls)
        hheader.setSectionsClickable(predmodel.rowCount() < 20000)

        predmodel.layoutChanged.connect(self._update_data_sort_order)
        self._update_data_sort_order()
        self.predictionsview.resizeColumnsToContents()

    def _update_column_visibility(self):
        """Update data column visibility."""
        if self.data is not None:
            domain = self.data.domain
            # Attribute columns are offset by class variables and metas.
            first_attr = len(domain.class_vars) + len(domain.metas)

            for i in range(first_attr, first_attr + len(domain.attributes)):
                self.dataview.setColumnHidden(i, not self.show_attrs)
            if domain.class_var:
                self.dataview.setColumnHidden(0, False)

    def _update_data_sort_order(self):
        """Update data row order to match the current predictions view order"""
        datamodel = self.dataview.model()  # data model proxy
        predmodel = self.predictionsview.model()  # predictions model proxy
        sortindicatorshown = False
        if datamodel is not None:
            assert isinstance(datamodel, TableSortProxyModel)
            n = datamodel.rowCount()
            if predmodel is not None and predmodel.sortColumn() >= 0:
                sortind = numpy.argsort(
                    [predmodel.mapToSource(predmodel.index(i, 0)).row()
                     for i in range(n)])
                # ``numpy.int`` was removed from NumPy; the builtin ``int``
                # is the type it used to alias.
                sortind = numpy.array(sortind, dtype=int)
                sortindicatorshown = True
            else:
                sortind = None

            datamodel.setSortIndices(sortind)

        self.predictionsview.horizontalHeader() \
            .setSortIndicatorShown(sortindicatorshown)

    def _reset_order(self):
        """Reset the row sorting to original input order."""
        datamodel = self.dataview.model()
        predmodel = self.predictionsview.model()
        if datamodel is not None:
            datamodel.sort(-1)
        if predmodel is not None:
            predmodel.sort(-1)
        self.predictionsview.horizontalHeader().setSortIndicatorShown(False)

    def _update_prediction_delegate(self):
        """Update the predicted probability visibility state"""
        if self.class_var is not None:
            delegate = PredictionsItemDelegate()
            if self.class_var.is_continuous:
                self._setup_delegate_continuous(delegate)
            else:
                self._setup_delegate_discrete(delegate)
                proxy = self.predictionsview.model()
                if proxy is not None:
                    proxy.setProbInd(
                        numpy.array(self.selected_classes, dtype=int))
            self.predictionsview.setItemDelegate(delegate)
            self.predictionsview.resizeColumnsToContents()
        self._update_spliter()

    def _setup_delegate_discrete(self, delegate):
        """Configure ``delegate`` format and colors for a discrete target."""
        colors = [QtGui.QColor(*rgb) for rgb in self.class_var.colors]
        fmt = []
        if self.show_probabilities:
            fmt.append(" : ".join("{{dist[{}]:.2f}}".format(i)
                                  for i in sorted(self.selected_classes)))
        if self.show_predictions:
            fmt.append("{value!s}")
        delegate.setFormat(" \N{RIGHTWARDS ARROW} ".join(fmt))
        if self.draw_dist and colors is not None:
            delegate.setColors(colors)
        return delegate

    def _setup_delegate_continuous(self, delegate):
        """Configure ``delegate`` to print values with the target's number
        of decimals."""
        delegate.setFormat(
            "{{value:.{}f}}".format(self.class_var.number_of_decimals))

    def _update_spliter(self):
        """Size the predictions pane to fit its columns; the data pane gets
        the remaining splitter width."""
        if self.data is None:
            return

        def width(view):
            h_header = view.horizontalHeader()
            v_header = view.verticalHeader()
            return h_header.length() + v_header.width()

        w = width(self.predictionsview) + 4
        w1, w2 = self.splitter.sizes()
        self.splitter.setSizes([w, w1 + w2 - w])

    def commit(self):
        """Send both outputs."""
        self._commit_predictions()
        self._commit_evaluation_results()

    def _commit_evaluation_results(self):
        """Send evaluation results computed on rows with a known class."""
        class_var = self.class_var
        slots = self._valid_predictors()
        if not slots:
            self.send("Evaluation Results", None)
            return

        # Rows with an unknown class value cannot be evaluated.
        nanmask = numpy.isnan(self.data.get_column_view(class_var)[0])
        data = self.data[~nanmask]
        N = len(data)
        results = Orange.evaluation.Results(data, store_data=True)
        results.folds = None
        results.row_indices = numpy.arange(N)
        results.actual = data.Y.ravel()
        results.predicted = numpy.vstack(
            tuple(p.results[0][~nanmask] for p in slots))
        if class_var and class_var.is_discrete:
            results.probabilities = numpy.array(
                [p.results[1][~nanmask] for p in slots])
        results.learner_names = [p.name for p in slots]
        self.send("Evaluation Results", results)

    def _commit_predictions(self):
        """Send the input data extended with prediction meta columns."""
        slots = self._valid_predictors()
        if not slots:
            self.send("Predictions", None)
            return

        class_var = self.class_var
        if class_var and class_var.is_discrete:
            newmetas, newcolumns = self._classification_output_columns()
        else:
            newmetas, newcolumns = self._regression_output_columns()

        attrs = list(self.data.domain.attributes) if self.output_attrs else []
        metas = list(self.data.domain.metas) + newmetas
        domain = Orange.data.Domain(attrs, class_var, metas=metas)
        predictions = self.data.from_table(domain, self.data)
        if newcolumns:
            newcolumns = numpy.hstack(
                [numpy.atleast_2d(cols) for cols in newcolumns])
            # The new metas were appended last, so fill the trailing columns.
            predictions.metas[:, -newcolumns.shape[1]:] = newcolumns
        self.send("Predictions", predictions)

    def _classification_output_columns(self):
        """Return (meta variables, column arrays) for a discrete target,
        honouring the output_predictions/output_probabilities settings."""
        newmetas = []
        newcolumns = []
        slots = self._valid_predictors()
        if self.output_predictions:
            newmetas += [DiscreteVariable(name=p.name,
                                          values=self.class_values)
                         for p in slots]
            newcolumns += [p.results[0].reshape((-1, 1)) for p in slots]

        if self.output_probabilities:
            newmetas += [ContinuousVariable(name="%s(%s)" % (p.name, value))
                         for p in slots for value in self.class_values]
            newcolumns += [p.results[1] for p in slots]
        return newmetas, newcolumns

    def _regression_output_columns(self):
        """Return (meta variables, column arrays) for a continuous target."""
        slots = self._valid_predictors()
        newmetas = [ContinuousVariable(name=p.name) for p in slots]
        newcolumns = [p.results[0].reshape((-1, 1)) for p in slots]
        return newmetas, newcolumns

    def send_report(self):
        """Report the info text and a merged data & predictions table."""
        def merge_data_with_predictions():
            data_model = self.dataview.model()
            predictions_model = self.predictionsview.model()

            # use ItemDelegate to style prediction values
            def style(value):
                return self.predictionsview.itemDelegate().displayText(
                    value, QLocale())

            # iterate only over visible columns of data's QTableView
            iter_data_cols = [col
                              for col in range(data_model.columnCount())
                              if not self.dataview.isColumnHidden(col)]

            # print header
            yield [''] + \
                  [predictions_model.headerData(col, Qt.Horizontal,
                                                Qt.DisplayRole)
                   for col in range(predictions_model.columnCount())] + \
                  [data_model.headerData(col, Qt.Horizontal, Qt.DisplayRole)
                   for col in iter_data_cols]

            # print data & predictions
            for i in range(data_model.rowCount()):
                yield [data_model.headerData(i, Qt.Vertical,
                                             Qt.DisplayRole)] + \
                      [style(predictions_model.data(
                          predictions_model.index(i, j)))
                       for j in range(predictions_model.columnCount())] + \
                      [data_model.data(data_model.index(i, j))
                       for j in iter_data_cols]

        if self.data is not None:
            text = self.infolabel.text().replace('\n', '<br>')
            if self.show_probabilities and self.selected_classes:
                text += '<br>Showing probabilities for: '
                text += ', '.join([self.class_values[i]
                                   for i in self.selected_classes])
            self.report_paragraph('Info', text)
            self.report_table("Data & Predictions",
                              merge_data_with_predictions(),
                              header_rows=1, header_columns=1)

    @classmethod
    def predict(cls, predictor, data):
        """Dispatch to the discrete or continuous prediction helper based on
        the predictor's class variable.

        Returns None when the predictor has no class variable.
        """
        class_var = predictor.domain.class_var
        if class_var:
            if class_var.is_discrete:
                return cls.predict_discrete(predictor, data)
            elif class_var.is_continuous:
                return cls.predict_continuous(predictor, data)

    @staticmethod
    def predict_discrete(predictor, data):
        """Return the result of calling the predictor with
        ``Model.ValueProbs``."""
        return predictor(data, Model.ValueProbs)

    @staticmethod
    def predict_continuous(predictor, data):
        """Return predicted values paired with a placeholder list of None
        (one per data row) in place of probabilities."""
        values = predictor(data, Model.Value)
        return values, [None] * len(data)
class OWYahooFinance(widget.OWWidget):
    """Widget that downloads stock data for a ticker symbol and date range
    and sends it as a time series."""

    name = 'Yahoo Finance'
    description = "Generate time series from Yahoo Finance stock market data."
    icon = 'icons/YahooFinance.svg'
    priority = 9

    class Outputs:
        time_series = Output("Time series", Timeseries)

    QT_DATE_FORMAT = 'yyyy-MM-dd'
    PY_DATE_FORMAT = '%Y-%m-%d'
    MIN_DATE = date(1851, 1, 1)

    # Default range: the five years (5 * 365 days) up to the day the module
    # is imported. Dates are stored as PY_DATE_FORMAT strings.
    date_from = settings.Setting(
        (datetime.now().date() - timedelta(5 * 365)).strftime(PY_DATE_FORMAT))
    date_to = settings.Setting(datetime.now().date().strftime(PY_DATE_FORMAT))
    # Ticker history, most recently used first.
    symbols = settings.Setting(
        ['AMZN', 'AAPL', 'GOOG', 'FB', 'SPY', '^DJI', '^TNX'])

    want_main_area = False
    resizing_enabled = False

    class Error(widget.OWWidget.Error):
        download_error = widget.Msg('Failed to download data (HTTP Error {}). '
                                    'Wrong stock symbol?')

    def __init__(self):
        # Call the base initializer like the other widgets in this file.
        super().__init__()

        box = gui.widgetBox(self.controlArea, 'Yahoo Finance Stock Data',
                            orientation='horizontal')
        lbox = gui.widgetBox(box, orientation='vertical')
        hbox = gui.widgetBox(lbox, orientation='horizontal')
        gui.label(hbox, self, 'Ticker:')
        self.combo = combo = QComboBox(editable=True,
                                       insertPolicy=QComboBox.InsertAtTop)
        combo.addItems(self.symbols)
        hbox.layout().addWidget(combo)
        gui.rubber(combo.parentWidget())

        minDate = QDate.fromString(self.MIN_DATE.strftime(self.PY_DATE_FORMAT),
                                   self.QT_DATE_FORMAT)
        date_from = QDateEdit(
            QDate.fromString(self.date_from, self.QT_DATE_FORMAT),
            displayFormat=self.QT_DATE_FORMAT,
            minimumDate=minDate,
            calendarPopup=True)
        date_to = QDateEdit(
            QDate.fromString(self.date_to, self.QT_DATE_FORMAT),
            displayFormat=self.QT_DATE_FORMAT,
            minimumDate=minDate,
            calendarPopup=True)
        # Mirror edits into the string settings. The parameter is named
        # ``qdate`` so it does not shadow the ``date`` class.
        date_from.dateChanged.connect(
            lambda qdate: setattr(self, 'date_from',
                                  qdate.toString(self.QT_DATE_FORMAT)))
        date_to.dateChanged.connect(
            lambda qdate: setattr(self, 'date_to',
                                  qdate.toString(self.QT_DATE_FORMAT)))
        hbox = gui.hBox(lbox)
        gui.label(hbox, self, "From:")
        hbox.layout().addWidget(date_from)
        hbox = gui.hBox(lbox)
        gui.label(hbox, self, "To:")
        hbox.layout().addWidget(date_to)

        self.button = gui.button(self.controlArea, self, 'Download',
                                 callback=self.download)

    def download(self):
        """Fetch data for the entered ticker and send it to the output.

        The ticker is upper-cased and moved to the front of the history.
        An empty ticker is ignored without touching the history. On any
        failure the ``download_error`` message is shown with the
        exception's ``status`` attribute (or -1 when it has none).
        """
        symbol = self.combo.currentText().strip().upper()
        # Bail out before mutating the history so an empty entry is never
        # stored and the currently selected item is not dropped.
        if not symbol:
            return

        date_from = datetime.strptime(self.date_from, self.PY_DATE_FORMAT)
        date_to = datetime.strptime(self.date_to, self.PY_DATE_FORMAT)

        # Update symbol in symbols history
        self.combo.removeItem(self.combo.currentIndex())
        self.combo.insertItem(0, symbol)
        self.combo.setCurrentIndex(0)
        try:
            self.symbols.remove(symbol)
        except ValueError:
            pass
        self.symbols.insert(0, symbol)

        self.Error.clear()
        with self.progressBar(3) as progress:
            try:
                progress.advance()
                self.button.setDisabled(True)
                data = finance_data(symbol, date_from, date_to)

                self.Outputs.time_series.send(data)
            except Exception as e:
                self.Error.download_error(getattr(e, 'status', -1))
            finally:
                # Re-enable the button whether or not the download worked.
                self.button.setDisabled(False)
class OWPCA(widget.OWWidget):
    """Principal component analysis widget with an interactive scree plot.

    The number of output components can be chosen with the "Components"
    spin box (where 0 is displayed as "All"), with the "Variance covered"
    spin box, or by dragging the vertical cut line in the plot.
    """
    name = "PCA"
    description = "Principal component analysis with a scree-diagram."
    icon = "icons/PCA.svg"
    priority = 3050
    keywords = ["principal component analysis", "linear transformation"]

    class Inputs:
        data = Input("Data", Table)

    class Outputs:
        transformed_data = Output("Transformed data", Table)
        components = Output("Components", Table)
        pca = Output("PCA", PCA, dynamic=False)

    settingsHandler = settings.DomainContextHandler()

    ncomponents = settings.Setting(2)
    variance_covered = settings.Setting(100)
    batch_size = settings.Setting(100)
    address = settings.Setting('')
    auto_update = settings.Setting(True)
    auto_commit = settings.Setting(True)
    normalize = settings.ContextSetting(True)
    decomposition_idx = settings.ContextSetting(0)
    maxp = settings.Setting(20)
    axis_labels = settings.Setting(10)

    graph_name = "plot.plotItem"

    class Warning(widget.OWWidget.Warning):
        trivial_components = widget.Msg(
            "All components of the PCA are trivial (explain 0 variance). "
            "Input data is constant (or near constant).")

    class Error(widget.OWWidget.Error):
        no_features = widget.Msg("At least 1 feature is required")
        no_instances = widget.Msg("At least 1 data instance is required")
        sparse_data = widget.Msg("Sparse data is not supported")

    def __init__(self):
        """Initialise the widget state and build the controls and the plot."""
        super().__init__()
        self.data = None

        self._pca = None
        self._transformed = None
        self._variance_ratio = None
        self._cumulative = None
        self._line = False
        # Remote PCA handle. It is only created by `start()`, but
        # `update_model()` and `get_model()` read it unconditionally, so it
        # must exist (as None) from the beginning.
        self.rpca = None
        self._init_projector()

        # Components Selection
        box = gui.vBox(self.controlArea, "Components Selection")
        form = QFormLayout()
        box.layout().addLayout(form)

        self.components_spin = gui.spin(
            box, self, "ncomponents", 1, MAX_COMPONENTS,
            callback=self._update_selection_component_spin,
            keyboardTracking=False
        )
        self.components_spin.setSpecialValueText("All")

        self.variance_spin = gui.spin(
            box, self, "variance_covered", 1, 100,
            callback=self._update_selection_variance_spin,
            keyboardTracking=False
        )
        self.variance_spin.setSuffix("%")

        form.addRow("Components:", self.components_spin)
        form.addRow("Variance covered:", self.variance_spin)

        # Incremental learning
        self.sampling_box = gui.vBox(self.controlArea, "Incremental learning")
        self.addresstext = QLineEdit(box)
        self.addresstext.setPlaceholderText('Remote server')
        if self.address:
            self.addresstext.setText(self.address)
        self.sampling_box.layout().addWidget(self.addresstext)

        form = QFormLayout()
        self.sampling_box.layout().addLayout(form)
        self.batch_spin = gui.spin(
            self.sampling_box, self, "batch_size", 50, 100000, step=50,
            keyboardTracking=False)
        form.addRow("Batch size ~ ", self.batch_spin)

        self.start_button = gui.button(
            self.sampling_box, self, "Start remote computation",
            callback=self.start, autoDefault=False,
            tooltip="Start/abort computation on the server")
        self.start_button.setEnabled(False)

        gui.checkBox(self.sampling_box, self, "auto_update",
                     "Periodically fetch model", callback=self.update_model)
        self.__timer = QTimer(self, interval=2000)
        self.__timer.timeout.connect(self.get_model)

        self.sampling_box.setVisible(remotely)

        # Decomposition
        self.decomposition_box = gui.radioButtons(
            self.controlArea, self,
            "decomposition_idx", [d.name for d in DECOMPOSITIONS],
            box="Decomposition", callback=self._update_decomposition
        )

        # Options
        self.options_box = gui.vBox(self.controlArea, "Options")
        self.normalize_box = gui.checkBox(
            self.options_box, self, "normalize",
            "Normalize data", callback=self._update_normalize
        )

        self.maxp_spin = gui.spin(
            self.options_box, self, "maxp", 1, MAX_COMPONENTS,
            label="Show only first", callback=self._setup_plot,
            keyboardTracking=False
        )

        self.controlArea.layout().addStretch()

        gui.auto_commit(self.controlArea, self, "auto_commit", "Apply",
                        checkbox_label="Apply automatically")

        self.plot = pg.PlotWidget(background="w")

        axis = self.plot.getAxis("bottom")
        axis.setLabel("Principal Components")
        axis = self.plot.getAxis("left")
        axis.setLabel("Proportion of variance")
        self.plot_horlabels = []
        self.plot_horlines = []

        self.plot.getViewBox().setMenuEnabled(False)
        self.plot.getViewBox().setMouseEnabled(False, False)
        self.plot.showGrid(True, True, alpha=0.5)
        self.plot.setRange(xRange=(0.0, 1.0), yRange=(0.0, 1.0))

        self.mainArea.layout().addWidget(self.plot)
        self._update_normalize()

    def update_model(self):
        """Fetch the remote model and (re)arm or stop the polling timer."""
        self.get_model()
        if self.auto_update and self.rpca and not self.rpca.ready():
            self.__timer.start(2000)
        else:
            self.__timer.stop()

    def update_buttons(self, sparse_data=False):
        """Enable only the decompositions usable for the current data.

        For sparse data, normalization is switched off and decompositions
        whose class does not set `supports_sparse` are disabled; if the
        selected one got disabled, the first sparse-capable one is selected.
        The projector is re-created afterwards.
        """
        if sparse_data:
            self.normalize = False

        buttons = self.decomposition_box.buttons
        for cls, button in zip(DECOMPOSITIONS, buttons):
            button.setDisabled(sparse_data and not cls.supports_sparse)

        if not buttons[self.decomposition_idx].isEnabled():
            # Set decomposition index to first sparse-enabled decomposition
            for i, cls in enumerate(DECOMPOSITIONS):
                if cls.supports_sparse:
                    self.decomposition_idx = i
                    break

        self._init_projector()

    def start(self):
        """Start the remote computation, or abort it if one is running.

        Which of the two happens is decided by the start button's text.
        """
        if 'Abort' in self.start_button.text():
            self.rpca.abort()
            self.__timer.stop()
            self.start_button.setText("Start remote computation")
        else:
            self.address = self.addresstext.text()
            with remote.server(self.address):
                from Orange.projection.pca import RemotePCA
                maxiter = (1e5 + self.data.approx_len()) / self.batch_size * 3
                self.rpca = RemotePCA(self.data, self.batch_size, int(maxiter))
            self.update_model()
            self.start_button.setText("Abort remote computation")

    @Inputs.data
    def set_data(self, data):
        """Handle new input data: validate it, then fit the decomposition.

        An `SqlTable` below `AUTO_DL_LIMIT` rows is downloaded; a larger one
        is sampled when remote computation is unavailable, otherwise the
        remote-computation controls are shown. Data without features or
        without instances raises the corresponding widget error and clears
        the outputs.
        """
        self.closeContext()
        self.clear_messages()
        self.clear()
        self.start_button.setEnabled(False)
        self.information()
        self.data = None
        if isinstance(data, SqlTable):
            if data.approx_len() < AUTO_DL_LIMIT:
                data = Table(data)
            elif not remotely:
                self.information("Data has been sampled")
                data_sample = data.sample_time(1, no_cache=True)
                data_sample.download_data(2000, partial=True)
                data = Table(data_sample)
            else:  # data was big and remote available
                self.sampling_box.setVisible(True)
                self.start_button.setText("Start remote computation")
                self.start_button.setEnabled(True)
        if not isinstance(data, SqlTable):
            self.sampling_box.setVisible(False)

        if isinstance(data, Table):
            if len(data.domain.attributes) == 0:
                self.Error.no_features()
                self.clear_outputs()
                return
            if len(data) == 0:
                self.Error.no_instances()
                self.clear_outputs()
                return

        self.openContext(data)
        sparse_data = data is not None and data.is_sparse()
        self.normalize_box.setDisabled(sparse_data)
        self.update_buttons(sparse_data=sparse_data)

        self.data = data
        self.fit()

    def fit(self):
        """Fit the projector on `self.data`, redraw the plot and commit.

        Nothing is fitted here for `SqlTable` data. If the cumulative
        explained variance is not finite, the `trivial_components` warning
        is shown and no model is kept.
        """
        self.clear()
        self.Warning.trivial_components.clear()
        if self.data is None:
            return
        data = self.data
        self._pca_projector.preprocessors = \
            self._pca_preprocessors + ([Normalize()] if self.normalize else [])
        if not isinstance(data, SqlTable):
            pca = self._pca_projector(data)
            variance_ratio = pca.explained_variance_ratio_
            cumulative = numpy.cumsum(variance_ratio)

            if numpy.isfinite(cumulative[-1]):
                # 0 becomes the spin box minimum, i.e. the "All" value.
                self.components_spin.setRange(0, len(cumulative))
                self._pca = pca
                self._variance_ratio = variance_ratio
                self._cumulative = cumulative
                self._setup_plot()
            else:
                self.Warning.trivial_components()

            self.unconditional_commit()

    def clear(self):
        """Drop the fitted model, cached transform and all plot items."""
        self._pca = None
        self._transformed = None
        self._variance_ratio = None
        self._cumulative = None
        self._line = None
        self.plot_horlabels = []
        self.plot_horlines = []
        self.plot.clear()

    def clear_outputs(self):
        """Send None on the data outputs and the bare projector on "PCA"."""
        self.Outputs.transformed_data.send(None)
        self.Outputs.components.send(None)
        self.Outputs.pca.send(self._pca_projector)

    def get_model(self):
        """Fetch the current state of the remote PCA, if any, and commit it."""
        if self.rpca is None:
            return
        if self.rpca.ready():
            self.__timer.stop()
            self.start_button.setText("Restart (finished)")
        self._pca = self.rpca.get_state()
        if self._pca is None:
            return
        self._variance_ratio = self._pca.explained_variance_ratio_
        self._cumulative = numpy.cumsum(self._variance_ratio)
        self._setup_plot()
        # Invalidate the cached transform; it belongs to the previous model.
        self._transformed = None
        self.commit()

    def _setup_plot(self):
        """Redraw the scree plot: both curves, the cut line and its markers."""
        self.plot.clear()
        if self._pca is None:
            return

        explained_ratio = self._variance_ratio
        explained = self._cumulative
        # Show at most `maxp` components.
        p = min(len(self._variance_ratio), self.maxp)

        self.plot.plot(numpy.arange(p), explained_ratio[:p],
                       pen=pg.mkPen(QColor(Qt.red), width=2),
                       antialias=True,
                       name="Variance")
        self.plot.plot(numpy.arange(p), explained[:p],
                       pen=pg.mkPen(QColor(Qt.darkYellow), width=2),
                       antialias=True,
                       name="Cumulative Variance")

        cutpos = self._nselected_components() - 1
        self._line = pg.InfiniteLine(
            angle=90, pos=cutpos, movable=True, bounds=(0, p - 1))
        self._line.setCursor(Qt.SizeHorCursor)
        self._line.setPen(pg.mkPen(QColor(Qt.black), width=2))
        self._line.sigPositionChanged.connect(self._on_cut_changed)
        self.plot.addItem(self._line)

        self.plot_horlines = (
            pg.PlotCurveItem(pen=pg.mkPen(QColor(Qt.blue), style=Qt.DashLine)),
            pg.PlotCurveItem(pen=pg.mkPen(QColor(Qt.blue), style=Qt.DashLine)))
        self.plot_horlabels = (
            pg.TextItem(color=QColor(Qt.black), anchor=(1, 0)),
            pg.TextItem(color=QColor(Qt.black), anchor=(1, 1)))
        for item in self.plot_horlabels + self.plot_horlines:
            self.plot.addItem(item)
        self._set_horline_pos()

        self.plot.setRange(xRange=(0.0, p - 1), yRange=(0.0, 1.0))
        self._update_axis()

    def _resolved_ncomponents(self):
        """Return `ncomponents` with the special "All" value (0) resolved.

        When `ncomponents` is 0 and a variance ratio is available, the number
        of fitted components is returned instead; otherwise `ncomponents`
        is returned unchanged.
        """
        if self.ncomponents == 0 and self._variance_ratio is not None:
            return len(self._variance_ratio)
        return self.ncomponents

    def _set_horline_pos(self):
        """Move the horizontal marker lines and labels to the current cut."""
        # Resolve "All" first; a raw 0 would place the markers at x = -1.
        cutidx = self._resolved_ncomponents() - 1
        for line, label, curve in zip(self.plot_horlines, self.plot_horlabels,
                                      (self._variance_ratio, self._cumulative)):
            y = curve[cutidx]
            line.setData([-1, cutidx], 2 * [y])
            label.setPos(cutidx, y)
            label.setPlainText("{:.3f}".format(y))

    def _on_cut_changed(self, line):
        """Update the selection after the cut line was moved in the plot."""
        # cut changed by means of a cut line over the scree plot.
        value = int(round(line.value()))
        # Snap the line to an integer component index.
        self._line.setValue(value)
        current = self._nselected_components()
        components = value + 1

        # Keep the "All" value when the line sits on the last component.
        if not (self.ncomponents == 0 and
                components == len(self._variance_ratio)):
            self.ncomponents = components

        self._set_horline_pos()

        if self._pca is not None:
            var = self._cumulative[components - 1]
            if numpy.isfinite(var):
                self.variance_covered = int(var * 100)

        if current != self._nselected_components():
            self._invalidate_selection()

    def _update_selection_component_spin(self):
        """Update the selection after the "Components" spin box changed."""
        # cut changed by "ncomponents" spin.
        if self._pca is None:
            self._invalidate_selection()
            return

        if self.ncomponents == 0:
            # Special "All" value
            cut = len(self._variance_ratio)
        else:
            cut = self.ncomponents

        var = self._cumulative[cut - 1]
        if numpy.isfinite(var):
            self.variance_covered = int(var * 100)

        if numpy.floor(self._line.value()) + 1 != cut:
            self._line.setValue(cut - 1)

        self._invalidate_selection()

    def _update_selection_variance_spin(self):
        """Update the selection after the "Variance covered" spin changed."""
        # cut changed by "max variance" spin.
        if self._pca is None:
            return

        cut = numpy.searchsorted(self._cumulative,
                                 self.variance_covered / 100.0) + 1
        cut = min(cut, len(self._cumulative))
        self.ncomponents = cut
        if numpy.floor(self._line.value()) + 1 != cut:
            self._line.setValue(cut - 1)
        self._invalidate_selection()

    def _update_normalize(self):
        """Refit after the normalization option changed."""
        self.fit()
        # fit() only commits when there is data; commit explicitly otherwise.
        if self.data is None:
            self._invalidate_selection()

    def _init_projector(self):
        """Create the projector for the currently selected decomposition."""
        cls = DECOMPOSITIONS[self.decomposition_idx]
        self._pca_projector = cls(n_components=MAX_COMPONENTS)
        self._pca_projector.component = self.ncomponents
        self._pca_preprocessors = cls.preprocessors

    def _update_decomposition(self):
        """Rebuild the projector and refit after the decomposition changed."""
        self._init_projector()
        self._update_normalize()

    def _nselected_components(self):
        """Return the number of selected components.

        Returns 0 when no model is fitted. Note that this may also update
        `variance_covered` or `ncomponents` as a side effect.
        """
        if self._pca is None:
            return 0

        if self.ncomponents == 0:
            # Special "All" value
            max_comp = len(self._variance_ratio)
        else:
            max_comp = self.ncomponents

        var_max = self._cumulative[max_comp - 1]
        if var_max != numpy.floor(self.variance_covered / 100.0):
            cut = max_comp
            assert numpy.isfinite(var_max)
            self.variance_covered = int(var_max * 100)
        else:
            self.ncomponents = cut = numpy.searchsorted(
                self._cumulative, self.variance_covered / 100.0) + 1
        return cut

    def _invalidate_selection(self):
        """Commit the outputs after the component selection changed."""
        self.commit()

    def _update_axis(self):
        """Set the bottom-axis ticks, labelled with 1-based component numbers."""
        p = min(len(self._variance_ratio), self.maxp)
        axis = self.plot.getAxis("bottom")
        # Tick step chosen so that roughly `axis_labels` ticks are shown.
        d = max((p - 1) // (self.axis_labels - 1), 1)
        axis.setTicks([[(i, str(i + 1)) for i in range(0, p, d)]])

    def commit(self):
        """Send the transformed data, the components table and the projector.

        With no fitted model, None is sent on both data outputs. The special
        "All" value of `ncomponents` (0) outputs every fitted component.
        """
        transformed = components = None
        if self._pca is not None:
            if self._transformed is None:
                # Compute the full transform (MAX_COMPONENTS components) only once.
                self._transformed = self._pca(self.data)
            transformed = self._transformed

            # Slicing with a raw 0 ("All") would output no components at all.
            n_out = self._resolved_ncomponents()

            domain = Domain(
                transformed.domain.attributes[:n_out],
                self.data.domain.class_vars,
                self.data.domain.metas
            )
            transformed = transformed.from_table(domain, transformed)
            # prevent caching new features by defining compute_value
            dom = Domain([ContinuousVariable(a.name, compute_value=lambda _: None)
                          for a in self._pca.orig_domain.attributes],
                         metas=[StringVariable(name='component')])
            metas = numpy.array([['PC{}'.format(i + 1)
                                  for i in range(n_out)]],
                                dtype=object).T
            components = Table(dom, self._pca.components_[:n_out],
                               metas=metas)
            components.name = 'components'

        self._pca_projector.component = self.ncomponents
        self.Outputs.transformed_data.send(transformed)
        self.Outputs.components.send(components)
        self.Outputs.pca.send(self._pca_projector)

    def send_report(self):
        """Add the current options and the scree plot to the report."""
        if self.data is None:
            return
        self.report_items((
            ("Decomposition", DECOMPOSITIONS[self.decomposition_idx].name),
            ("Normalize data", str(self.normalize)),
            ("Selected components", self.ncomponents),
            ("Explained variance", "{:.3f} %".format(self.variance_covered))
        ))
        self.report_plot()

    @classmethod
    def migrate_settings(cls, settings, version):
        """Repair stored settings in place.

        A non-finite real `variance_covered` is reset to 100 and a finite one
        is converted to int; `ncomponents` is capped at `MAX_COMPONENTS`.
        """
        if "variance_covered" in settings:
            # Due to the error in gh-1896 the variance_covered was persisted
            # as a NaN value, causing a TypeError in the widgets `__init__`.
            vc = settings["variance_covered"]
            if isinstance(vc, numbers.Real):
                if numpy.isfinite(vc):
                    vc = int(vc)
                else:
                    vc = 100
                settings["variance_covered"] = vc
        if settings.get("ncomponents", 0) > MAX_COMPONENTS:
            settings["ncomponents"] = MAX_COMPONENTS
class OWPythagoreanForest(OWWidget):
    """Widget that shows the trees of a random forest in a wrapping list view.

    Selecting a tree in the list sends it on the "Tree" output.
    """
    name = 'Pythagorean Forest'
    description = '毕达哥拉斯森林用于可视化随机森林'
    icon = 'icons/PythagoreanForest.svg'
    settings_version = 2
    keywords = ["fractal"]

    priority = 1001

    class Inputs:
        random_forest = Input("Random forest", RandomForestModel)

    class Outputs:
        tree = Output("Tree", TreeModel)

    # Enable the save as feature
    # NOTE(review): no attribute with this name is assigned anywhere in this
    # class, although `send_report` calls `report_plot()`; the value looks
    # like a translated identifier -- confirm what it should refer to.
    graph_name = '场景'

    # Settings
    depth_limit = settings.ContextSetting(10)
    target_class_index = settings.ContextSetting(0)
    size_calc_idx = settings.Setting(0)
    zoom = settings.Setting(200)

    # (label, function) pairs; only the labels are used in this class, to
    # populate the "Size" combo box.
    SIZE_CALCULATION = [
        ('Normal', lambda x: x),
        ('Square root', lambda x: sqrt(x)),
        ('Logarithmic', lambda x: log(x + 1)),
    ]

    @classmethod
    def migrate_settings(cls, settings, version):
        """Migrate stored settings in place to the current version.

        For versions below 2 the obsolete `selected_tree_index` is dropped
        and a stored `zoom` is linearly rescaled from the old 20..150 range
        to the new 100..400 range. Settings without a stored `zoom` are left
        alone so that the class default applies.
        """
        if version < 2:
            settings.pop('selected_tree_index', None)
            if 'zoom' in settings:
                v1_min, v1_max = 20, 150
                v2_min, v2_max = 100, 400
                ratio = (v2_max - v2_min) / (v1_max - v1_min)
                settings['zoom'] = int(
                    ratio * (settings['zoom'] - v1_min) + v2_min)

    def __init__(self):
        """Build the control area, the tree list view and reset the state."""
        super().__init__()
        self.rf_model = None
        self.forest = None
        self.instances = None
        self.clf_dataset = None

        self.color_palette = None

        # CONTROL AREA
        # Tree info area
        box_info = gui.widgetBox(self.controlArea, 'Forest')
        self.ui_info = gui.widgetLabel(box_info)

        # Display controls area
        box_display = gui.widgetBox(self.controlArea, 'Display')
        self.ui_depth_slider = gui.hSlider(
            box_display, self, 'depth_limit', label='Depth', ticks=False,
        )  # type: QSlider
        self.ui_target_class_combo = gui.comboBox(
            box_display, self, 'target_class_index', label='Target class',
            orientation=Qt.Horizontal, items=[], contentsLength=8,
        )  # type: gui.OrangeComboBox
        self.ui_size_calc_combo = gui.comboBox(
            box_display, self, 'size_calc_idx', label='Size',
            orientation=Qt.Horizontal,
            items=list(zip(*self.SIZE_CALCULATION))[0], contentsLength=8,
        )  # type: gui.OrangeComboBox
        self.ui_zoom_slider = gui.hSlider(
            box_display, self, 'zoom', label='Zoom', ticks=False, minValue=100,
            maxValue=400, createLabel=False, intOnly=False,
        )  # type: QSlider

        # Stretch to fit the rest of the unused area
        gui.rubber(self.controlArea)

        self.controlArea.setSizePolicy(
            QSizePolicy.Preferred, QSizePolicy.Expanding)

        # MAIN AREA
        self.forest_model = PythagoreanForestModel(parent=self)
        self.forest_model.update_item_size(self.zoom)
        # Forward every display control change to the list model.
        self.ui_depth_slider.valueChanged.connect(
            self.forest_model.update_depth)
        self.ui_target_class_combo.currentIndexChanged.connect(
            self.forest_model.update_target_class)
        self.ui_zoom_slider.valueChanged.connect(
            self.forest_model.update_item_size)
        self.ui_size_calc_combo.currentIndexChanged.connect(
            self.forest_model.update_size_calc)

        self.list_delegate = PythagorasTreeDelegate(parent=self)
        self.list_view = ClickToClearSelectionListView(parent=self)
        self.list_view.setWrapping(True)
        self.list_view.setFlow(QListView.LeftToRight)
        self.list_view.setResizeMode(QListView.Adjust)
        self.list_view.setModel(self.forest_model)
        self.list_view.setItemDelegate(self.list_delegate)
        self.list_view.setSpacing(2)
        self.list_view.setSelectionMode(QListView.SingleSelection)
        self.list_view.selectionModel().selectionChanged.connect(self.commit)
        self.list_view.setUniformItemSizes(True)
        self.mainArea.layout().addWidget(self.list_view)

        self.resize(800, 500)

        # Clear to set sensible default values
        self.clear()

    @Inputs.random_forest
    def set_rf(self, model=None):
        """Handle a new random forest model (or None to reset the widget)."""
        self.clear()
        self.rf_model = model

        if model is not None:
            self.forest = self._get_forest_adapter(self.rf_model)
            self.forest_model[:] = self.forest.trees
            self.instances = model.instances
            # This bit is important for the regression classifier
            if self.instances is not None and \
                    self.instances.domain != model.domain:
                self.clf_dataset = self.instances.transform(
                    self.rf_model.domain)
            else:
                self.clf_dataset = self.instances

            self._update_info_box()
            self._update_target_class_combo()
            self._update_depth_slider()

    def clear(self):
        """Clear all relevant data from the widget."""
        self.rf_model = None
        self.forest = None
        # Also drop the data of the previous model so that no stale
        # references are kept once the input is removed.
        self.instances = None
        self.clf_dataset = None
        self.forest_model.clear()

        self._clear_info_box()
        self._clear_target_class_combo()
        self._clear_depth_slider()

    def _update_info_box(self):
        """Show the number of trees in the info label."""
        self.ui_info.setText('Trees: {}'.format(len(self.forest.trees)))

    def _update_depth_slider(self):
        """Enable the depth slider and set it to the deepest tree's depth."""
        self.depth_limit = self._get_max_depth()

        self.ui_depth_slider.parent().setEnabled(True)
        self.ui_depth_slider.setMaximum(self.depth_limit)
        self.ui_depth_slider.setValue(self.depth_limit)

    def _update_target_class_combo(self):
        """Fill the target combo box according to the class variable type.

        For a discrete class the combo lists 'None' followed by the class
        values; otherwise it lists the keys of
        `ContinuousTreeNode.COLOR_METHODS`. The label next to the combo is
        changed accordingly.
        """
        self._clear_target_class_combo()
        label = [x for x in self.ui_target_class_combo.parent().children()
                 if isinstance(x, QLabel)][0]

        # `set_rf` allows models that carry no instances; fall back to the
        # model's own domain instead of dereferencing None.
        if self.instances is not None:
            domain = self.instances.domain
        else:
            domain = self.rf_model.domain

        if domain.has_discrete_class:
            label_text = '目标类'
            values = [c.title() for c in domain.class_vars[0].values]
            values.insert(0, 'None')
        else:
            label_text = '节点颜色'
            values = list(ContinuousTreeNode.COLOR_METHODS.keys())
        label.setText(label_text)
        self.ui_target_class_combo.addItems(values)
        self.ui_target_class_combo.setCurrentIndex(self.target_class_index)

    def _clear_info_box(self):
        """Show the "no forest" text in the info label."""
        self.ui_info.setText('没有森林输入')

    def _clear_target_class_combo(self):
        """Empty the target combo box and reset the selected index to 0."""
        self.ui_target_class_combo.clear()
        self.target_class_index = 0
        self.ui_target_class_combo.setCurrentIndex(self.target_class_index)

    def _clear_depth_slider(self):
        """Disable the depth slider and set its maximum to 0."""
        self.ui_depth_slider.parent().setEnabled(False)
        self.ui_depth_slider.setMaximum(0)

    def _get_max_depth(self):
        """Return the largest `max_depth` among the forest's trees."""
        return max(tree.max_depth for tree in self.forest.trees)

    def _get_forest_adapter(self, model):
        """Wrap `model` in the adapter used to access its trees."""
        return SklRandomForestAdapter(model)

    def onDeleteWidget(self):
        """When deleting the widget."""
        super().onDeleteWidget()
        self.clear()

    def commit(self, selection):
        # type: (QItemSelection) -> None
        """Commit the selected tree to output.

        Sends None when `selection` is empty. Otherwise the tree at the
        selected row is annotated with the current instances and display
        settings and sent on the "Tree" output.
        """
        selected_indices = selection.indexes()

        if not len(selected_indices):
            self.Outputs.tree.send(None)
            return

        # Single selection mode: exactly one index is expected here.
        selected_index, = selection.indexes()

        idx = selected_index.row()
        tree = self.rf_model.trees[idx]
        tree.instances = self.instances
        tree.meta_target_class_index = self.target_class_index
        tree.meta_size_calc_idx = self.size_calc_idx
        tree.meta_depth_limit = self.depth_limit

        self.Outputs.tree.send(tree)

    def send_report(self):
        """Send report."""
        self.report_plot()
class OWImportDocuments(widget.OWWidget):
    """Widget that imports text documents from a local folder or a URL.

    The import runs as a background task on a thread executor; the widget
    tracks its progress through a small runtime state machine (`State`) and
    outputs the resulting corpus plus a table of documents that could not
    be read.
    """
    name = "Import Documents"
    description = "Import text documents from folders."
    icon = "icons/ImportDocuments.svg"
    priority = 110

    class Outputs:
        data = Output("Corpus", Corpus)
        skipped_documents = Output("Skipped documents", Table)

    LOCAL_FILE, URL = range(2)
    source = settings.Setting(LOCAL_FILE)
    #: list of recent paths
    recent_paths: List[RecentPath] = settings.Setting([])
    currentPath: Optional[str] = settings.Setting(None)
    recent_urls: List[str] = settings.Setting([])

    want_main_area = False
    resizing_enabled = False

    Modality = Qt.ApplicationModal
    MaxRecentItems = 20

    class Warning(widget.OWWidget.Warning):
        read_error = widget.Msg("{} couldn't be read.")

    def __init__(self):
        """Build the source selection and info areas and schedule a start."""
        super().__init__()
        #: widget's runtime state
        self.__state = State.NoState
        self.corpus = None
        self.n_text_categories = 0
        self.n_text_data = 0
        self.skipped_documents = []

        self.__invalidated = False
        self.__pendingTask = None

        layout = QGridLayout()
        layout.setSpacing(4)
        gui.widgetBox(self.controlArea, orientation=layout, box='Source')
        source_box = gui.radioButtons(None, self, "source", box=True,
                                      callback=self.start, addToLayout=False)
        rb_button = gui.appendRadioButton(source_box, "Folder:",
                                          addToLayout=False)
        layout.addWidget(rb_button, 0, 0, Qt.AlignVCenter)

        box = gui.hBox(None, addToLayout=False, margin=0)
        box.setSizePolicy(QSizePolicy.MinimumExpanding, QSizePolicy.Fixed)
        self.recent_cb = QComboBox(
            sizeAdjustPolicy=QComboBox.AdjustToMinimumContentsLengthWithIcon,
            minimumContentsLength=16,
            acceptDrops=True
        )
        # Drag and drop onto the combo box is handled in `eventFilter`.
        self.recent_cb.installEventFilter(self)
        self.recent_cb.activated[int].connect(self.__onRecentActivated)

        browseaction = QAction(
            "Open/Load Documents", self,
            iconText="\N{HORIZONTAL ELLIPSIS}",
            icon=self.style().standardIcon(QStyle.SP_DirOpenIcon),
            toolTip="Select a folder from which to load the documents"
        )
        browseaction.triggered.connect(self.__runOpenDialog)
        reloadaction = QAction(
            "Reload", self,
            icon=self.style().standardIcon(QStyle.SP_BrowserReload),
            toolTip="Reload current document set"
        )
        reloadaction.triggered.connect(self.reload)
        self.__actions = namespace(
            browse=browseaction,
            reload=reloadaction,
        )

        browsebutton = QPushButton(
            browseaction.iconText(),
            icon=browseaction.icon(),
            toolTip=browseaction.toolTip(),
            clicked=browseaction.trigger,
            default=False,
            autoDefault=False,
        )
        reloadbutton = QPushButton(
            reloadaction.iconText(),
            icon=reloadaction.icon(),
            clicked=reloadaction.trigger,
            default=False,
            autoDefault=False,
        )
        box.layout().addWidget(self.recent_cb)
        layout.addWidget(box, 0, 1)
        layout.addWidget(browsebutton, 0, 2)
        layout.addWidget(reloadbutton, 0, 3)

        rb_button = gui.appendRadioButton(source_box, "URL:", addToLayout=False)
        layout.addWidget(rb_button, 3, 0, Qt.AlignVCenter)

        self.url_combo = url_combo = QComboBox()
        url_model = PyListModel()
        # The model wraps the settings list, so edits to one show in the other.
        url_model.wrap(self.recent_urls)
        url_combo.setLineEdit(LineEditSelectOnFocus())
        url_combo.setModel(url_model)
        url_combo.setSizePolicy(QSizePolicy.Ignored, QSizePolicy.Fixed)
        url_combo.setEditable(True)
        url_combo.setInsertPolicy(url_combo.InsertAtTop)
        url_edit = url_combo.lineEdit()
        l, t, r, b = url_edit.getTextMargins()
        url_edit.setTextMargins(l + 5, t, r, b)
        layout.addWidget(url_combo, 3, 1, 1, 3)
        url_combo.activated.connect(self._url_set)
        # with the completer we make the combo box case sensitive when
        # matching the history
        completer = QCompleter()
        completer.setCaseSensitivity(Qt.CaseSensitive)
        url_combo.setCompleter(completer)

        self.addActions([browseaction, reloadaction])

        # Mirror the reload action's enabled state on its button.
        reloadaction.changed.connect(
            lambda: reloadbutton.setEnabled(reloadaction.isEnabled())
        )
        box = gui.vBox(self.controlArea, "Info")
        # Page 0: info text; page 1: progress bar, cancel button and path.
        self.infostack = QStackedWidget()

        self.info_area = QLabel(
            text="No document set selected",
            wordWrap=True
        )
        self.progress_widget = QProgressBar(
            minimum=0, maximum=100
        )
        self.cancel_button = QPushButton(
            "Cancel",
            icon=self.style().standardIcon(QStyle.SP_DialogCancelButton),
            default=False,
            autoDefault=False,
        )
        self.cancel_button.clicked.connect(self.cancel)

        w = QWidget()
        vlayout = QVBoxLayout()
        vlayout.setContentsMargins(0, 0, 0, 0)
        hlayout = QHBoxLayout()
        hlayout.setContentsMargins(0, 0, 0, 0)

        hlayout.addWidget(self.progress_widget)
        hlayout.addWidget(self.cancel_button)
        vlayout.addLayout(hlayout)

        self.pathlabel = TextLabel()
        self.pathlabel.setTextElideMode(Qt.ElideMiddle)
        self.pathlabel.setAttribute(Qt.WA_MacSmallSize)

        vlayout.addWidget(self.pathlabel)
        w.setLayout(vlayout)

        self.infostack.addWidget(self.info_area)
        self.infostack.addWidget(w)

        box.layout().addWidget(self.infostack)

        self.__initRecentItemsModel()
        self.__invalidated = True
        self.__executor = ThreadExecutor(self)

        # The initial start() is deferred to `customEvent`.
        QApplication.postEvent(self, QEvent(RuntimeEvent.Init))

    def _url_set(self):
        """Normalise the activated URL, switch to URL source and start.

        A URL without a scheme gets "http://" prepended, both in the combo
        box and in `recent_urls`.
        """
        url = self.url_combo.currentText()
        pos = self.recent_urls.index(url)
        url = url.strip()

        if not urlparse(url).scheme:
            url = "http://" + url
            self.url_combo.setItemText(pos, url)
            self.recent_urls[pos] = url

        self.source = self.URL
        self.start()

    def __initRecentItemsModel(self):
        """Populate the recent-folders combo box from the stored settings.

        Entries that are no longer directories are dropped and the list is
        truncated to `MaxRecentItems`. `currentPath` is kept only if it is
        the same directory as the first recent entry.
        """
        if self.currentPath is not None and \
                not os.path.isdir(self.currentPath):
            self.currentPath = None

        recent_paths = []
        for item in self.recent_paths:
            if os.path.isdir(item.abspath):
                recent_paths.append(item)
        recent_paths = recent_paths[:OWImportDocuments.MaxRecentItems]
        recent_model = self.recent_cb.model()
        for pathitem in recent_paths:
            item = RecentPath_asqstandarditem(pathitem)
            recent_model.appendRow(item)

        self.recent_paths = recent_paths

        if self.currentPath is not None and \
                os.path.isdir(self.currentPath) and self.recent_paths and \
                os.path.samefile(self.currentPath,
                                 self.recent_paths[0].abspath):
            self.recent_cb.setCurrentIndex(0)
        else:
            self.currentPath = None
            self.recent_cb.setCurrentIndex(-1)
        self.__actions.reload.setEnabled(self.currentPath is not None)

    def customEvent(self, event):
        """Reimplemented."""
        if event.type() == RuntimeEvent.Init:
            if self.__invalidated:
                try:
                    self.start()
                finally:
                    self.__invalidated = False

        super().customEvent(event)

    def __runOpenDialog(self):
        """Let the user pick a folder; on success make it current and start."""
        startdir = os.path.expanduser("~/")
        if self.recent_paths:
            startdir = os.path.dirname(self.recent_paths[0].abspath)

        caption = "Select Top Level Folder"
        if OWImportDocuments.Modality == Qt.WindowModal:
            dlg = QFileDialog(
                self, caption, startdir,
                acceptMode=QFileDialog.AcceptOpen,
                modal=True,
            )
            dlg.setFileMode(QFileDialog.Directory)
            dlg.setOption(QFileDialog.ShowDirsOnly)
            dlg.setDirectory(startdir)
            dlg.setAttribute(Qt.WA_DeleteOnClose)

            @dlg.accepted.connect
            def on_accepted():
                dirpath = dlg.selectedFiles()
                if dirpath:
                    self.setCurrentPath(dirpath[0])
                    self.start()
            dlg.open()
        else:
            dirpath = QFileDialog.getExistingDirectory(
                self, caption, startdir
            )
            if dirpath:
                self.setCurrentPath(dirpath)
                self.start()

    def __onRecentActivated(self, index):
        """Make the activated recent folder current and start the import."""
        item = self.recent_cb.itemData(index)
        if item is None:
            return
        assert isinstance(item, RecentPath)
        self.setCurrentPath(item.abspath)
        self.start()

    def __updateInfo(self):
        """Update the info label and info stack page from the runtime state."""
        if self.__state == State.NoState:
            text = "No document set selected"
        elif self.__state == State.Processing:
            text = "Processing"
        elif self.__state == State.Done:
            nvalid = self.n_text_data
            ncategories = self.n_text_categories
            n_skipped = len(self.skipped_documents)
            if ncategories < 2:
                text = "{} document{}".format(nvalid,
                                              "s" if nvalid != 1 else "")
            else:
                text = "{} documents / {} categories".format(nvalid,
                                                             ncategories)
            if n_skipped > 0:
                text = text + ", {} skipped".format(n_skipped)
        elif self.__state == State.Cancelled:
            text = "Cancelled"
        elif self.__state == State.Error:
            text = "Error state"
        else:
            assert False

        self.info_area.setText(text)

        if self.__state == State.Processing:
            self.infostack.setCurrentIndex(1)
        else:
            self.infostack.setCurrentIndex(0)

    def setCurrentPath(self, path):
        """
        Set the current root text path to path

        If the path does not exist or is not a directory, an error is shown,
        a `UserWarning` is issued and the current path is reset to None.
        Any task in progress is cancelled and the source is switched to
        `LOCAL_FILE`.

        Parameters
        ----------
        path : str
            New root import path.

        Returns
        -------
        status : bool
            True if the current root import path was successfully
            changed to path.
        """
        # Nothing to do if the same folder is already the active local source.
        if self.currentPath is not None and path is not None and \
                os.path.isdir(self.currentPath) and os.path.isdir(path) and \
                os.path.samefile(self.currentPath, path) and \
                self.source == self.LOCAL_FILE:
            return True

        success = True
        error = None
        if path is not None:
            if not os.path.exists(path):
                error = "'{}' does not exist".format(path)
                path = None
                success = False
            elif not os.path.isdir(path):
                error = "'{}' is not a folder".format(path)
                path = None
                success = False

        if error is not None:
            self.error(error)
            warnings.warn(error, UserWarning, stacklevel=3)
        else:
            self.error()

        if path is not None:
            newindex = self.addRecentPath(path)
            self.recent_cb.setCurrentIndex(newindex)
            if newindex >= 0:
                self.currentPath = path
            else:
                self.currentPath = None
        else:
            self.currentPath = None
        self.__actions.reload.setEnabled(self.currentPath is not None)

        if self.__state == State.Processing:
            self.cancel()

        self.source = self.LOCAL_FILE
        return success

    def addRecentPath(self, path):
        """
        Prepend a path entry to the list of recent paths

        If an entry with the same path already exists in the recent path
        list it is moved to the first place

        Parameters
        ----------
        path : str

        Returns
        -------
        index : int
            Index of the entry in the recent list; always 0.
        """
        existing = None
        for pathitem in self.recent_paths:
            try:
                if os.path.samefile(pathitem.abspath, path):
                    existing = pathitem
                    break
            except FileNotFoundError:
                # file not found if the `pathitem.abspath` no longer exists
                pass

        model = self.recent_cb.model()

        if existing is not None:
            selected_index = self.recent_paths.index(existing)
            assert model.item(selected_index).data(Qt.UserRole) is existing
            self.recent_paths.remove(existing)
            row = model.takeRow(selected_index)
            self.recent_paths.insert(0, existing)
            model.insertRow(0, row)
        else:
            item = RecentPath(path, None, None)
            self.recent_paths.insert(0, item)
            model.insertRow(0, RecentPath_asqstandarditem(item))
        return 0

    def __setRuntimeState(self, state):
        """Switch the runtime state and update blocking, status and info.

        The asserts encode the allowed transitions: `Processing` may only be
        entered from a non-processing state, and `Done`/`Cancelled` only
        from `Processing`.
        """
        assert state in State
        self.setBlocking(state == State.Processing)
        message = ""
        if state == State.Processing:
            assert self.__state in [State.Done,
                                    State.NoState,
                                    State.Error,
                                    State.Cancelled]
            message = "Processing"
        elif state == State.Done:
            assert self.__state == State.Processing
        elif state == State.Cancelled:
            assert self.__state == State.Processing
            message = "Cancelled"
        elif state == State.Error:
            message = "Error during processing"
        elif state == State.NoState:
            message = ""
        else:
            assert False

        self.__state = state

        if self.__state == State.Processing:
            self.infostack.setCurrentIndex(1)
        else:
            self.infostack.setCurrentIndex(0)

        self.setStatusMessage(message)
        self.__updateInfo()

    def reload(self):
        """
        Restart the text scan task
        """
        if self.__state == State.Processing:
            self.cancel()

        self.source = self.LOCAL_FILE
        self.corpus = None
        self.start()

    def start(self):
        """
        Start/execute the text indexing operation

        The import root is `currentPath` for the local source and the
        stripped URL combo text otherwise; nothing happens if it is empty.
        A task already in progress is cancelled first.
        """
        self.error()
        self.Warning.clear()
        self.progress_widget.setValue(0)

        self.__invalidated = False
        startdir = self.currentPath if self.source == self.LOCAL_FILE \
            else self.url_combo.currentText().strip()
        if not startdir:
            return

        if self.__state == State.Processing:
            assert self.__pendingTask is not None
            log.info("Starting a new task while one is in progress. "
                     "Cancel the existing task (dir:'{}')"
                     .format(self.__pendingTask.startdir))
            self.cancel()

        self.__setRuntimeState(State.Processing)

        report_progress = methodinvoke(
            self, "__onReportProgress", (object,))

        task = ImportDocuments(startdir, self.source == self.URL,
                               report_progress=report_progress)

        # collect the task state in one convenient place
        self.__pendingTask = taskstate = namespace(
            task=task,
            startdir=startdir,
            future=None,
            watcher=None,
            cancelled=False,
            cancel=None,
        )

        def cancel():
            # Cancel the task and disconnect
            if taskstate.future.cancel():
                pass
            else:
                # Already running: flag the task and poll for its result.
                taskstate.task.cancelled = True
                taskstate.cancelled = True
                try:
                    taskstate.future.result(timeout=0)
                except UserInterruptError:
                    pass
                except TimeoutError:
                    log.info("The task did not stop in in a timely manner")
            taskstate.watcher.finished.disconnect(self.__onRunFinished)

        taskstate.cancel = cancel

        def run_text_scan_task_interupt():
            try:
                return task.run()
            except UserInterruptError:
                # Suppress interrupt errors, so they are not logged
                return

        taskstate.future = self.__executor.submit(run_text_scan_task_interupt)
        taskstate.watcher = FutureWatcher(taskstate.future)
        taskstate.watcher.finished.connect(self.__onRunFinished)

    @Slot()
    def __onRunFinished(self):
        """Collect the finished task's result, update the state and commit."""
        assert QThread.currentThread() is self.thread()
        assert self.__state == State.Processing
        assert self.__pendingTask is not None
        assert self.sender() is self.__pendingTask.watcher
        assert self.__pendingTask.future.done()
        task = self.__pendingTask
        self.__pendingTask = None

        corpus, errors = None, []
        try:
            corpus, errors = task.future.result()
        except NoDocumentsException:
            state = State.Error
            self.error("Folder contains no readable files.")
        except Exception:
            sys.excepthook(*sys.exc_info())
            state = State.Error
            self.error(traceback.format_exc())
        else:
            state = State.Done
            self.error()

        if corpus:
            self.n_text_data = len(corpus)
            self.n_text_categories = len(corpus.domain.class_var.values)\
                if corpus.domain.class_var else 0

        self.corpus = corpus
        if self.corpus:
            self.corpus.name = "Documents"
        self.skipped_documents = errors

        if len(errors):
            self.Warning.read_error(
                "Some files" if len(errors) > 1 else "One file"
            )

        self.__setRuntimeState(state)
        self.commit()

    def cancel(self):
        """
        Cancel current pending task (if any).
        """
        if self.__state == State.Processing:
            assert self.__pendingTask is not None
            self.__pendingTask.cancel()
            self.__pendingTask = None
            self.__setRuntimeState(State.Cancelled)

    @Slot(object)
    def __onReportProgress(self, arg):
        # report on scan progress from a worker thread
        # arg must provide `lastpath` and `progress` attributes; `progress`
        # is scaled by 100 for the progress bar (presumably a 0..1 fraction
        # -- confirm against the task implementation)
        assert QThread.currentThread() is self.thread()
        if self.__state == State.Processing:
            self.pathlabel.setText(prettifypath(arg.lastpath))
            self.progress_widget.setValue(int(100 * arg.progress))

    def commit(self):
        """
        Create and commit a Corpus from the collected text meta data.

        The corpus is sent on the "Corpus" output. Skipped documents, if
        any, are sent as a table with one row per document holding its name
        and its full location; otherwise None is sent on that output.
        """
        self.Outputs.data.send(self.corpus)
        if self.skipped_documents:
            if self.source == self.LOCAL_FILE:
                root = self.currentPath

                def locate(name):
                    return os.path.join(root, name)
            else:
                # For the URL source `currentPath` is unrelated (and may be
                # None), so resolve against the URL instead -- the same root
                # that `start()` uses.
                from urllib.parse import urljoin
                root = self.url_combo.currentText().strip()
                if not root.endswith("/"):
                    # Treat the root as a directory, as os.path.join does.
                    root += "/"

                def locate(name):
                    return urljoin(root, name)

            skipped_table = (
                Table.from_list(
                    SKIPPED_DOMAIN,
                    [[x, locate(x)] for x in self.skipped_documents]
                )
            )
            skipped_table.name = "Skipped documents"
        else:
            skipped_table = None
        self.Outputs.skipped_documents.send(skipped_table)

    def onDeleteWidget(self):
        """Cancel any running task and shut the executor down."""
        self.cancel()
        self.__executor.shutdown(wait=True)
        self.__invalidated = False

    def eventFilter(self, receiver, event):
        # re-implemented from QWidget
        # intercept and process drag drop events on the recent directory
        # selection combo box
        def dirpath(event):
            # type: (QDropEvent) -> Optional[str]
            """Return the directory from a QDropEvent."""
            data = event.mimeData()
            urls = data.urls()
            if len(urls) == 1:
                url = urls[0]
                path = url.toLocalFile()
                if os.path.isdir(path):
                    return path
            return None

        if receiver is self.recent_cb and \
                event.type() in {QEvent.DragEnter, QEvent.DragMove,
                                 QEvent.Drop}:
            assert isinstance(event, QDropEvent)
            path = dirpath(event)
            if path is not None and event.possibleActions() & Qt.LinkAction:
                event.setDropAction(Qt.LinkAction)
                event.accept()
                if event.type() == QEvent.Drop:
                    self.setCurrentPath(path)
                    self.start()
            else:
                event.ignore()
            return True

        return super().eventFilter(receiver, event)

    def send_report(self):
        """Report the path and document counts; nothing without a path."""
        if not self.currentPath:
            return
        items = [('Path', self.currentPath),
                 ('Number of documents', self.n_text_data)]
        if self.n_text_categories:
            items += [('Categories', self.n_text_categories)]
        if self.skipped_documents:
            items += [('Number of skipped', len(self.skipped_documents))]
        self.report_items(items, )