class OWRank(OWWidget):
    """Widget that scores the features of a data table and outputs a subset.

    Three table views (discrete target, continuous target, no target) are
    stacked in the main area; `switchRanksMode` selects which one is active.
    Scores come from the built-in measures in `SCORES` and from any scorers
    connected to the "Scorer" input.
    """

    name = "Rank"
    description = "Rank and filter data features by their relevance."
    icon = "icons/Rank.svg"
    priority = 1102

    buttons_area_orientation = Qt.Vertical

    inputs = [("Data", Table, "setData"),
              ("Scorer", score.Scorer, "set_learner", widget.Multiple)]

    outputs = [("Reduced Data", Table, widget.Default), ("Scores", Table)]

    # Identifiers of the selection methods; also used as radio button ids.
    SelectNone, SelectAll, SelectManual, SelectNBest = range(4)

    cls_default_selected = Setting({"Gain Ratio", "Gini Decrease"})
    reg_default_selected = Setting(
        {"Univariate Linear Regression", "RReliefF"})
    selectMethod = Setting(SelectNBest)
    nSelected = Setting(5)
    auto_apply = Setting(True)

    # Header state for discrete/continuous/no_class scores
    headerState = Setting([None, None, None])

    settings_version = 1

    settingsHandler = DomainContextHandler()
    selected_rows = ContextSetting([])

    # Attributes bound to the score check boxes (one per entry of
    # `_score_vars`, paired positionally with `SCORES` in `__init__`).
    gain = inf_gain = gini = anova = chi2 = ulr = relief = rrelief = fcbc = True
    _score_vars = [
        "gain", "inf_gain", "gini", "anova", "chi2", "relief", "fcbc",
        "ulr", "rrelief"
    ]

    class Warning(OWWidget.Warning):
        no_target_var = Msg("Data does not have a target variable")

    class Error(OWWidget.Error):
        invalid_type = Msg("Cannot handle target variable type {}")
        inadequate_learner = Msg("{}")

    def __init__(self):
        """Build the control area, the three rank views and initial state."""
        super().__init__()
        self.measure_scores = None
        self.update_scores = True
        self.usefulAttributes = []
        self.learners = {}
        self.labels = []
        self.out_domain_desc = None

        self.all_measures = SCORES

        self.selectedMeasures = {m.name: True for m in self.all_measures}
        # Discrete (0) or continuous (1) class mode
        self.rankMode = 0

        self.data = None

        self.discMeasures = [
            m for m in self.all_measures
            if issubclass(DiscreteVariable, m.score.class_type)
        ]
        self.contMeasures = [
            m for m in self.all_measures
            if issubclass(ContinuousVariable, m.score.class_type)
        ]

        self.score_checks = []
        self.cls_scoring_box = gui.vBox(None, "Scoring for Classification")
        self.reg_scoring_box = gui.vBox(None, "Scoring for Regression")
        # The first seven score variables go to the classification box, the
        # last two to the regression box.
        boxes = [self.cls_scoring_box] * 7 + [self.reg_scoring_box] * 2
        for _score, var, box in zip(SCORES, self._score_vars, boxes):
            # `val=_score` binds the current measure at definition time.
            check = gui.checkBox(
                box, self, var, label=_score.name,
                callback=lambda val=_score: self.measuresSelectionChanged(val))
            self.score_checks.append(check)

        self.score_stack = QStackedWidget(self)
        self.score_stack.addWidget(self.cls_scoring_box)
        self.score_stack.addWidget(self.reg_scoring_box)
        # Empty page shown when the data has no target variable.
        self.score_stack.addWidget(QWidget())
        self.controlArea.layout().addWidget(self.score_stack)

        gui.rubber(self.controlArea)

        selMethBox = gui.vBox(self.controlArea, "Select Attributes",
                              addSpace=True)

        grid = QGridLayout()
        grid.setContentsMargins(6, 0, 6, 0)
        self.selectButtons = QButtonGroup()
        self.selectButtons.buttonClicked[int].connect(self.setSelectMethod)

        def button(text, buttonid, toolTip=None):
            b = QRadioButton(text)
            self.selectButtons.addButton(b, buttonid)
            if toolTip is not None:
                b.setToolTip(toolTip)
            return b

        b1 = button(self.tr("None"), OWRank.SelectNone)
        b2 = button(self.tr("All"), OWRank.SelectAll)
        b3 = button(self.tr("Manual"), OWRank.SelectManual)
        b4 = button(self.tr("Best ranked:"), OWRank.SelectNBest)

        s = gui.spin(selMethBox, self, "nSelected", 1, 100,
                     callback=self.nSelectedChanged)

        grid.addWidget(b1, 0, 0)
        grid.addWidget(b2, 1, 0)
        grid.addWidget(b3, 2, 0)
        grid.addWidget(b4, 3, 0)
        grid.addWidget(s, 3, 1)

        self.selectButtons.button(self.selectMethod).setChecked(True)

        selMethBox.layout().addLayout(grid)

        gui.auto_commit(selMethBox, self, "auto_apply", "Send", box=False)

        # Discrete, continuous and no_class table views are stacked
        self.ranksViewStack = QStackedLayout()
        self.mainArea.layout().addLayout(self.ranksViewStack)

        self.discRanksView = QTableView()
        self.ranksViewStack.addWidget(self.discRanksView)
        self.discRanksView.setSelectionBehavior(QTableView.SelectRows)
        self.discRanksView.setSelectionMode(QTableView.MultiSelection)
        self.discRanksView.setSortingEnabled(True)

        self.discRanksLabels = ["#"] + [m.shortname for m in self.discMeasures]
        self.discRanksModel = QStandardItemModel(self)
        self.discRanksModel.setHorizontalHeaderLabels(self.discRanksLabels)

        self.discRanksProxyModel = MySortProxyModel(self)
        self.discRanksProxyModel.setSourceModel(self.discRanksModel)
        self.discRanksView.setModel(self.discRanksProxyModel)

        self.discRanksView.setColumnWidth(0, 20)
        self.discRanksView.selectionModel().selectionChanged.connect(
            self.commit)
        self.discRanksView.pressed.connect(self.onSelectItem)
        self.discRanksView.horizontalHeader().sectionClicked.connect(
            self.headerClick)
        self.discRanksView.verticalHeader().sectionClicked.connect(
            self.onSelectItem)

        if self.headerState[0] is not None:
            self.discRanksView.horizontalHeader().restoreState(
                self.headerState[0])

        self.contRanksView = QTableView()
        self.ranksViewStack.addWidget(self.contRanksView)
        self.contRanksView.setSelectionBehavior(QTableView.SelectRows)
        self.contRanksView.setSelectionMode(QTableView.MultiSelection)
        self.contRanksView.setSortingEnabled(True)

        self.contRanksLabels = ["#"] + [m.shortname for m in self.contMeasures]
        self.contRanksModel = QStandardItemModel(self)
        self.contRanksModel.setHorizontalHeaderLabels(self.contRanksLabels)

        self.contRanksProxyModel = MySortProxyModel(self)
        self.contRanksProxyModel.setSourceModel(self.contRanksModel)
        self.contRanksView.setModel(self.contRanksProxyModel)

        self.contRanksView.setColumnWidth(0, 20)
        self.contRanksView.selectionModel().selectionChanged.connect(
            self.commit)
        self.contRanksView.pressed.connect(self.onSelectItem)
        self.contRanksView.horizontalHeader().sectionClicked.connect(
            self.headerClick)
        self.contRanksView.verticalHeader().sectionClicked.connect(
            self.onSelectItem)

        if self.headerState[1] is not None:
            self.contRanksView.horizontalHeader().restoreState(
                self.headerState[1])

        self.noClassRanksView = QTableView()
        self.ranksViewStack.addWidget(self.noClassRanksView)
        self.noClassRanksView.setSelectionBehavior(QTableView.SelectRows)
        self.noClassRanksView.setSelectionMode(QTableView.MultiSelection)
        self.noClassRanksView.setSortingEnabled(True)

        self.noClassRanksLabels = ["#"]
        self.noClassRanksModel = QStandardItemModel(self)
        self.noClassRanksModel.setHorizontalHeaderLabels(
            self.noClassRanksLabels)

        self.noClassRanksProxyModel = MySortProxyModel(self)
        self.noClassRanksProxyModel.setSourceModel(self.noClassRanksModel)
        self.noClassRanksView.setModel(self.noClassRanksProxyModel)

        self.noClassRanksView.setColumnWidth(0, 20)
        self.noClassRanksView.selectionModel().selectionChanged.connect(
            self.commit)
        self.noClassRanksView.pressed.connect(self.onSelectItem)
        self.noClassRanksView.horizontalHeader().sectionClicked.connect(
            self.headerClick)
        self.noClassRanksView.verticalHeader().sectionClicked.connect(
            self.onSelectItem)

        if self.headerState[2] is not None:
            self.noClassRanksView.horizontalHeader().restoreState(
                self.headerState[2])

        # Switch the current view to Discrete
        self.switchRanksMode(0)
        self.resetInternals()
        self.updateDelegates()
        self.updateVisibleScoreColumns()

        self.resize(690, 500)

        self.measure_scores = table((len(self.measures), 0), None)

    def switchRanksMode(self, index):
        """Switch between discrete (0), continuous (1) and no_class (2) mode.

        Rebinds `ranksView`, `ranksModel`, `ranksProxyModel`, `measures` and
        `selected_checks` to the objects of the chosen mode, resets
        `measure_scores` and synchronises the score check boxes.
        """
        self.rankMode = index
        self.ranksViewStack.setCurrentIndex(index)

        if index == 0:
            self.ranksView = self.discRanksView
            self.ranksModel = self.discRanksModel
            self.ranksProxyModel = self.discRanksProxyModel
            self.measures = self.discMeasures
            self.selected_checks = self.cls_default_selected
            self.reg_scoring_box.setSizePolicy(QSizePolicy.Ignored,
                                               QSizePolicy.Ignored)
            self.cls_scoring_box.setSizePolicy(QSizePolicy.Expanding,
                                               QSizePolicy.Expanding)
        elif index == 1:
            self.ranksView = self.contRanksView
            self.ranksModel = self.contRanksModel
            self.ranksProxyModel = self.contRanksProxyModel
            self.measures = self.contMeasures
            self.selected_checks = self.reg_default_selected
            self.cls_scoring_box.setSizePolicy(QSizePolicy.Ignored,
                                               QSizePolicy.Ignored)
            self.reg_scoring_box.setSizePolicy(QSizePolicy.Expanding,
                                               QSizePolicy.Expanding)
        else:
            self.ranksView = self.noClassRanksView
            self.ranksModel = self.noClassRanksModel
            self.ranksProxyModel = self.noClassRanksProxyModel
            self.measures = []
            self.selected_checks = set()
            self.reg_scoring_box.setSizePolicy(QSizePolicy.Ignored,
                                               QSizePolicy.Ignored)
            self.cls_scoring_box.setSizePolicy(QSizePolicy.Ignored,
                                               QSizePolicy.Ignored)

        shape = (len(self.measures) + len(self.learners), 0)
        self.measure_scores = table(shape, None)
        # Suppress score recomputation while the check boxes are being
        # synchronised; `measuresSelectionChanged` reads this flag.
        self.update_scores = False
        for check, measure in zip(self.score_checks, SCORES):
            check.setChecked(measure.name in self.selected_checks)
        self.update_scores = True
        self.score_stack.setCurrentIndex(index)
        self.updateVisibleScoreColumns()

    @check_sql_input
    def setData(self, data):
        """Handler for the "Data" input.

        Picks the rank mode from the domain's target variable, fills the
        first column of the rank model, recomputes the scores and commits.
        With `data` equal to None, None is sent on the "Scores" output.
        """
        self.closeContext()
        self.clear_messages()
        self.resetInternals()

        self.data = data
        self.switchRanksMode(0)
        if self.data is not None:
            domain = self.data.domain
            attrs = domain.attributes
            self.usefulAttributes = [
                attr for attr in attrs
                if attr.is_discrete or attr.is_continuous
            ]

            if domain.has_continuous_class:
                self.switchRanksMode(1)
            elif not domain.class_var:
                self.Warning.no_target_var()
                self.switchRanksMode(2)
            elif not domain.has_discrete_class:
                self.Error.invalid_type(type(domain.class_var).__name__)

            if issparse(self.data.X):
                # keep only measures supporting sparse data
                self.measures = [
                    m for m in self.measures if m.score.supports_sparse_data
                ]

            self.ranksModel.setRowCount(len(attrs))
            for i, a in enumerate(attrs):
                # Column 0 shows the number of values of a discrete
                # attribute, or "C" for any other attribute.
                v = len(a.values) if a.is_discrete else "C"
                item = ScoreValueItem()
                item.setData(v, Qt.DisplayRole)
                self.ranksModel.setItem(i, 0, item)
                item = QStandardItem(a.name)
                item.setData(gui.attributeIconDict[a], Qt.DecorationRole)
                self.ranksModel.setVerticalHeaderItem(i, item)

            shape = (len(self.measures) + len(self.learners), len(attrs))
            self.measure_scores = table(shape, None)
            self.updateScores()
        else:
            self.send("Scores", None)

        self.selected_rows = []
        self.openContext(data)
        self.selectMethodChanged()
        self.commit()

    def get_selection(self):
        """Return the distinct row numbers of the current view's selection."""
        selection = self.ranksView.selectionModel().selection()
        return list({ind.row() for ind in selection.indexes()})

    def set_learner(self, learner, lid=None):
        """Handler for the (multiple) "Scorer" input.

        A None `learner` with a non-None `lid` removes that scorer; a
        non-None `learner` is stored under `lid`. Afterwards the score rows
        belonging to scorers are reset and recomputed, and the output is
        committed.
        """
        if learner is None and lid is not None:
            # `pop` with a default instead of `del`: a None may arrive for an
            # id that was never registered, which must not raise KeyError.
            self.learners.pop(lid, None)
        elif learner is not None:
            self.learners[lid] = ScoreMeta(learner.name, learner.name, learner)
        attrs_len = 0 if not self.data else len(self.data.domain.attributes)
        shape = (len(self.learners), attrs_len)
        # Keep the rows of the built-in measures, replace the scorer rows.
        self.measure_scores = self.measure_scores[:len(self.measures)]
        self.measure_scores += table(shape, None)
        self.contRanksModel.setHorizontalHeaderLabels(self.contRanksLabels)
        self.discRanksModel.setHorizontalHeaderLabels(self.discRanksLabels)
        self.noClassRanksModel.setHorizontalHeaderLabels(
            self.noClassRanksLabels)
        # Recompute only the scorer rows.
        measures_mask = [False] * len(self.measures)
        measures_mask += [True for _ in self.learners]
        self.updateScores(measures_mask)
        self.commit()

    def updateScores(self, measuresMask=None):
        """Update the current computed scores.

        If `measuresMask` is given it must be a list with one entry per
        built-in measure followed by one entry per connected scorer; falsy
        entries are skipped. When omitted, the selected built-in measures and
        all scorers are recomputed. Does nothing when there is no data.
        """
        if not self.data:
            return
        if self.data.has_missing():
            self.information("Missing values have been imputed.")

        measures = self.measures + list(self.learners.values())
        if measuresMask is None:
            # Update all selected measures
            measuresMask = [
                self.selectedMeasures.get(m.name) for m in self.measures
            ]
            # Scorer entries are their names; only their truthiness is used.
            measuresMask = measuresMask + [
                v.name for v in self.learners.values()
            ]

        data = self.data
        learner_col = len(self.measures)
        # Scorer labels are rebuilt when the mask has no scorer entries or
        # when the first scorer is about to be recomputed.
        if len(measuresMask) <= len(self.measures) or \
                measuresMask[len(self.measures)]:
            self.labels = []
            self.Error.inadequate_learner.clear()

        self.setStatusMessage("Running")
        with self.progressBar():
            n_measure_update = sum(
                1 for x in measuresMask if x is not False)
            count = 0
            for index, (meas, mask) in enumerate(zip(measures, measuresMask)):
                if not mask:
                    continue
                self.progressBarSet(90 * count / n_measure_update)
                count += 1
                if index < len(self.measures):
                    estimator = meas.score()
                    try:
                        self.measure_scores[index] = estimator(data)
                    except ValueError:
                        # Fall back to scoring attribute by attribute; an
                        # attribute that cannot be scored gets None.
                        self.measure_scores[index] = []
                        for attr in data.domain.attributes:
                            try:
                                self.measure_scores[index].append(
                                    estimator(data, attr))
                            except ValueError:
                                self.measure_scores[index].append(None)
                else:
                    learner = meas.score
                    if isinstance(learner, Learner) and \
                            not learner.check_learner_adequacy(
                                self.data.domain):
                        self.Error.inadequate_learner(
                            learner.learner_adequacy_err_msg)
                        scores = table((1, len(data.domain.attributes)))
                    else:
                        scores = meas.score.score_data(data)
                    # A scorer may yield several rows; each becomes a column
                    # labelled "<shortname><n>".
                    for i, row in enumerate(scores):
                        self.labels.append(meas.shortname + str(i + 1))
                        if len(self.measure_scores) > learner_col:
                            self.measure_scores[learner_col] = row
                        else:
                            self.measure_scores.append(row)
                        learner_col += 1
            self.progressBarSet(90)
        self.contRanksModel.setHorizontalHeaderLabels(self.contRanksLabels +
                                                      self.labels)
        self.discRanksModel.setHorizontalHeaderLabels(self.discRanksLabels +
                                                      self.labels)
        self.noClassRanksModel.setHorizontalHeaderLabels(
            self.noClassRanksLabels + self.labels)
        self.updateRankModel(measuresMask)
        self.ranksProxyModel.invalidate()
        self.selectMethodChanged()

        self.send("Scores", self.create_scores_table(self.labels))
        self.setStatusMessage("")

    def updateRankModel(self, measuresMask):
        """Copy `measure_scores` into the current rank model.

        `measuresMask` marks which score rows were recomputed. It is padded
        with its last entry up to the number of score rows; the padding is
        done on a copy so the caller's list is left untouched.
        """
        values = []
        diff = len(self.measure_scores) - len(measuresMask)
        mask = list(measuresMask)
        if mask:
            mask += [mask[-1]] * diff
        # Drop model columns beyond the available score rows.
        for i in range(self.ranksModel.columnCount() - 1,
                       len(self.measure_scores), -1):
            self.ranksModel.removeColumn(i)

        for i, (scores, m) in enumerate(zip(self.measure_scores, mask)):
            # Skip rows that were not recomputed and already have items.
            if not m and self.ranksModel.item(0, i + 1):
                values.append([])
                continue
            values_one = []
            for j, _score in enumerate(scores):
                values_one.append(_score)
                item = self.ranksModel.item(j, i + 1)
                if not item:
                    item = ScoreValueItem()
                    self.ranksModel.setItem(j, i + 1, item)
                item.setData(_score, Qt.DisplayRole)
            values.append(values_one)
        for i, (vals, m) in enumerate(zip(values, mask)):
            if not m:
                continue
            valid_vals = [v for v in vals if v is not None]
            if valid_vals:
                vmin, vmax = min(valid_vals), max(valid_vals)
                for j, v in enumerate(vals):
                    if v is not None:
                        # Set the bar ratio role for i-th measure.
                        # `or 1` avoids dividing by zero when all are equal.
                        ratio = float((v - vmin) / ((vmax - vmin) or 1))
                        item = self.ranksModel.item(j, i + 1)
                        item.setData(ratio, gui.BarRatioRole)

        self.ranksView.setColumnWidth(0, 20)
        self.ranksView.resizeRowsToContents()

    def resetInternals(self):
        """Forget the data and empty the rows of the current rank model."""
        self.data = None
        self.usefulAttributes = []
        self.ranksModel.setRowCount(0)

    def onSelectItem(self, index):
        """Called when the user selects/unselects an item in the table view.

        Switches the selection method to manual and commits.
        """
        self.selectMethod = OWRank.SelectManual  # Manual
        self.selectButtons.button(self.selectMethod).setChecked(True)
        self.commit()

    def setSelectMethod(self, method):
        """Set the selection method and re-apply the selection on change."""
        if self.selectMethod != method:
            self.selectMethod = method
            self.selectButtons.button(method).setChecked(True)
            self.selectMethodChanged()

    def selectMethodChanged(self):
        """Re-apply the automatic selection and focus the rank view."""
        self.autoSelection()
        self.ranksView.setFocus()

    def nSelectedChanged(self):
        """Spin box callback: switch to "best ranked" and reselect."""
        self.selectMethod = OWRank.SelectNBest
        self.selectButtons.button(self.selectMethod).setChecked(True)
        self.selectMethodChanged()

    def autoSelection(self):
        """Select rows in the current view according to `selectMethod`.

        For the manual method the rows stored in `selected_rows` are
        selected.
        """
        selModel = self.ranksView.selectionModel()
        rowCount = self.ranksModel.rowCount()
        columnCount = self.ranksModel.columnCount()
        model = self.ranksProxyModel

        if self.selectMethod == OWRank.SelectNone:
            selection = QItemSelection()
        elif self.selectMethod == OWRank.SelectAll:
            selection = QItemSelection(
                model.index(0, 0),
                model.index(rowCount - 1, columnCount - 1))
        elif self.selectMethod == OWRank.SelectNBest:
            nSelected = min(self.nSelected, rowCount)
            selection = QItemSelection(
                model.index(0, 0),
                model.index(nSelected - 1, columnCount - 1))
        else:
            selection = QItemSelection()
            if len(self.selected_rows):
                selection = QItemSelection()
                for row in self.selected_rows:
                    selection.append(
                        QItemSelectionRange(
                            model.index(row, 0),
                            model.index(row, columnCount - 1)))

        selModel.select(selection, QItemSelectionModel.ClearAndSelect)

    def headerClick(self, index):
        """Horizontal header click: reselect if needed and store the states."""
        if index >= 1 and self.selectMethod == OWRank.SelectNBest:
            # Reselect the top ranked attributes
            self.autoSelection()

        # Store the header states
        disc = bytes(self.discRanksView.horizontalHeader().saveState())
        cont = bytes(self.contRanksView.horizontalHeader().saveState())
        no_class = bytes(self.noClassRanksView.horizontalHeader().saveState())
        self.headerState = [disc, cont, no_class]

    def measuresSelectionChanged(self, measure):
        """Measure selection has changed. Update column visibility.

        Toggles `measure` in `selectedMeasures` / `selected_checks` and, when
        score updates are enabled, computes its scores if they are all None.
        """
        checked = self.selectedMeasures[measure.name]
        self.selectedMeasures[measure.name] = not checked
        if not checked:
            self.selected_checks.add(measure.name)
        elif measure.name in self.selected_checks:
            self.selected_checks.remove(measure.name)
        measures_mask = [False] * len(self.measures)
        measures_mask += [False for _ in self.learners]
        # Update scores for shown column if they are not yet computed.
        if measure in self.measures and self.measure_scores:
            index = self.measures.index(measure)
            if all(s is None for s in self.measure_scores[index]):
                measures_mask[index] = True
        if self.update_scores:
            self.updateScores(measures_mask)
        self.updateVisibleScoreColumns()

    def updateVisibleScoreColumns(self):
        """Update the visible columns of the scores view.

        If the sort column became hidden, the stored header state of the
        current mode is discarded; without a stored state the view is sorted
        by the first selected measure and the selection is re-applied.
        """
        for i, measure in enumerate(self.measures):
            shown = self.selectedMeasures.get(measure.name)
            self.ranksView.setColumnHidden(i + 1, not shown)
            self.ranksView.setColumnWidth(i + 1, 100)

        index = self.ranksView.horizontalHeader().sortIndicatorSection()
        if self.ranksView.isColumnHidden(index):
            self.headerState[self.rankMode] = None

        if self.headerState[self.rankMode] is None:
            def get_sort_by_col(measures, selected_measures):
                cols = [
                    i + 1 for i, m in enumerate(measures)
                    if m.name in selected_measures
                ]
                # Falls back to the column right after the built-in measures.
                return cols[0] if cols else len(measures) + 1

            col = get_sort_by_col(self.measures, self.selected_checks)
            self.ranksView.sortByColumn(col, Qt.DescendingOrder)
            self.autoSelection()

    def updateDelegates(self):
        """Install a colored bar item delegate on each of the three views."""
        self.contRanksView.setItemDelegate(gui.ColoredBarItemDelegate(self))
        self.discRanksView.setItemDelegate(gui.ColoredBarItemDelegate(self))
        self.noClassRanksView.setItemDelegate(gui.ColoredBarItemDelegate(self))

    def send_report(self):
        """Report the input domain, the rank table and the output domain."""
        if not self.data:
            return
        self.report_domain("Input", self.data.domain)
        self.report_table("Ranks", self.ranksView, num_format="{:.3f}")
        if self.out_domain_desc is not None:
            self.report_items("Output", self.out_domain_desc)

    def commit(self):
        """Send the data restricted to the selected attributes.

        None is sent when there is no data or nothing is selected. If every
        attribute is selected the selection method is switched to "All".
        """
        self.selected_rows = self.get_selection()
        if self.data and len(self.data.domain.attributes) == len(
                self.selected_rows):
            self.selectMethod = OWRank.SelectAll
            self.selectButtons.button(self.selectMethod).setChecked(True)
        selected = self.selectedAttrs()
        if not self.data or not selected:
            self.send("Reduced Data", None)
            self.out_domain_desc = None
        else:
            data = Table(
                Domain(selected, self.data.domain.class_var,
                       self.data.domain.metas), self.data)
            self.send("Reduced Data", data)
            self.out_domain_desc = report.describe_domain(data.domain)

    def selectedAttrs(self):
        """Return the attributes of the selected rows (empty without data)."""
        if self.data:
            inds = self.ranksView.selectionModel().selectedRows(0)
            # Map the (sorted) proxy rows back to source model rows.
            source = self.ranksProxyModel.mapToSource
            inds = map(source, inds)
            inds = [ind.row() for ind in inds]
            return [self.data.domain.attributes[i] for i in inds]
        else:
            return []

    def create_scores_table(self, labels):
        """Build the table sent on the "Scores" output.

        Columns are the selected built-in measures followed by `labels`; the
        feature names are stored in a string meta column. Returns None when
        there are no columns.
        """
        indices = [
            i for i, m in enumerate(self.measures)
            if self.selectedMeasures.get(m.name, False)
        ]
        measures = [
            s.name for s in self.measures
            if self.selectedMeasures.get(s.name, False)
        ]
        measures += list(labels)
        if not measures:
            return None
        features = [ContinuousVariable(s) for s in measures]
        metas = [StringVariable("Feature name")]
        domain = Domain(features, metas=metas)

        # Selected built-in rows plus every scorer row, transposed so that
        # each feature becomes one table row.
        scores = np.nan_to_num(
            np.array([
                row for i, row in enumerate(self.measure_scores)
                if i in indices or i >= len(self.measures)
            ], dtype=np.float64).T)
        feature_names = np.array([a.name for a in self.data.domain.attributes])
        # Reshape to 2d array as Table does not like 1d arrays
        feature_names = feature_names[:, None]

        new_table = Table(domain, scores, metas=feature_names)
        new_table.name = "Feature Scores"
        return new_table

    @classmethod
    def migrate_settings(cls, settings, version):
        """Pad a short tuple `headerState` to three entries (no version)."""
        if not version:
            # Before fc5caa1e1d716607f1f5c4e0b0be265c23280fa0
            # headerState had length 2
            headerState = settings.get("headerState", None)
            if headerState is not None and \
                    isinstance(headerState, tuple) and \
                    len(headerState) < 3:
                headerState = (list(headerState) + [None] * 3)[:3]
                settings["headerState"] = headerState
class OWDataTable(OWWidget):
    """Widget that shows each input dataset in its own spreadsheet tab.

    The current tab's row/column selection is sent on the "Selected Data"
    output; an annotated copy of the current table is sent on the second
    output.
    """

    name = "Data Table"
    description = "View the dataset in a spreadsheet."
    icon = "icons/Table.svg"
    priority = 50
    keywords = []

    buttons_area_orientation = Qt.Vertical

    class Inputs:
        data = Input("Data", Table, multiple=True)

    class Outputs:
        selected_data = Output("Selected Data", Table, default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)

    show_distributions = Setting(False)
    dist_color_RGB = Setting((220, 220, 220, 255))
    show_attribute_labels = Setting(True)
    select_rows = Setting(True)
    auto_commit = Setting(True)

    color_by_class = Setting(True)
    settingsHandler = DomainContextHandler(
        match_values=DomainContextHandler.MATCH_VALUES_ALL)
    selected_rows = Setting([], schema_only=True)
    selected_cols = Setting([], schema_only=True)

    def __init__(self):
        """Build the info box, the option check boxes and the tab widget."""
        super().__init__()

        # Input id -> TableSlot, in the order the inputs were added.
        self._inputs = OrderedDict()

        # The stored selection is kept aside until data arrives; it is
        # applied (once) in `set_dataset`.
        self.__pending_selected_rows = self.selected_rows
        self.selected_rows = None
        self.__pending_selected_cols = self.selected_cols
        self.selected_cols = None

        self.dist_color = QColor(*self.dist_color_RGB)

        info_box = gui.vBox(self.controlArea, "Info")
        self.info_ex = gui.widgetLabel(info_box, 'No data on input.', )
        self.info_ex.setWordWrap(True)
        self.info_attr = gui.widgetLabel(info_box, ' ')
        self.info_attr.setWordWrap(True)
        self.info_class = gui.widgetLabel(info_box, ' ')
        self.info_class.setWordWrap(True)
        self.info_meta = gui.widgetLabel(info_box, ' ')
        self.info_meta.setWordWrap(True)
        info_box.setMinimumWidth(200)
        gui.separator(self.controlArea)

        box = gui.vBox(self.controlArea, "Variables")
        self.c_show_attribute_labels = gui.checkBox(
            box, self, "show_attribute_labels",
            "Show variable labels (if present)",
            callback=self._on_show_variable_labels_changed)

        gui.checkBox(box, self, "show_distributions",
                     'Visualize numeric values',
                     callback=self._on_distribution_color_changed)
        gui.checkBox(box, self, "color_by_class", 'Color by instance classes',
                     callback=self._on_distribution_color_changed)

        box = gui.vBox(self.controlArea, "Selection")

        gui.checkBox(box, self, "select_rows", "Select full rows",
                     callback=self._on_select_rows_changed)

        gui.rubber(self.controlArea)

        reset = gui.button(
            None, self, "Restore Original Order", callback=self.restore_order,
            tooltip="Show rows in the original order", autoDefault=False)
        self.buttonsArea.layout().insertWidget(0, reset)
        gui.auto_send(self.buttonsArea, self, "auto_commit")

        # GUI with tabs
        self.tabs = gui.tabWidget(self.mainArea)
        self.tabs.currentChanged.connect(self._on_current_tab_changed)

    def copy_to_clipboard(self):
        """Copy the current selection to the clipboard (see `copy`)."""
        self.copy()

    @staticmethod
    def sizeHint():
        """Return the preferred widget size."""
        return QSize(800, 500)

    @Inputs.data
    def set_dataset(self, data, tid=None):
        """Set the input dataset.

        A non-None `data` creates the tab for input id `tid` or updates the
        existing one; None removes the tab of a known `tid`. Afterwards the
        pending (stored) selection is applied and the output committed.
        """
        self.closeContext()
        if data is not None:
            datasetname = getattr(data, "name", "Data")
            if tid in self._inputs:
                # update existing input slot
                slot = self._inputs[tid]
                view = slot.view
                # reset the (header) view state.
                view.setModel(None)
                view.horizontalHeader().setSortIndicator(-1, Qt.AscendingOrder)
                assert self.tabs.indexOf(view) != -1
                self.tabs.setTabText(self.tabs.indexOf(view), datasetname)
            else:
                view = QTableView()
                view.setSortingEnabled(True)
                view.setHorizontalScrollMode(QTableView.ScrollPerPixel)

                if self.select_rows:
                    view.setSelectionBehavior(QTableView.SelectRows)

                header = view.horizontalHeader()
                header.setSectionsMovable(True)
                header.setSectionsClickable(True)
                header.setSortIndicatorShown(True)
                header.setSortIndicator(-1, Qt.AscendingOrder)

                # QHeaderView does not 'reset' the model sort column,
                # because there is no guaranty (requirement) that the
                # models understand the -1 sort column.
                def sort_reset(index, order):
                    if view.model() is not None and index == -1:
                        view.model().sort(index, order)

                header.sortIndicatorChanged.connect(sort_reset)
                self.tabs.addTab(view, datasetname)

            view.dataset = data
            self.tabs.setCurrentWidget(view)

            self._setup_table_view(view, data)
            slot = TableSlot(tid, data, table_summary(data), view)
            view._input_slot = slot  # pylint: disable=protected-access
            self._inputs[tid] = slot

            self.tabs.setCurrentIndex(self.tabs.indexOf(view))

            self.set_info(slot.summary)

            # The length may still be computing; refresh the info box from
            # the GUI thread (queued connection) once it is done.
            if isinstance(slot.summary.len, concurrent.futures.Future):
                def update(_):
                    QMetaObject.invokeMethod(
                        self, "_update_info", Qt.QueuedConnection)

                slot.summary.len.add_done_callback(update)

        elif tid in self._inputs:
            slot = self._inputs.pop(tid)
            view = slot.view
            view.hide()
            view.deleteLater()
            self.tabs.removeTab(self.tabs.indexOf(view))

            current = self.tabs.currentWidget()
            if current is not None:
                # pylint: disable=protected-access
                self.set_info(current._input_slot.summary)

        self.tabs.tabBar().setVisible(self.tabs.count() > 1)
        self.openContext(data)

        if data and self.__pending_selected_rows is not None:
            self.selected_rows = self.__pending_selected_rows
            self.__pending_selected_rows = None
        else:
            self.selected_rows = []

        if data and self.__pending_selected_cols is not None:
            self.selected_cols = self.__pending_selected_cols
            self.__pending_selected_cols = None
        else:
            self.selected_cols = []

        self.set_selection()
        self.unconditional_commit()

    def _setup_table_view(self, view, data):
        """Setup the `view` (QTableView) with `data` (Orange.data.Table).

        Installs the model, the item delegate and a block selection model,
        and limits the number of displayed rows (see the workaround below).
        """
        if data is None:
            view.setModel(None)
            return

        datamodel = RichTableModel(data)

        rowcount = data.approx_len()

        if self.color_by_class and data.domain.has_discrete_class:
            color_schema = [
                QColor(*c) for c in data.domain.class_var.colors]
        else:
            color_schema = None
        if self.show_distributions:
            view.setItemDelegate(
                gui.TableBarItem(
                    self, color=self.dist_color, color_schema=color_schema)
            )
        else:
            view.setItemDelegate(QStyledItemDelegate(self))

        # Enable/disable view sorting based on data's type
        sortable = is_sortable(data)
        view.setSortingEnabled(sortable)
        header = view.horizontalHeader()
        header.setSectionsClickable(sortable)
        header.setSortIndicatorShown(sortable)

        view.setModel(datamodel)

        vheader = view.verticalHeader()
        option = view.viewOptions()
        size = view.style().sizeFromContents(
            QStyle.CT_ItemViewItem, option,
            QSize(20, 20), view)

        vheader.setDefaultSectionSize(size.height() + 2)
        vheader.setMinimumSectionSize(5)
        vheader.setSectionResizeMode(QHeaderView.Fixed)

        # Limit the number of rows displayed in the QTableView
        # (workaround for QTBUG-18490 / QTBUG-28631)
        maxrows = (2 ** 31 - 1) // (vheader.defaultSectionSize() + 2)
        if rowcount > maxrows:
            sliceproxy = TableSliceProxy(
                parent=view, rowSlice=slice(0, maxrows))
            sliceproxy.setSourceModel(datamodel)
            # First reset the view (without this the header view retains
            # it's state - at this point invalid/broken)
            view.setModel(None)
            view.setModel(sliceproxy)

        assert view.model().rowCount() <= maxrows
        assert vheader.sectionSize(0) > 1 or datamodel.rowCount() == 0

        # update the header (attribute names)
        self._update_variable_labels(view)

        selmodel = BlockSelectionModel(
            view.model(), parent=view, selectBlocks=not self.select_rows)
        view.setSelectionModel(selmodel)
        view.selectionModel().selectionChanged.connect(self.update_selection)

    #noinspection PyBroadException
    def set_corner_text(self, table, text):
        """Set table corner text.

        On first use for `table` the corner button is located, given a
        hand-painting event filter and rewired to `_on_select_all`. Failures
        are recorded in `table.btnfailed` and otherwise ignored.
        """
        # As this is an ugly hack, do everything in
        # try - except blocks, as it may stop working in newer Qt.
        # pylint: disable=broad-except
        if not hasattr(table, "btn") and not hasattr(table, "btnfailed"):
            try:
                btn = table.findChild(QAbstractButton)

                class Efc(QObject):
                    @staticmethod
                    def eventFilter(o, e):
                        if (isinstance(o, QAbstractButton) and
                                e.type() == QEvent.Paint):
                            # paint by hand (borrowed from QTableCornerButton)
                            btn = o
                            opt = QStyleOptionHeader()
                            opt.initFrom(btn)
                            state = QStyle.State_None
                            if btn.isEnabled():
                                state |= QStyle.State_Enabled
                            if btn.isActiveWindow():
                                state |= QStyle.State_Active
                            if btn.isDown():
                                state |= QStyle.State_Sunken
                            opt.state = state
                            opt.rect = btn.rect()
                            opt.text = btn.text()
                            opt.position = QStyleOptionHeader.OnlyOneSection
                            painter = QStylePainter(btn)
                            painter.drawControl(QStyle.CE_Header, opt)
                            return True  # eat event
                        return False

                # Stored on the table so the filter object stays referenced.
                table.efc = Efc()
                # disconnect default handler for clicks and connect a new
                # one, which supports both selection and deselection of all
                # data
                btn.clicked.disconnect()
                btn.installEventFilter(table.efc)
                btn.clicked.connect(self._on_select_all)
                table.btn = btn

                if sys.platform == "darwin":
                    btn.setAttribute(Qt.WA_MacSmallSize)

            except Exception:
                table.btnfailed = True

        if hasattr(table, "btn"):
            try:
                btn = table.btn
                btn.setText(text)
                opt = QStyleOptionHeader()
                opt.text = btn.text()
                s = btn.style().sizeFromContents(
                    QStyle.CT_HeaderSection,
                    opt, QSize(),
                    btn).expandedTo(QApplication.globalStrut())
                if s.isValid():
                    table.verticalHeader().setMinimumWidth(s.width())
            except Exception:
                # Deliberate best effort: the corner text is cosmetic.
                pass

    def _on_select_all(self, _):
        """Corner button click: clear a full selection, else select all."""
        # pylint: disable=protected-access
        data_info = self.tabs.currentWidget()._input_slot.summary
        if len(self.selected_rows) == data_info.len \
                and len(self.selected_cols) == len(data_info.domain):
            self.tabs.currentWidget().clearSelection()
        else:
            self.tabs.currentWidget().selectAll()

    def _on_current_tab_changed(self, index):
        """Update the info box on current tab change"""
        view = self.tabs.widget(index)
        if view is not None and view.model() is not None:
            # pylint: disable=protected-access
            self.set_info(view._input_slot.summary)
        else:
            self.set_info(None)

    def _update_variable_labels(self, view):
        """Update the variable labels visibility for `view`.

        With labels shown, the corner text lists the sorted attribute label
        names of all variables, skipping names that start with "_".
        """
        model = view.model()
        if isinstance(model, TableSliceProxy):
            model = model.sourceModel()

        if self.show_attribute_labels:
            model.setRichHeaderFlags(
                RichTableModel.Labels | RichTableModel.Name)

            labelnames = set()
            domain = model.source.domain
            for a in itertools.chain(domain.metas, domain.variables):
                labelnames.update(a.attributes.keys())
            labelnames = sorted(
                [label for label in labelnames if not label.startswith("_")])
            # The leading empty entry produces a blank first line.
            self.set_corner_text(view, "\n".join([""] + labelnames))
        else:
            model.setRichHeaderFlags(RichTableModel.Name)
            self.set_corner_text(view, "")

    def _on_show_variable_labels_changed(self):
        """The variable labels (var.attributes) visibility was changed."""
        for slot in self._inputs.values():
            self._update_variable_labels(slot.view)

    def _on_distribution_color_changed(self):
        """Reinstall the item delegate of every tab after an option change."""
        for ti in range(self.tabs.count()):
            widget = self.tabs.widget(ti)
            model = self._get_model(widget)
            data = model.source
            class_var = data.domain.class_var
            if self.color_by_class and class_var and class_var.is_discrete:
                color_schema = [QColor(*c) for c in class_var.colors]
            else:
                color_schema = None
            if self.show_distributions:
                delegate = gui.TableBarItem(self, color=self.dist_color,
                                            color_schema=color_schema)
            else:
                delegate = QStyledItemDelegate(self)
            widget.setItemDelegate(delegate)
        tab = self.tabs.currentWidget()
        if tab:
            tab.reset()

    def _on_select_rows_changed(self):
        """Apply the "Select full rows" option to every view."""
        for slot in self._inputs.values():
            selection_model = slot.view.selectionModel()
            selection_model.setSelectBlocks(not self.select_rows)
            if self.select_rows:
                slot.view.setSelectionBehavior(QTableView.SelectRows)
                # Expand the current selection to full row selection.
                selection_model.select(
                    selection_model.selection(),
                    QItemSelectionModel.Select | QItemSelectionModel.Rows
                )
            else:
                slot.view.setSelectionBehavior(QTableView.SelectItems)

    def restore_order(self):
        """Restore the original data order of the current view."""
        table = self.tabs.currentWidget()
        if table is not None:
            table.horizontalHeader().setSortIndicator(-1, Qt.AscendingOrder)

    def set_info(self, summary):
        """Fill the info labels from `summary`; None shows "no data"."""
        if summary is None:
            self.info_ex.setText("No data on input.")
            self.info_attr.setText("")
            self.info_class.setText("")
            self.info_meta.setText("")
        else:
            info_len, info_attr, info_class, info_meta = \
                format_summary(summary)

            self.info_ex.setText(info_len)
            self.info_attr.setText(info_attr)
            self.info_class.setText(info_class)
            self.info_meta.setText(info_meta)

    @Slot()
    def _update_info(self):
        """Refresh the info box from the current tab's summary."""
        current = self.tabs.currentWidget()
        if current is not None and current.model() is not None:
            # pylint: disable=protected-access
            self.set_info(current._input_slot.summary)

    def update_selection(self, *_):
        """Selection changed in a view: commit the new selection."""
        self.commit()

    def set_selection(self):
        """Apply `selected_rows` / `selected_cols` to the current view.

        Nothing is selected when either list is empty or when the last
        stored row/column index does not fit into the view's model.
        """
        if self.selected_rows and self.selected_cols:
            view = self.tabs.currentWidget()
            model = view.model()
            if model.rowCount() <= self.selected_rows[-1] or \
                    model.columnCount() <= self.selected_cols[-1]:
                return

            selection = QItemSelection()
            rowranges = list(ranges(self.selected_rows))
            colranges = list(ranges(self.selected_cols))

            # The `- 1` treats the range ends as exclusive.
            for rowstart, rowend in rowranges:
                for colstart, colend in colranges:
                    selection.append(
                        QItemSelectionRange(
                            view.model().index(rowstart, colstart),
                            view.model().index(rowend - 1, colend - 1)
                        )
                    )
            view.selectionModel().select(
                selection, QItemSelectionModel.ClearAndSelect)

    @staticmethod
    def get_selection(view):
        """
        Return the selected row and column indices of the selection in view.

        Returns a `(rows, cols)` pair of lists; `rows` are sorted indices
        mapped back through any proxy models and the applied sorting.
        """
        selmodel = view.selectionModel()

        selection = selmodel.selection()
        model = view.model()
        # map through the proxies into input table.
        while isinstance(model, QAbstractProxyModel):
            selection = model.mapSelectionToSource(selection)
            model = model.sourceModel()

        assert isinstance(selmodel, BlockSelectionModel)
        assert isinstance(model, TableModel)

        row_spans, col_spans = selection_blocks(selection)
        rows = list(itertools.chain.from_iterable(
            itertools.starmap(range, row_spans)))
        cols = list(itertools.chain.from_iterable(
            itertools.starmap(range, col_spans)))
        rows = numpy.array(rows, dtype=numpy.intp)
        # map the rows through the applied sorting (if any)
        rows = model.mapToSourceRows(rows)
        rows.sort()
        rows = rows.tolist()
        return rows, cols

    @staticmethod
    def _get_model(view):
        """Return the model of `view` with all proxy models unwrapped."""
        model = view.model()
        while isinstance(model, QAbstractProxyModel):
            model = model.sourceModel()
        return model

    def commit(self):
        """
        Commit/send the current selected row/column selection.

        For an SqlTable the selected-data output is sent as None, since
        selections of individual instances are not implemented for them.
        """
        selected_data = table = rowsel = None
        view = self.tabs.currentWidget()
        if view and view.model() is not None:
            model = self._get_model(view)
            table = model.source  # The input data table

            # Selections of individual instances are not implemented
            # for SqlTables
            if isinstance(table, SqlTable):
                self.Outputs.selected_data.send(selected_data)
                self.Outputs.annotated_data.send(None)
                return

            rowsel, colsel = self.get_selection(view)
            self.selected_rows, self.selected_cols = rowsel, colsel

            def select(data, rows, domain):
                """
                Select the data subset with specified rows and domain subsets.

                If either rows or domain is None they mean select all.
                """
                if rows is not None and domain is not None:
                    return data.from_table(domain, data, rows)
                elif rows is not None:
                    # The source table must be passed before the row
                    # indices, exactly as in the branch above.
                    return data.from_table(data.domain, data, rows)
                elif domain is not None:
                    return data.from_table(domain, data)
                else:
                    return data

            domain = table.domain

            if len(colsel) < len(domain) + len(domain.metas):
                # only a subset of the columns is selected
                # Indexed by view column, so this order has to match the
                # column order of the table model.
                allvars = domain.class_vars + domain.metas + domain.attributes
                columns = [(c, model.headerData(c, Qt.Horizontal,
                                                TableModel.DomainRole))
                           for c in colsel]
                assert all(role is not None for _, role in columns)

                def select_vars(role):
                    """select variables for role (TableModel.DomainRole)"""
                    return [allvars[c] for c, r in columns if r == role]

                attrs = select_vars(TableModel.Attribute)
                if attrs and issparse(table.X):
                    # for sparse data you can only select all attributes
                    attrs = table.domain.attributes
                class_vars = select_vars(TableModel.ClassVar)
                metas = select_vars(TableModel.Meta)
                domain = Orange.data.Domain(attrs, class_vars, metas)

            # Avoid a copy if all/none rows are selected.
            if not rowsel:
                selected_data = None
            elif len(rowsel) == len(table):
                selected_data = select(table, None, domain)
            else:
                selected_data = select(table, rowsel, domain)

        self.Outputs.selected_data.send(selected_data)
        self.Outputs.annotated_data.send(create_annotated_table(table, rowsel))

    def copy(self):
        """
        Copy current table selection to the clipboard.
        """
        view = self.tabs.currentWidget()
        if view is not None:
            mime = table_selection_to_mime_data(view)
            QApplication.clipboard().setMimeData(
                mime, QClipboard.Clipboard
            )

    def send_report(self):
        """Report a brief data description and the current table view."""
        view = self.tabs.currentWidget()
        if not view or not view.model():
            return
        model = self._get_model(view)
        self.report_data_brief(model.source)
        self.report_table(view)
class OWDistances(OWWidget, ConcurrentWidgetMixin):
    """Widget that computes a matrix of pairwise distances.

    Distances are computed between rows or columns (``axis``) of the input
    table using the metric selected from ``METRICS``. The computation is
    started with ``self.start`` and the result is sent to the "Distances"
    output from ``on_done``.
    """
    name = "Distances"
    description = "Compute a matrix of pairwise distances."
    icon = "icons/Distance.svg"
    keywords = []

    class Inputs:
        data = Input("Data", Orange.data.Table)

    class Outputs:
        distances = Output("Distances", Orange.misc.DistMatrix, dynamic=False)

    settings_version = 3

    axis = Setting(0)        # type: int
    metric_idx = Setting(0)  # type: int

    #: Use normalized distances if the metric supports it.
    #: The default is `True`, except when restoring from old pre v2 settings
    #: (see `migrate_settings`).
    normalized_dist = Setting(True)  # type: bool
    autocommit = Setting(True)       # type: bool

    want_main_area = False
    buttons_area_orientation = Qt.Vertical

    class Error(OWWidget.Error):
        no_continuous_features = Msg("No numeric features")
        no_binary_features = Msg("No binary features")
        dense_metric_sparse_data = Msg("{} requires dense data.")
        distances_memory_error = Msg("Not enough memory")
        distances_value_error = Msg("Problem in calculation:\n{}")
        data_too_large_for_mahalanobis = Msg(
            "Mahalanobis handles up to 1000 {}.")

    class Warning(OWWidget.Warning):
        ignoring_discrete = Msg("Ignoring categorical features")
        ignoring_nonbinary = Msg("Ignoring non-binary features")
        imputing_data = Msg("Missing values were imputed")

    def __init__(self):
        """Build the control area: axis selector, metric combo, options."""
        OWWidget.__init__(self)
        ConcurrentWidgetMixin.__init__(self)

        self.data = None

        gui.radioButtons(
            self.controlArea, self, "axis", ["Rows", "Columns"],
            box="Distances between", callback=self._invalidate
        )
        box = gui.widgetBox(self.controlArea, "Distance Metric")
        self.metrics_combo = gui.comboBox(
            box, self, "metric_idx",
            items=[m[0] for m in METRICS],
            callback=self._metric_changed
        )
        self.normalization_check = gui.checkBox(
            box, self, "normalized_dist", "Normalized",
            callback=self._invalidate,
            # The two literals are concatenated; the trailing space keeps
            # "common" and "scale" from running together.
            tooltip=("All dimensions are (implicitly) scaled to a common "
                     "scale to normalize the influence across the domain."),
            stateWhenDisabled=False
        )
        _, metric = METRICS[self.metric_idx]
        self.normalization_check.setEnabled(metric.supports_normalization)

        gui.auto_apply(self.controlArea, self, "autocommit")
        self.layout().setSizeConstraint(self.layout().SetFixedSize)

    @Inputs.data
    @check_sql_input
    def set_data(self, data):
        """Store the new input, cancel running work and recompute."""
        self.cancel()
        self.data = data
        self.refresh_metrics()
        self.unconditional_commit()

    def refresh_metrics(self):
        """Disable combo entries whose metric cannot handle sparse input."""
        sparse = self.data is not None and issparse(self.data.X)
        for i, metric in enumerate(METRICS):
            item = self.metrics_combo.model().item(i)
            item.setEnabled(not sparse or metric[1].supports_sparse)

    def commit(self):
        """Compute distances for the current data and selected metric."""
        # pylint: disable=invalid-sequence-index
        metric = METRICS[self.metric_idx][1]
        self.compute_distances(metric, self.data)

    def compute_distances(self, metric, data):
        """Validate/prepare `data` for `metric` and start the computation.

        The nested checks run in a fixed order. Each returns ``False`` to
        abort (after raising a widget error) or ``True`` to continue, and
        may replace `data` with a cleaned-up table. When a check fails, or
        there is no input, the task is still started with ``data=None``.
        """
        def _check_sparse():
            # pylint: disable=invalid-sequence-index
            if issparse(data.X) and not metric.supports_sparse:
                self.Error.dense_metric_sparse_data(
                    METRICS[self.metric_idx][0])
                return False
            return True

        def _fix_discrete():
            nonlocal data
            if data.domain.has_discrete_attributes() \
                    and metric is not distance.Jaccard \
                    and (issparse(data.X)
                         and getattr(metric, "fallback", None)
                         or not metric.supports_discrete
                         or self.axis == 1):
                if not data.domain.has_continuous_attributes():
                    self.Error.no_continuous_features()
                    return False
                self.Warning.ignoring_discrete()
                data = distance.remove_discrete_features(data)
            return True

        def _fix_nonbinary():
            nonlocal data
            if metric is distance.Jaccard and not issparse(data.X):
                nbinary = sum(a.is_discrete and len(a.values) == 2
                              for a in data.domain.attributes)
                if not nbinary:
                    self.Error.no_binary_features()
                    return False
                elif nbinary < len(data.domain.attributes):
                    self.Warning.ignoring_nonbinary()
                    data = distance.remove_nonbinary_features(data)
            return True

        def _fix_missing():
            nonlocal data
            if not metric.supports_missing and bn.anynan(data.X):
                self.Warning.imputing_data()
                data = distance.impute(data)
            return True

        def _check_tractability():
            if metric is distance.Mahalanobis:
                if self.axis == 1:
                    # when computing distances by columns, we want at most
                    # 1000 rows
                    if len(data) > 1000:
                        self.Error.data_too_large_for_mahalanobis("rows")
                        return False
                else:
                    if len(data.domain.attributes) > 1000:
                        self.Error.data_too_large_for_mahalanobis("columns")
                        return False
            return True

        self.clear_messages()
        if data is not None:
            for check in (_check_sparse, _check_tractability,
                          _fix_discrete, _fix_missing, _fix_nonbinary):
                if not check():
                    data = None
                    break

        self.start(DistanceRunner.run, data, metric,
                   self.normalized_dist, self.axis)

    def on_partial_result(self, _):
        """Partial results are ignored."""
        pass

    def on_done(self, result: Orange.misc.DistMatrix):
        """Send the finished distance matrix (or None) to the output."""
        assert isinstance(result, Orange.misc.DistMatrix) or result is None
        self.Outputs.distances.send(result)

    def on_exception(self, ex):
        """Show known failures as widget errors; re-raise anything else."""
        if isinstance(ex, ValueError):
            self.Error.distances_value_error(ex)
        elif isinstance(ex, MemoryError):
            self.Error.distances_memory_error()
        elif isinstance(ex, InterruptException):
            pass
        else:
            raise ex

    def onDeleteWidget(self):
        """Shut down the concurrent machinery before the widget is deleted."""
        self.shutdown()
        super().onDeleteWidget()

    def _invalidate(self):
        self.commit()

    def _metric_changed(self):
        """Sync the "Normalized" check box with the new metric, recompute."""
        metric = METRICS[self.metric_idx][1]
        self.normalization_check.setEnabled(metric.supports_normalization)
        self._invalidate()

    def send_report(self):
        """Report the selected axis and metric name."""
        # pylint: disable=invalid-sequence-index
        self.report_items((
            ("Distances Between", ["Rows", "Columns"][self.axis]),
            ("Metric", METRICS[self.metric_idx][0])
        ))

    @classmethod
    def migrate_settings(cls, settings, version):
        """Upgrade stored `settings` (a dict, modified in place).

        Settings without "metric_idx" are left untouched by the index
        remapping instead of raising ``KeyError``.
        """
        # Precedence made explicit: unversioned settings always get
        # normalization switched off.
        if version is None \
                or (version < 2 and "normalized_dist" not in settings):
            # normalize_dist is set to False when restoring settings from
            # an older version to preserve old semantics.
            settings["normalized_dist"] = False
        if version is None or version < 3:
            # Mahalanobis was moved from idx = 2 to idx = 9
            metric_idx = settings.get("metric_idx")
            if metric_idx is None:
                return
            if metric_idx == 2:
                settings["metric_idx"] = 9
            elif 2 < metric_idx <= 9:
                settings["metric_idx"] -= 1
class OWReport(OWWidget):
    """Report window: a list of report items next to an HTML view of them.

    Items live in ``self.table_model``; the HTML shown in
    ``self.report_view`` is rebuilt from them by ``_build_html``.
    """
    name = "Report"
    save_dir = Setting("")
    open_dir = Setting("")

    def __init__(self):
        super().__init__()
        self._setup_ui_()
        self.report_changed = False

        index_file = pkg_resources.resource_filename(__name__, "index.html")
        with open(index_file, "r") as f:
            self.report_html_template = f.read()

    def _setup_ui_(self):
        """Create the item table, the buttons and the web view."""
        self.table_model = ReportItemModel(0, len(Column.__members__))
        self.table = ReportTable(self.controlArea)
        self.table.setModel(self.table_model)
        self.table.setShowGrid(False)
        self.table.setSelectionBehavior(QTableView.SelectRows)
        self.table.setSelectionMode(QTableView.SingleSelection)
        self.table.setWordWrap(False)
        self.table.setMouseTracking(True)
        self.table.verticalHeader().setSectionResizeMode(QHeaderView.Fixed)
        self.table.verticalHeader().setDefaultSectionSize(20)
        self.table.verticalHeader().setVisible(False)
        self.table.horizontalHeader().setVisible(False)
        self.table.setFixedWidth(250)
        self.table.setColumnWidth(Column.item, 200)
        self.table.setColumnWidth(Column.remove, 23)
        self.table.setColumnWidth(Column.scheme, 25)
        self.table.clicked.connect(self._table_clicked)
        self.table.selectionModel().selectionChanged.connect(
            self._table_selection_changed)
        self.controlArea.layout().addWidget(self.table)

        self.last_scheme = None
        self.scheme_button = gui.button(
            self.controlArea, self, "Back to Last Scheme",
            callback=self._show_last_scheme
        )
        box = gui.hBox(self.controlArea)
        box.setContentsMargins(-6, 0, -6, 0)
        self.save_button = gui.button(
            box, self, "Save", callback=self.save_report
        )
        self.print_button = gui.button(
            box, self, "Print", callback=self._print_report
        )

        class PyBridge(QObject):
            # Slots called from the page's JavaScript through `pybridge`.
            # `self` below is the enclosing OWReport (closure); `myself`
            # is the bridge object.
            @pyqtSlot(str)
            def _select_item(myself, item_id):
                item = self.table_model.get_item_by_id(item_id)
                self.table.selectRow(
                    self.table_model.indexFromItem(item).row())
                self._change_selected_item(item)

            @pyqtSlot(str, str)
            def _add_comment(myself, item_id, value):
                item = self.table_model.get_item_by_id(item_id)
                item.comment = value
                self.report_changed = True

        self.report_view = WebviewWidget(self.mainArea,
                                         bridge=PyBridge(self))
        self.mainArea.layout().addWidget(self.report_view)

    @deprecated("Widgets should not be pickled")
    def __getstate__(self):
        rep_dict = self.__dict__.copy()
        for key in ('_OWWidget__env', 'controlArea', 'mainArea',
                    'report_view', 'table', 'table_model'):
            del rep_dict[key]
        items_len = self.table_model.rowCount()
        return rep_dict, [self.table_model.item(i) for i in range(items_len)]

    @deprecated("Widgets should not be pickled")
    def __setstate__(self, state):
        rep_dict, items = state
        self.__dict__.update(rep_dict)
        self._setup_ui_()
        for item in items:
            self.table_model.add_item(
                ReportItem(item.name, item.html, item.scheme,
                           item.module, item.icon_name, item.comment)
            )

    def _table_clicked(self, index):
        """Handle clicks on the "remove" and "scheme" columns."""
        if index.column() == Column.remove:
            self._remove_item(index.row())
            indexes = self.table.selectionModel().selectedIndexes()
            if indexes:
                item = self.table_model.item(indexes[0].row())
                self._scroll_to_item(item)
                self._change_selected_item(item)
        if index.column() == Column.scheme:
            self._show_scheme(index.row())

    def _table_selection_changed(self, new_selection, _):
        if new_selection.indexes():
            item = self.table_model.item(new_selection.indexes()[0].row())
            self._scroll_to_item(item)
            self._change_selected_item(item)

    def _remove_item(self, row):
        self.table_model.removeRow(row)
        self.report_changed = True
        self._build_html()

    def clear(self):
        """Remove all items and rebuild the (now empty) page."""
        self.table_model.clear()
        self.report_changed = True
        self._build_html()

    def _add_item(self, widget):
        """Append a report item for `widget` and return it."""
        name = widget.get_widget_name_extension()
        name = "{} - {}".format(widget.name, name) if name else widget.name
        item = ReportItem(name, widget.report_html, self._get_scheme(),
                          widget.__module__, widget.icon)
        self.table_model.add_item(item)
        self.report_changed = True
        return item

    def _build_html(self):
        """Rebuild the page from the template and all items, and show it."""
        # Collect the pieces and join once instead of repeated `+=`.
        parts = [self.report_html_template, "<body>"]
        for i in range(self.table_model.rowCount()):
            item = self.table_model.item(i)
            parts.append(
                "<div id='{}' class='normal' "
                "onClick='pybridge._select_item(this.id)'>{}<div "
                "class='textwrapper'><textarea "
                "placeholder='Write a comment...'"
                "onInput='this.innerHTML = this.value;"
                "pybridge._add_comment(this.parentNode.parentNode.id, this.value);'"
                ">{}</textarea></div>"
                "</div>".format(item.id, item.html, item.comment))
        parts.append("</body></html>")
        self.report_view.setHtml("".join(parts))

    def _scroll_to_item(self, item):
        self.report_view.evalJS(
            "document.getElementById('{}').scrollIntoView();".format(item.id)
        )

    def _change_selected_item(self, item):
        """Move the 'selected' CSS class to `item`'s element in the page."""
        # `sel_el` is undefined while nothing is selected yet; guard the
        # dereference so the script does not throw a TypeError.
        self.report_view.evalJS(
            "var sel_el = document.getElementsByClassName('selected')[0]; "
            "if (sel_el && sel_el.id != {}) "
            "   sel_el.className = 'normal';".format(item.id))
        self.report_view.evalJS(
            "document.getElementById('{}').className = 'selected';".format(
                item.id))
        self.report_changed = True

    def make_report(self, widget):
        """Add `widget`'s report as a new item and select it."""
        item = self._add_item(widget)
        self._build_html()
        self._scroll_to_item(item)
        self.table.selectRow(self.table_model.rowCount() - 1)

    def _get_scheme(self):
        canvas = self.get_canvas_instance()
        return canvas.get_scheme_xml() if canvas else None

    def _show_scheme(self, row):
        """Load the scheme stored with the item at `row` into the canvas."""
        scheme = self.table_model.item(row).scheme
        canvas = self.get_canvas_instance()
        if canvas:
            document = canvas.current_document()
            if document.isModifiedStrict():
                # remember the current scheme for "Back to Last Scheme"
                self.last_scheme = canvas.get_scheme_xml()
            canvas.load_scheme_xml(scheme)

    def _show_last_scheme(self):
        if self.last_scheme:
            canvas = self.get_canvas_instance()
            if canvas:
                canvas.load_scheme_xml(self.last_scheme)

    def save_report(self):
        """Save report

        Asks for a file name and writes PDF, ".report" or HTML depending on
        the extension. Returns ``QDialog.Rejected`` when the dialog is
        cancelled and ``QDialog.Accepted`` otherwise.
        """
        filename, _ = QFileDialog.getSaveFileName(
            self, "Save Report", self.save_dir,
            "HTML (*.html);;PDF (*.pdf);;Report (*.report)")
        if not filename:
            return QDialog.Rejected

        self.save_dir = os.path.dirname(filename)
        self.saveSettings()
        _, extension = os.path.splitext(filename)
        if extension == ".pdf":
            printer = QPrinter()
            printer.setPageSize(QPrinter.A4)
            printer.setOutputFormat(QPrinter.PdfFormat)
            printer.setOutputFileName(filename)
            self.report_view.print_(printer)
        elif extension == ".report":
            self.save(filename)
        else:
            def save_html(contents):
                try:
                    with open(filename, "w", encoding="utf-8") as f:
                        f.write(contents)
                except PermissionError:
                    self.permission_error(filename)

            save_html(self.report_view.html())
        self.report_changed = False
        return QDialog.Accepted

    def _print_report(self):
        printer = QPrinter()
        print_dialog = QPrintDialog(printer, self)
        print_dialog.setWindowTitle("Print report")
        if print_dialog.exec_() != QDialog.Accepted:
            return
        self.report_view.print_(printer)

    def open_report(self):
        """Ask for a ".report" file, load it and show it as the report."""
        filename, _ = QFileDialog.getOpenFileName(
            self, "Open Report", self.open_dir, "Report (*.report)")
        if not filename:
            return

        self.open_dir = os.path.dirname(filename)
        self.saveSettings()

        try:
            report = self.load(filename)
        except (IOError, AttributeError, EOFError, ImportError, IndexError,
                pickle.UnpicklingError) as e:
            # Unpickling a damaged or foreign file can raise any of these.
            message_critical(
                self.tr("Could not load an Orange Report file"),
                title=self.tr("Error"),
                informative_text=self.tr("Error occurred "
                                         "while loading '{}'.").format(filename),
                exc_info=True,
                parent=self)
            log.error(str(e), exc_info=True)
            return
        # Reset the flag only after a successful load, so a failed load
        # does not hide unsaved changes in the current report.
        self.report_changed = False
        self.set_instance(report)
        self = report
        self._build_html()
        self.table.selectRow(0)
        self.show()
        self.raise_()

    def save(self, filename):
        """Pickle the report items and a few attributes to `filename`."""
        attributes = {}
        for key in ('last_scheme', 'open_dir'):
            attributes[key] = getattr(self, key, None)
        items = [self.table_model.item(i)
                 for i in range(self.table_model.rowCount())]
        report = dict(__version__=1,
                      attributes=attributes,
                      items=items)

        try:
            with open(filename, 'wb') as f:
                pickle.dump(report, f)
        except PermissionError:
            self.permission_error(filename)

    @classmethod
    def load(cls, filename):
        """Load a report from `filename`.

        A pickled dict is turned into a new instance; any other pickled
        object is returned as is.
        """
        # NOTE(security): pickle.load can execute arbitrary code; only open
        # report files from trusted sources.
        with open(filename, 'rb') as f:
            report = pickle.load(f)

        if not isinstance(report, dict):
            return report

        self = cls()
        self.__dict__.update(report['attributes'])
        for item in report['items']:
            self.table_model.add_item(
                ReportItem(item.name, item.html, item.scheme,
                           item.module, item.icon_name, item.comment)
            )
        return self

    def permission_error(self, filename):
        """Show and log an error about `filename` not being writable."""
        message_critical(
            self.tr("Permission error when trying to write report."),
            title=self.tr("Error"),
            informative_text=self.tr("Permission error occurred "
                                     "while saving '{}'.").format(filename),
            exc_info=True,
            parent=self)
        log.error("PermissionError when trying to write report.",
                  exc_info=True)

    def is_empty(self):
        return not self.table_model.rowCount()

    def is_changed(self):
        return self.report_changed

    @staticmethod
    def set_instance(report):
        """Store `report` on the QApplication as the report window."""
        app_inst = QApplication.instance()
        app_inst._report_window = report

    @staticmethod
    def get_instance():
        """Return the application's report window, creating it if needed."""
        app_inst = QApplication.instance()
        if not hasattr(app_inst, "_report_window"):
            report = OWReport()
            app_inst._report_window = report
        return app_inst._report_window

    @staticmethod
    def get_canvas_instance():
        """Return the first top-level `CanvasMainWindow`, or None."""
        for widget in QApplication.topLevelWidgets():
            if isinstance(widget, CanvasMainWindow):
                return widget
class OWConfusionMatrix(widget.OWWidget):
    """Widget showing a confusion matrix for evaluation results.

    Selecting cells outputs the data instances that fall into them.
    """

    name = "Confusion Matrix"
    description = ("Display a confusion matrix constructed from "
                   "the results of classifier evaluations.")
    icon = "icons/ConfusionMatrix.svg"
    priority = 1001
    keywords = []

    class Inputs:
        evaluation_results = Input("Evaluation Results",
                                   Orange.evaluation.Results)

    class Outputs:
        selected_data = Output("Selected Data", Orange.data.Table,
                               default=True)
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME,
                                Orange.data.Table)

    quantities = [
        "Number of instances",
        "Proportion of predicted",
        "Proportion of actual",
    ]

    settings_version = 1
    settingsHandler = ClassValuesContextHandler()

    selected_learner = Setting([0], schema_only=True)
    selection = ContextSetting(set())
    selected_quantity = Setting(0)
    append_predictions = Setting(True)
    append_probabilities = Setting(False)
    autocommit = Setting(True)

    UserAdviceMessages = [
        widget.Message(
            "Clicking on cells or in headers outputs the corresponding "
            "data instances",
            "click_cell"),
    ]

    class Error(widget.OWWidget.Error):
        no_regression = Msg(
            "Confusion Matrix cannot show regression results.")
        invalid_values = Msg(
            "Evaluation Results input contains invalid values")
        empty_input = widget.Msg(
            "Empty result on input. Nothing to display.")

    def __init__(self):
        super().__init__()

        self.data = None
        self.results = None
        self.learners = []
        self.headers = []

        # Control area: list of learners
        self.learners_box = gui.listBox(
            self.controlArea, self, "selected_learner", "learners",
            box='Learners', callback=self._learner_changed)

        # Buttons area: output options and the apply button
        self.outputbox = gui.vBox(self.buttonsArea)
        options_row = gui.hBox(self.outputbox)
        gui.checkBox(options_row, self, "append_predictions",
                     "Predictions", callback=self._invalidate)
        gui.checkBox(options_row, self, "append_probabilities",
                     "Probabilities", callback=self._invalidate)
        gui.auto_apply(self.outputbox, self, "autocommit", box=False)

        # Main area: quantity selector, matrix view, selection buttons
        main_box = gui.vBox(self.mainArea, box=True)

        quantity_row = gui.hBox(main_box)
        gui.rubber(quantity_row)
        gui.comboBox(quantity_row, self, "selected_quantity",
                     items=self.quantities, label="Show: ",
                     orientation=Qt.Horizontal, callback=self._update)

        self.tablemodel = QStandardItemModel(self)
        self.tableview = QTableView(editTriggers=QTableView.NoEditTriggers)
        view = self.tableview
        view.setModel(self.tablemodel)
        view.horizontalHeader().hide()
        view.verticalHeader().hide()
        view.horizontalHeader().setMinimumSectionSize(60)
        view.selectionModel().selectionChanged.connect(self._invalidate)
        view.setShowGrid(False)
        view.setItemDelegate(BorderedItemDelegate(Qt.white))
        view.setSizePolicy(QSizePolicy.MinimumExpanding,
                           QSizePolicy.MinimumExpanding)
        view.clicked.connect(self.cell_clicked)
        main_box.layout().addWidget(view)

        buttons_row = gui.hBox(main_box)
        gui.button(buttons_row, self, "Select Correct",
                   callback=self.select_correct, autoDefault=False)
        gui.button(buttons_row, self, "Select Misclassified",
                   callback=self.select_wrong, autoDefault=False)
        gui.button(buttons_row, self, "Clear Selection",
                   callback=self.select_none, autoDefault=False)

    @staticmethod
    def sizeHint():
        """Return the initial widget size."""
        return QSize(750, 340)

    def _item(self, i, j):
        """Return the model item at (i, j), or a new detached item."""
        return self.tablemodel.item(i, j) or QStandardItem()

    def _set_item(self, i, j, item):
        self.tablemodel.setItem(i, j, item)

    def _init_table(self, nclasses):
        """Set up captions, header cells and dimensions of the table."""
        # "Predicted" caption spanning the class columns
        caption = self._item(0, 2)
        caption.setData("Predicted", Qt.DisplayRole)
        caption.setTextAlignment(Qt.AlignCenter)
        caption.setFlags(Qt.NoItemFlags)
        self._set_item(0, 2, caption)

        # "Actual" caption spanning the class rows
        caption = self._item(2, 0)
        caption.setData("Actual", Qt.DisplayRole)
        caption.setTextAlignment(Qt.AlignHCenter | Qt.AlignBottom)
        caption.setFlags(Qt.NoItemFlags)
        self.tableview.setItemDelegateForColumn(
            0, gui.VerticalItemDelegate())
        self._set_item(2, 0, caption)

        self.tableview.setSpan(0, 2, 1, nclasses)
        self.tableview.setSpan(2, 0, nclasses, 1)

        bold_font = QFont(self.tablemodel.invisibleRootItem().font())
        bold_font.setBold(True)

        # the four top-left corner cells are inert
        for row in (0, 1):
            for col in (0, 1):
                corner = self._item(row, col)
                corner.setFlags(Qt.NoItemFlags)
                self._set_item(row, col, corner)

        last = len(self.headers) - 1
        for pos, label in enumerate(self.headers):
            for row, col in ((1, pos + 2), (pos + 2, 1)):
                header = self._item(row, col)
                header.setData(label, Qt.DisplayRole)
                header.setFont(bold_font)
                header.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                header.setFlags(Qt.ItemIsEnabled)
                if pos < last:
                    header.setData("r" if col == 1 else "b", BorderRole)
                    header.setData(QColor(192, 192, 192), BorderColorRole)
                self._set_item(row, col, header)

        hor_header = self.tableview.horizontalHeader()
        if len(' '.join(self.headers)) < 120:
            hor_header.setSectionResizeMode(QHeaderView.ResizeToContents)
        else:
            hor_header.setDefaultSectionSize(60)

        size = nclasses + 3
        self.tablemodel.setRowCount(size)
        self.tablemodel.setColumnCount(size)

    @Inputs.evaluation_results
    def set_results(self, results):
        """Handle new evaluation results on the input."""
        # false positive, pylint: disable=no-member
        prev_sel_learner = self.selected_learner.copy()
        self.clear()
        self.warning()
        self.closeContext()

        data = None
        if results is not None and results.data is not None:
            data = results.data[results.row_indices]

        self.Error.no_regression.clear()
        self.Error.empty_input.clear()
        if data is not None and not data.domain.has_discrete_class:
            self.Error.no_regression()
            data = results = None
        elif results is not None and not results.actual.size:
            self.Error.empty_input()
            data = results = None

        nan_values = False
        if results is not None:
            assert isinstance(results, Orange.evaluation.Results)
            has_nan = np.any(np.isnan(results.actual)) \
                or np.any(np.isnan(results.predicted))
            if has_nan:
                # Error out here (could filter them out with a warning
                # instead).
                nan_values = True
                results = data = None

        self.Error.invalid_values(shown=nan_values)

        self.results = results
        self.data = data

        if data is not None:
            class_values = data.domain.class_var.values
        elif results is not None:
            raise NotImplementedError

        if results is None:
            self.report_button.setDisabled(True)
            return

        self.report_button.setDisabled(False)

        nmodels = results.predicted.shape[0]
        summation = unicodedata.lookup("N-ARY SUMMATION")
        self.headers = class_values + (summation, )

        # NOTE: The 'learner_names' is set in 'Test Learners' widget.
        default_names = [f"Learner #{i + 1}" for i in range(nmodels)]
        self.learners = getattr(results, "learner_names", default_names)

        self._init_table(len(class_values))
        self.openContext(data.domain.class_var)

        keep_previous = bool(prev_sel_learner) \
            and prev_sel_learner[0] < len(self.learners)
        if keep_previous:
            self.selected_learner[:] = prev_sel_learner
        elif self.learners:
            self.selected_learner[:] = [0]

        self._update()
        self._set_selection()
        self.commit.now()

    def clear(self):
        """Drop the stored results and empty the table and controls."""
        self.results = None
        self.data = None
        self.tablemodel.clear()
        self.headers = []
        # Clear learners last. This action will invoke `_learner_changed`
        self.learners = []

    def select_correct(self):
        """Select the cells on the diagonal of the table."""
        diagonal = QItemSelection()
        for pos in range(2, self.tablemodel.rowCount()):
            cell = self.tablemodel.index(pos, pos)
            diagonal.select(cell, cell)
        self.tableview.selectionModel().select(
            diagonal, QItemSelectionModel.ClearAndSelect)

    def select_wrong(self):
        """Select the cells off the diagonal of the table."""
        off_diagonal = QItemSelection()
        size = self.tablemodel.rowCount()
        for row in range(2, size):
            for col in range(row + 1, size):
                upper = self.tablemodel.index(row, col)
                off_diagonal.select(upper, upper)
                lower = self.tablemodel.index(col, row)
                off_diagonal.select(lower, lower)
        self.tableview.selectionModel().select(
            off_diagonal, QItemSelectionModel.ClearAndSelect)

    def select_none(self):
        """Clear the selection."""
        self.tableview.selectionModel().clear()

    def cell_clicked(self, model_index):
        """Select a whole row, column or the matrix on header/sum clicks."""
        row, col = model_index.row(), model_index.column()
        if not row or not col:
            return

        last = self.tablemodel.rowCount() - 1
        index = self.tablemodel.index
        edge = (1, last)

        if row == col == 1 or row == col == last:
            selection = QItemSelection(index(2, 2), index(last, last))
        elif row in edge:
            selection = QItemSelection(index(2, col), index(last, col))
        elif col in edge:
            selection = QItemSelection(index(row, 2), index(row, last))
        else:
            return

        self.tableview.selectionModel().select(
            selection, QItemSelectionModel.ClearAndSelect)

    def _prepare_data(self):
        """Return (selected data, annotated data) for the selected cells."""
        learner_idx = self.selected_learner[0]
        cells = {(ind.row() - 2, ind.column() - 2)
                 for ind in self.tableview.selectedIndexes()}
        actual = self.results.actual
        learner_name = self.learners[learner_idx]
        predicted = self.results.predicted[learner_idx]
        selected = [row
                    for row, pair in enumerate(zip(actual, predicted))
                    if pair in cells]

        extra = []
        domain = self.data.domain
        class_var = domain.class_var
        metas = domain.metas
        names = [var.name
                 for var in chain(metas, [class_var], domain.attributes)]

        if self.append_predictions:
            extra.append(predicted.reshape(-1, 1))
            proposed = f"{class_var.name}({learner_name})"
            name = get_unique_names(names, proposed)
            var = Orange.data.DiscreteVariable(name, class_var.values)
            metas = metas + (var, )

        if self.append_probabilities and \
                self.results.probabilities is not None:
            probs = self.results.probabilities[learner_idx]
            extra.append(np.array(probs, dtype=object))
            pvars = tuple(
                Orange.data.ContinuousVariable("p({})".format(value))
                for value in class_var.values)
            metas = metas + pvars

        new_domain = Orange.data.Domain(self.data.domain.attributes,
                                        self.data.domain.class_vars,
                                        metas)
        data = self.data.transform(new_domain)
        if extra:
            with data.unlocked(data.metas):
                first_new = len(self.data.domain.metas)
                data.metas[:, first_new:] = np.hstack(tuple(extra))
        data.name = learner_name

        if selected:
            annotated_data = create_annotated_table(data, selected)
            data = data[selected]
        else:
            annotated_data = create_annotated_table(data, [])
            data = None

        return data, annotated_data

    @gui.deferred
    def commit(self):
        """Send the data instances of the selected cells to the outputs."""
        data = annotated_data = None
        if self.results is not None and self.data is not None \
                and self.selected_learner:
            data, annotated_data = self._prepare_data()

        self.Outputs.selected_data.send(data)
        self.Outputs.annotated_data.send(annotated_data)

    def _invalidate(self):
        """Store the current selection in settings and schedule a commit."""
        self.selection = {(ind.row() - 2, ind.column() - 2)
                          for ind in self.tableview.selectedIndexes()}
        self.commit.deferred()

    def _set_selection(self):
        """Apply the stored selection to the table view."""
        restored = QItemSelection()
        index = self.tableview.model().index
        for row, col in self.selection:
            cell = index(row + 2, col + 2)
            restored.select(cell, cell)
        self.tableview.selectionModel().select(
            restored, QItemSelectionModel.ClearAndSelect)

    def _learner_changed(self):
        self._update()
        self._set_selection()
        self.commit.deferred()

    def _update(self):
        """Fill the table cells for the selected learner and quantity."""
        def _isinvalid(x):
            return isnan(x) or isinf(x)

        if self.results is None or not self.selected_learner:
            return

        cmatrix = confusion_matrix(self.results, self.selected_learner[0])
        colsum = cmatrix.sum(axis=0)
        rowsum = cmatrix.sum(axis=1)
        n = len(cmatrix)
        diag = np.diag_indices(n)

        colors = cmatrix.astype(np.double)
        colors[diag] = 0
        if self.selected_quantity == 0:
            normalized = cmatrix.astype(int)
            formatstr = "{}"
            div = np.array([colors.max()])
        elif self.selected_quantity == 1:
            normalized = 100 * cmatrix / colsum
            div = colors.max(axis=0)
            formatstr = "{:2.1f} %"
        else:
            normalized = 100 * cmatrix / rowsum[:, np.newaxis]
            div = colors.max(axis=1)[:, np.newaxis]
            formatstr = "{:2.1f} %"
        div[div == 0] = 1
        colors /= div
        maxval = normalized[diag].max()
        if maxval > 0:
            colors[diag] = normalized[diag] / maxval

        for i in range(n):
            for j in range(n):
                val = normalized[i, j]
                col_val = colors[i, j]
                item = self._item(i + 2, j + 2)
                text = "NA" if _isinvalid(val) else formatstr.format(val)
                item.setData(text, Qt.DisplayRole)
                hue = 240 if i == j else 0
                lightness = 255 if _isinvalid(col_val) \
                    else int(255 - 30 * col_val)
                bkcolor = QColor.fromHsl(hue, 160, lightness)
                item.setData(QBrush(bkcolor), Qt.BackgroundRole)
                # bkcolor is light-ish so use a black text
                item.setData(QBrush(Qt.black), Qt.ForegroundRole)
                item.setData("trbl", BorderRole)
                item.setToolTip("actual: {}\npredicted: {}".format(
                    self.headers[i], self.headers[j]))
                item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                item.setFlags(Qt.ItemIsEnabled | Qt.ItemIsSelectable)
                self._set_item(i + 2, j + 2, item)

        bold_font = self.tablemodel.invisibleRootItem().font()
        bold_font.setBold(True)

        def _sum_item(value, border=""):
            cell = QStandardItem()
            cell.setData(value, Qt.DisplayRole)
            cell.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
            cell.setFlags(Qt.ItemIsEnabled)
            cell.setFont(bold_font)
            cell.setData(border, BorderRole)
            cell.setData(QColor(192, 192, 192), BorderColorRole)
            return cell

        for i in range(n):
            self._set_item(n + 2, i + 2, _sum_item(int(colsum[i]), "t"))
            self._set_item(i + 2, n + 2, _sum_item(int(rowsum[i]), "l"))
        self._set_item(n + 2, n + 2, _sum_item(int(rowsum.sum())))

    def send_report(self):
        """Add the currently shown table to the report."""
        if self.results is None or not self.selected_learner:
            return
        self.report_table(
            "Confusion matrix for {} (showing {})".format(
                self.learners[self.selected_learner[0]],
                self.quantities[self.selected_quantity].lower()),
            self.tableview)

    @classmethod
    def migrate_settings(cls, settings, version):
        """Wrap an `int` 'selected_learner' from unversioned settings."""
        if version:
            return
        # For some period of time the 'selected_learner' property was
        # changed from List[int] -> int
        # (commit 4e49bb3fd0e11262f3ebf4b1116a91a4b49cc982) and then back
        # again (commit 8a492d79a2e17154a0881e24a05843406c8892c0)
        stored = settings.get("selected_learner")
        if "selected_learner" in settings and isinstance(stored, int):
            settings["selected_learner"] = [stored]
class OWLoadModel(widget.OWWidget):
    """Load a pickled model from a file and send it on the "Model" output.

    The widget keeps a list of recently used files (``history``) and the
    last selected file (``filename``) in its settings, and offers a combo
    box of recent files, a browse button and a reload button.
    """

    name = "Load Model"
    description = "Load a model from an input file."
    priority = 3050
    replaces = ["Orange.widgets.classify.owloadclassifier.OWLoadClassifier"]
    icon = "icons/LoadModel.svg"

    outputs = [("Model", Model, widget.Dynamic)]

    #: List of recent filenames.
    history = Setting([])
    #: Current (last selected) filename or None.
    filename = Setting(None)

    class Error(widget.OWWidget.Error):
        load_error = Msg("An error occured while reading '{}'")

    FILTER = owsavemodel.OWSaveModel.FILTER

    want_main_area = False
    resizing_enabled = False

    def __init__(self):
        super().__init__()
        self.selectedIndex = -1

        box = gui.widgetBox(
            self.controlArea, self.tr("File"), orientation=QHBoxLayout())

        self.filesCB = gui.comboBox(
            box, self, "selectedIndex", callback=self._on_recent)
        self.filesCB.setMinimumContentsLength(20)
        self.filesCB.setSizeAdjustPolicy(
            QComboBox.AdjustToMinimumContentsLength)

        self.loadbutton = gui.button(box, self, "...", callback=self.browse)
        self.loadbutton.setIcon(
            self.style().standardIcon(QStyle.SP_DirOpenIcon))
        self.loadbutton.setSizePolicy(QSizePolicy.Maximum, QSizePolicy.Fixed)

        self.reloadbutton = gui.button(
            box, self, "Reload", callback=self.reload, default=True)
        self.reloadbutton.setIcon(
            self.style().standardIcon(QStyle.SP_BrowserReload))
        self.reloadbutton.setSizePolicy(QSizePolicy.Maximum, QSizePolicy.Fixed)

        # Keep only history entries that still exist on disk (at most 20).
        self.history = list(filter(os.path.isfile, self.history))[:20]
        for filename in self.history:
            self.filesCB.addItem(os.path.basename(filename), userData=filename)

        # Restore the current selection if the filename is in the history
        # list; otherwise forget it and disable reloading.
        if self.filename in self.history:
            self.selectedIndex = self.history.index(self.filename)
        else:
            self.selectedIndex = -1
            self.filename = None
            self.reloadbutton.setEnabled(False)

        if self.filename:
            # Defer loading until the event loop is running.
            QTimer.singleShot(0, lambda: self.load(self.filename))

    def browse(self):
        """Select a filename using an open file dialog and load it."""
        if self.filename is None:
            startdir = stdpaths.Documents
        else:
            startdir = os.path.dirname(self.filename)

        filename, _ = QFileDialog.getOpenFileName(
            self, self.tr("Open"), directory=startdir, filter=self.FILTER)

        if filename:
            self.load(filename)

    def reload(self):
        """Reload the current file; do nothing when no file is selected."""
        if self.filename is None:
            return
        self.load(self.filename)

    def load(self, filename):
        """Load the object from `filename` and send it to the output.

        On failure the load error is shown and ``None`` is sent, so that a
        previously loaded model does not linger on the output.
        """
        # SECURITY NOTE(review): pickle.load executes arbitrary code embedded
        # in the file. Only files from trusted sources should be opened here.
        try:
            with open(filename, "rb") as f:
                model = pickle.load(f)
        except (pickle.UnpicklingError, OSError, EOFError,
                AttributeError, ImportError, IndexError):
            # The pickle documentation lists AttributeError, ImportError and
            # IndexError among the errors unpickling may raise.
            self.Error.load_error(os.path.split(filename)[-1])
            self.send("Model", None)
        else:
            self.Error.load_error.clear()
            self._remember(filename)
            self.send("Model", model)

    def _remember(self, filename):
        """Remember `filename` was accessed (move it to the top of history)."""
        if filename in self.history:
            index = self.history.index(filename)
            del self.history[index]
            self.filesCB.removeItem(index)

        self.history.insert(0, filename)
        self.filesCB.insertItem(0, os.path.basename(filename),
                                userData=filename)

        self.selectedIndex = 0
        self.filename = filename
        self.reloadbutton.setEnabled(self.selectedIndex != -1)

    def _on_recent(self):
        """Load the file picked from the recent-files combo box."""
        self.load(self.history[self.selectedIndex])
class OWund_flux(widget.OWWidget):
    """Widget that collects the `und_flux` parameters, runs the calculation
    and sends the resulting spec file name and data table."""

    name = "und_flux"
    id = "orange.widgets.dataund_flux"
    description = "xoppy application to compute..."
    icon = "icons/xoppy_und_flux.png"
    author = "create_widget.py"
    maintainer_email = "*****@*****.**"
    priority = 10
    category = ""
    keywords = ["xoppy", "und_flux"]
    outputs = [
        {
            "name": "xoppy_table",
            "type": Table,
            "doc": ""
        },
        {
            "name": "xoppy_specfile",
            "type": str,
            "doc": ""
        }
    ]

    want_main_area = False

    ELECTRONENERGY = Setting(6.04)
    ELECTRONENERGYSPREAD = Setting(0.001)
    ELECTRONCURRENT = Setting(0.2)
    ELECTRONBEAMSIZEH = Setting(0.000395)
    ELECTRONBEAMSIZEV = Setting(9.9e-06)
    ELECTRONBEAMDIVERGENCEH = Setting(1.05e-05)
    ELECTRONBEAMDIVERGENCEV = Setting(3.9e-06)
    PERIODID = Setting(0.018)
    NPERIODS = Setting(222)
    KV = Setting(1.68)
    DISTANCE = Setting(30.0)
    GAPH = Setting(0.001)
    GAPV = Setting(0.001)
    PHOTONENERGYMIN = Setting(3000.0)
    PHOTONENERGYMAX = Setting(55000.0)
    PHOTONENERGYPOINTS = Setting(500)
    METHOD = Setting(0)

    def __init__(self):
        super().__init__()

        box0 = gui.widgetBox(self.controlArea, " ", orientation="horizontal")
        # widget buttons: compute, set defaults, help
        gui.button(box0, self, "Compute", callback=self.compute)
        gui.button(box0, self, "Defaults", callback=self.defaults)
        gui.button(box0, self, "Help", callback=self.help1)
        self.process_showers()

        box = gui.widgetBox(self.controlArea, " ", orientation="vertical")

        # Labels and visibility flags are positional: entry i belongs to the
        # i-th control created below. Fetch them once instead of per field.
        labels = self.unitLabels()
        flags = self.unitFlags()

        # (setting name, value type) of every line edit, in display order.
        line_edit_fields = (
            ("ELECTRONENERGY", float),
            ("ELECTRONENERGYSPREAD", float),
            ("ELECTRONCURRENT", float),
            ("ELECTRONBEAMSIZEH", float),
            ("ELECTRONBEAMSIZEV", float),
            ("ELECTRONBEAMDIVERGENCEH", float),
            ("ELECTRONBEAMDIVERGENCEV", float),
            ("PERIODID", float),
            ("NPERIODS", int),
            ("KV", float),
            ("DISTANCE", float),
            ("GAPH", float),
            ("GAPV", float),
            ("PHOTONENERGYMIN", float),
            ("PHOTONENERGYMAX", float),
            ("PHOTONENERGYPOINTS", int),
        )
        validator_for = {float: QDoubleValidator, int: QIntValidator}

        # widget indices 0..15: one line edit per numeric setting
        for idx, (setting, value_type) in enumerate(line_edit_fields):
            box1 = gui.widgetBox(box)
            gui.lineEdit(box1, self, setting,
                         label=labels[idx], addSpace=True,
                         valueType=value_type,
                         validator=validator_for[value_type]())
            self.show_at(flags[idx], box1)

        # widget index 16: choice of the calculation code
        idx = len(line_edit_fields)
        box1 = gui.widgetBox(box)
        gui.comboBox(box1, self, "METHOD",
                     label=labels[idx], addSpace=True,
                     items=['US', 'URGENT', 'SRW'],
                     valueType=int, orientation="horizontal")
        self.show_at(flags[idx], box1)

        gui.rubber(self.controlArea)

    def unitLabels(self):
        """Return the label of every control, in display order."""
        return [
            "Electron Energy [GeV]",
            "Electron Energy Spread",
            "Electron Current [A]",
            "Electron Beam Size H [m]",
            "Electron Beam Size V [m]",
            "Electron Beam Divergence H [rad]",
            "Electron Beam Divergence V [rad]",
            "Period ID [m]",
            "Number of periods",
            "Kv [undulator K value vertical field]",
            "Distance to slit [m]",
            "Slit gap H [m]",
            "Slit gap V [m]",
            "photon Energy Min [eV]",
            "photon Energy Max [eV]",
            "photon Energy Points",
            "calculation code"
        ]

    def unitFlags(self):
        """Return the visibility flag passed to `show_at` for every control."""
        return [
            "True", "True", "True", "True", "True", "True", "True", "True",
            "True", "True", "True", "True", "True", "True", "True", "True",
            "True"
        ]

    def compute(self):
        """Run the calculation and send the spec file name and data table.

        Raises:
            ValueError: if the produced file contains no ``#L`` label line.
        """
        fileName = xoppy_calc_und_flux(
            ELECTRONENERGY=self.ELECTRONENERGY,
            ELECTRONENERGYSPREAD=self.ELECTRONENERGYSPREAD,
            ELECTRONCURRENT=self.ELECTRONCURRENT,
            ELECTRONBEAMSIZEH=self.ELECTRONBEAMSIZEH,
            ELECTRONBEAMSIZEV=self.ELECTRONBEAMSIZEV,
            ELECTRONBEAMDIVERGENCEH=self.ELECTRONBEAMDIVERGENCEH,
            ELECTRONBEAMDIVERGENCEV=self.ELECTRONBEAMDIVERGENCEV,
            PERIODID=self.PERIODID,
            NPERIODS=self.NPERIODS,
            KV=self.KV,
            DISTANCE=self.DISTANCE,
            GAPH=self.GAPH,
            GAPV=self.GAPV,
            PHOTONENERGYMIN=self.PHOTONENERGYMIN,
            PHOTONENERGYMAX=self.PHOTONENERGYMAX,
            PHOTONENERGYPOINTS=self.PHOTONENERGYPOINTS,
            METHOD=self.METHOD)

        # send specfile
        self.send("xoppy_specfile", fileName)

        print("Loading file: ", fileName)
        # load spec file with one scan, # is comment
        out = np.loadtxt(fileName)
        print("data shape: ", out.shape)

        # Get the column labels from the first line containing "#L". The
        # file is closed deterministically, and a plain line is used rather
        # than indexing the list of lines with a NumPy index array.
        with open(fileName) as spec_file:
            label_line = next(
                (line for line in spec_file if "#L" in line), None)
        if label_line is None:
            raise ValueError(
                "No '#L' label line found in {}".format(fileName))
        # Strip the line terminator so the last label does not carry it.
        # NOTE(review): the label separator is kept exactly as written in
        # this file; confirm it matches the spec-file label format.
        labels = label_line.rstrip("\r\n").replace("#L ", "").split(" ")
        print("data labels: ", labels)

        # build and send orange table
        domain = Domain([ContinuousVariable(i) for i in labels])
        table = Table.from_numpy(domain, out)
        self.send("xoppy_table", table)

    def defaults(self):
        """Reset all settings and recompute."""
        self.resetSettings()
        self.compute()

    def help1(self):
        """Show the documentation for this application."""
        print("help pressed.")
        xoppy_doc('und_flux')
class OWScatterPlot(OWWidget):
    """Scatterplot visualization with explorative analysis and intelligent
    data visualization enhancements."""

    name = 'Scatter Plot'
    description = "Interactive scatter plot visualization with " \
                  "intelligent data visualization enhancements."
    icon = "icons/ScatterPlot.svg"
    priority = 140

    inputs = [("Data", Table, "set_data", Default),
              ("Data Subset", Table, "set_subset_data"),
              ("Features", AttributeList, "set_shown_attributes")]

    outputs = [("Selected Data", Table, Default),
               (ANNOTATED_DATA_SIGNAL_NAME, Table),
               ("Features", Table)]

    settingsHandler = DomainContextHandler()

    auto_send_selection = Setting(True)
    auto_sample = Setting(True)
    toolbar_selection = Setting(0)

    attr_x = ContextSetting(None)
    attr_y = ContextSetting(None)
    selection = Setting(None, schema_only=True)

    graph = SettingProvider(OWScatterPlotGraph)

    jitter_sizes = [0, 0.1, 0.5, 1, 2, 3, 4, 5, 7, 10]

    graph_name = "graph.plot_widget.plotItem"

    class Information(OWWidget.Information):
        sampled_sql = Msg("Large SQL table; showing a sample.")

    def __init__(self):
        super().__init__()

        box = gui.vBox(self.mainArea, True, margin=0)
        self.graph = OWScatterPlotGraph(self, box, "ScatterPlot")
        box.layout().addWidget(self.graph.plot_widget)
        plot = self.graph.plot_widget

        axispen = QPen(self.palette().color(QPalette.Text))
        axis = plot.getAxis("bottom")
        axis.setPen(axispen)

        axis = plot.getAxis("left")
        axis.setPen(axispen)

        self.data = None  # Orange.data.Table
        self.subset_data = None  # Orange.data.Table
        self.data_metas_X = None  # self.data, where primitive metas are moved to X
        self.sql_data = None  # Orange.data.sql.table.SqlTable
        self.attribute_selection_list = None  # list of Orange.data.Variable
        self.__timer = QTimer(self, interval=1200)
        self.__timer.timeout.connect(self.add_data)

        common_options = dict(
            labelWidth=50, orientation=Qt.Horizontal, sendSelectedValue=True,
            valueType=str)
        box = gui.vBox(self.controlArea, "Axis Data")
        dmod = DomainModel
        self.xy_model = DomainModel(dmod.MIXED, valid_types=dmod.PRIMITIVE)
        self.cb_attr_x = gui.comboBox(
            box, self, "attr_x", label="Axis x:", callback=self.update_attr,
            model=self.xy_model, **common_options)
        self.cb_attr_y = gui.comboBox(
            box, self, "attr_y", label="Axis y:", callback=self.update_attr,
            model=self.xy_model, **common_options)

        vizrank_box = gui.hBox(box)
        gui.separator(vizrank_box, width=common_options["labelWidth"])
        self.vizrank, self.vizrank_button = ScatterPlotVizRank.add_vizrank(
            vizrank_box, self, "Find Informative Projections", self.set_attr)

        gui.separator(box)

        gui.valueSlider(
            box, self, value='graph.jitter_size', label='Jittering: ',
            values=self.jitter_sizes, callback=self.reset_graph_data,
            labelFormat=lambda x:
            "None" if x == 0 else ("%.1f %%" if x < 1 else "%d %%") % x)
        gui.checkBox(
            gui.indentedBox(box), self, 'graph.jitter_continuous',
            'Jitter continuous values', callback=self.reset_graph_data)

        self.sampling = gui.auto_commit(
            self.controlArea, self, "auto_sample", "Sample", box="Sampling",
            callback=self.switch_sampling, commit=lambda: self.add_data(1))
        self.sampling.setVisible(False)

        g = self.graph.gui
        g.point_properties_box(self.controlArea)
        self.models = [self.xy_model] + g.points_models

        box = gui.vBox(self.controlArea, "Plot Properties")
        g.add_widgets([g.ShowLegend, g.ShowGridLines], box)
        gui.checkBox(
            box, self, value='graph.tooltip_shows_all',
            label='Show all data on mouse hover')
        self.cb_class_density = gui.checkBox(
            box, self, value='graph.class_density',
            label='Show class density', callback=self.update_density)
        self.cb_reg_line = gui.checkBox(
            box, self, value='graph.show_reg_line',
            label='Show regression line',
            callback=self.update_regression_line)
        gui.checkBox(
            box, self, 'graph.label_only_selected',
            'Label only selected points', callback=self.graph.update_labels)

        self.zoom_select_toolbar = g.zoom_select_toolbar(
            gui.vBox(self.controlArea, "Zoom/Select"), nomargin=True,
            buttons=[g.StateButtonsBegin, g.SimpleSelect, g.Pan, g.Zoom,
                     g.StateButtonsEnd, g.ZoomReset]
        )
        buttons = self.zoom_select_toolbar.buttons
        buttons[g.Zoom].clicked.connect(self.graph.zoom_button_clicked)
        buttons[g.Pan].clicked.connect(self.graph.pan_button_clicked)
        buttons[g.SimpleSelect].clicked.connect(self.graph.select_button_clicked)
        buttons[g.ZoomReset].clicked.connect(self.graph.reset_button_clicked)
        self.controlArea.layout().addStretch(100)
        self.icons = gui.attributeIconDict

        p = self.graph.plot_widget.palette()
        self.graph.set_palette(p)

        gui.auto_commit(self.controlArea, self, "auto_send_selection",
                        "Send Selection", "Send Automatically")

        def zoom(s):
            """Zoom in/out by factor `s`."""
            viewbox = plot.getViewBox()
            # scaleBy scales the view's bounds (the axis range)
            viewbox.scaleBy((1 / s, 1 / s))

        def fit_to_view():
            viewbox = plot.getViewBox()
            viewbox.autoRange()

        zoom_in = QAction(
            "Zoom in", self, triggered=lambda: zoom(1.25)
        )
        zoom_in.setShortcuts([QKeySequence(QKeySequence.ZoomIn),
                              QKeySequence(self.tr("Ctrl+="))])
        zoom_out = QAction(
            "Zoom out", self, shortcut=QKeySequence.ZoomOut,
            triggered=lambda: zoom(1 / 1.25)
        )
        zoom_fit = QAction(
            "Fit in view", self,
            shortcut=QKeySequence(Qt.ControlModifier | Qt.Key_0),
            triggered=fit_to_view
        )
        self.addActions([zoom_in, zoom_out, zoom_fit])

    def keyPressEvent(self, event):
        """Forward the event and refresh the tooltip for the new modifiers."""
        super().keyPressEvent(event)
        self.graph.update_tooltip(event.modifiers())

    def keyReleaseEvent(self, event):
        """Forward the event and refresh the tooltip for the new modifiers."""
        super().keyReleaseEvent(event)
        self.graph.update_tooltip(event.modifiers())

    def reset_graph_data(self, *_):
        """Rescale the graph data and redraw, if there is any data."""
        if self.data is not None:
            self.graph.rescale_data()
            self.update_graph()

    def set_data(self, data):
        """Handler for the "Data" input.

        Large SQL tables are sampled; empty data is treated as no data.
        Nothing is done when the new data has the same checksum as the
        current data.
        """
        self.clear_messages()
        self.Information.sampled_sql.clear()
        self.__timer.stop()
        self.sampling.setVisible(False)
        self.sql_data = None
        if isinstance(data, SqlTable):
            if data.approx_len() < 4000:
                data = Table(data)
            else:
                self.Information.sampled_sql()
                self.sql_data = data
                data_sample = data.sample_time(0.8, no_cache=True)
                data_sample.download_data(2000, partial=True)
                data = Table(data_sample)
                self.sampling.setVisible(True)
                if self.auto_sample:
                    self.__timer.start()

        if data is not None and (len(data) == 0 or len(data.domain) == 0):
            data = None
        if self.data and data and self.data.checksum() == data.checksum():
            return

        self.closeContext()
        same_domain = (self.data and data and
                       data.domain.checksum() == self.data.domain.checksum())
        self.data = data
        self.data_metas_X = self.move_primitive_metas_to_X(data)

        if not same_domain:
            self.init_attr_values()
        self.vizrank.initialize()
        self.vizrank.attrs = \
            self.data.domain.attributes if self.data is not None else []
        self.vizrank_button.setEnabled(
            self.data is not None and
            self.data.domain.class_var is not None and
            len(self.data.domain.attributes) > 1 and len(self.data) > 1)
        if self.data is not None and self.data.domain.class_var is None \
                and len(self.data.domain.attributes) > 1 \
                and len(self.data) > 1:
            self.vizrank_button.setToolTip(
                "Data with a class variable is required.")
        else:
            self.vizrank_button.setToolTip("")
        self.openContext(self.data)

        def findvar(name, iterable):
            """Find a Orange.data.Variable in `iterable` by name"""
            for el in iterable:
                if isinstance(el, Orange.data.Variable) and el.name == name:
                    return el
            return None

        # handle restored settings from < 3.3.9 when attr_* were stored
        # by name
        if isinstance(self.attr_x, str):
            self.attr_x = findvar(self.attr_x, self.xy_model)
        if isinstance(self.attr_y, str):
            self.attr_y = findvar(self.attr_y, self.xy_model)
        if isinstance(self.graph.attr_label, str):
            self.graph.attr_label = findvar(
                self.graph.attr_label, self.label_model)
        if isinstance(self.graph.attr_color, str):
            self.graph.attr_color = findvar(
                self.graph.attr_color, self.color_model)
        if isinstance(self.graph.attr_shape, str):
            self.graph.attr_shape = findvar(
                self.graph.attr_shape, self.shape_model)
        if isinstance(self.graph.attr_size, str):
            self.graph.attr_size = findvar(
                self.graph.attr_size, self.size_model)

    def add_data(self, time=0.4):
        """Download another sample of the SQL table and append it to the data.

        Stops the sampling timer instead once more than 2000 rows are held.
        """
        if self.data and len(self.data) > 2000:
            return self.__timer.stop()
        data_sample = self.sql_data.sample_time(time, no_cache=True)
        if data_sample:
            data_sample.download_data(2000, partial=True)
            data = Table(data_sample)
            self.data = Table.concatenate((self.data, data), axis=0)
            self.data_metas_X = self.move_primitive_metas_to_X(self.data)
            self.handleNewSignals()

    def switch_sampling(self):
        """Restart periodic sampling when auto-sampling an SQL table."""
        self.__timer.stop()
        if self.auto_sample and self.sql_data:
            self.add_data()
            self.__timer.start()

    def move_primitive_metas_to_X(self, data):
        """Return `data` with primitive metas moved among the attributes.

        `None` is passed through unchanged.
        """
        if data is not None:
            new_attrs = [a for a in data.domain.attributes + data.domain.metas
                         if a.is_primitive()]
            new_metas = [m for m in data.domain.metas if not m.is_primitive()]
            data = Table.from_table(
                Domain(new_attrs, data.domain.class_vars, new_metas), data)
        return data

    def set_subset_data(self, subset_data):
        """Handler for the "Data Subset" input; large SQL tables are rejected."""
        self.warning()
        if isinstance(subset_data, SqlTable):
            if subset_data.approx_len() < AUTO_DL_LIMIT:
                subset_data = Table(subset_data)
            else:
                self.warning("Data subset does not support large Sql tables")
                subset_data = None
        self.subset_data = self.move_primitive_metas_to_X(subset_data)
        self.controls.graph.alpha_value.setEnabled(subset_data is None)

    # called when all signals are received, so the graph is updated only once
    def handleNewSignals(self):
        """Push data to the graph, apply pending attribute and point
        selections, and commit the outputs."""
        self.graph.new_data(self.data_metas_X, self.subset_data)
        if self.attribute_selection_list and \
                all(attr in self.graph.domain
                    for attr in self.attribute_selection_list):
            self.attr_x = self.attribute_selection_list[0]
            self.attr_y = self.attribute_selection_list[1]
        self.attribute_selection_list = None
        self.update_graph()
        self.cb_class_density.setEnabled(self.graph.can_draw_density())
        self.cb_reg_line.setEnabled(self.graph.can_draw_regresssion_line())
        self.apply_selection()
        self.unconditional_commit()

    def apply_selection(self):
        """Apply selection saved in workflow."""
        if self.data is not None and self.selection is not None:
            self.graph.selection = np.zeros(len(self.data), dtype=np.uint8)
            # Drop saved indices that do not exist in the current data.
            self.selection = [x for x in self.selection if x < len(self.data)]
            self.graph.selection[self.selection] = 1
            self.graph.update_colors(keep_colors=True)

    def set_shown_attributes(self, attributes):
        """Handler for the "Features" input; keeps the first two attributes."""
        if attributes and len(attributes) >= 2:
            self.attribute_selection_list = attributes[:2]
        else:
            self.attribute_selection_list = None

    def get_shown_attributes(self):
        """Return the pair of attributes shown on the x and y axes."""
        return self.attr_x, self.attr_y

    def init_attr_values(self):
        """Reset the models and the axis/point attributes for the new domain."""
        domain = self.data and self.data.domain
        for model in self.models:
            model.set_domain(domain)
        self.attr_x = self.xy_model[0] if self.xy_model else None
        self.attr_y = self.xy_model[1] if len(self.xy_model) >= 2 \
            else self.attr_x
        self.graph.attr_color = domain and self.data.domain.class_var or None
        self.graph.attr_shape = None
        self.graph.attr_size = None
        self.graph.attr_label = None

    def set_attr(self, attr_x, attr_y):
        """Set both axis attributes and refresh the plot."""
        self.attr_x, self.attr_y = attr_x, attr_y
        self.update_attr()

    def update_attr(self):
        """Redraw after an axis attribute change and resend the features."""
        self.update_graph()
        self.cb_class_density.setEnabled(self.graph.can_draw_density())
        self.cb_reg_line.setEnabled(self.graph.can_draw_regresssion_line())
        self.send_features()

    def update_colors(self):
        self.graph.update_colors()
        self.cb_class_density.setEnabled(self.graph.can_draw_density())

    def update_density(self):
        self.update_graph(reset_view=False)

    def update_regression_line(self):
        self.update_graph(reset_view=False)

    def update_graph(self, reset_view=True, **_):
        """Redraw the plot for the current axis attributes, if the graph has
        data."""
        axis = self.graph.plot_widget.getAxis("left")
        axis.textWidth = 0
        self.graph.zoomStack = []
        if self.graph.data is None:
            return
        self.graph.update_data(self.attr_x, self.attr_y, reset_view)

    def selection_changed(self):
        self.send_data()

    @staticmethod
    def create_groups_table(data, selection):
        """Return a copy of `data` with an added discrete meta column holding
        the selection group of each row, or None when `data` is None."""
        if data is None:
            return None
        names = [var.name for var in data.domain.variables + data.domain.metas]
        name = get_next_name(names, "Selection group")
        metas = data.domain.metas + (
            DiscreteVariable(
                name,
                ["Unselected"] + ["G{}".format(i + 1)
                                  for i in range(np.max(selection))]),
        )
        domain = Domain(data.domain.attributes, data.domain.class_vars, metas)
        table = Table(
            domain, data.X, data.Y,
            metas=np.hstack((data.metas, selection.reshape(len(data), 1))))
        table.attributes = data.attributes
        table.ids = data.ids
        return table

    def send_data(self):
        """Send the selected and the annotated data to the outputs."""
        selected = None
        selection = None
        # TODO: Implement selection for sql data
        graph = self.graph
        if isinstance(self.data, SqlTable):
            selected = self.data
        elif self.data is not None:
            selection = graph.get_selection()
            if len(selection) > 0:
                selected = self.data[selection]
        if graph.selection is not None and np.max(graph.selection) > 1:
            annotated = self.create_groups_table(self.data, graph.selection)
        else:
            annotated = create_annotated_table(self.data, selection)
        self.send("Selected Data", selected)
        self.send(ANNOTATED_DATA_SIGNAL_NAME, annotated)

        # Store current selection in a setting that is stored in workflow.
        # `selection` stays None when there is no data or the data is an SQL
        # table, so it must be checked before taking its length.
        if self.selection is not None and selection is not None \
                and len(selection):
            self.selection = list(selection)

    def send_features(self):
        """Send the two axis attributes as a table on the "Features" output."""
        features = None
        if self.attr_x or self.attr_y:
            dom = Domain([], metas=(StringVariable(name="feature"),))
            features = Table(dom, [[self.attr_x], [self.attr_y]])
            features.name = "Features"
        self.send("Features", features)

    def commit(self):
        self.send_data()
        self.send_features()

    def get_widget_name_extension(self):
        if self.data is not None:
            return "{} vs {}".format(self.attr_x.name, self.attr_y.name)

    def send_report(self):
        """Report the plot with a caption describing the point properties."""
        if self.data is None:
            return

        def name(var):
            return var and var.name

        caption = report.render_items_vert((
            ("Color", name(self.graph.attr_color)),
            ("Label", name(self.graph.attr_label)),
            ("Shape", name(self.graph.attr_shape)),
            ("Size", name(self.graph.attr_size)),
            ("Jittering", (self.attr_x.is_discrete or
                           self.attr_y.is_discrete or
                           self.graph.jitter_continuous) and
             self.graph.jitter_size)))
        self.report_plot()
        if caption:
            self.report_caption(caption)

    def closeContext(self):
        if self.current_context is not None:
            # When dataset changes, forget selection
            self.selection = None
        super().closeContext()

    def onDeleteWidget(self):
        super().onDeleteWidget()
        self.graph.plot_widget.getViewBox().deleteLater()
        self.graph.plot_widget.clear()
class OWSparkSQLTableContext(SharedSparkContext, widget.OWWidget):
    """Widget that sends a Spark DataFrame built from a chosen Hive table."""

    priority = 1
    name = "Hive Table"
    description = "Create a Spark DataFrame from a Hive Table"
    icon = "../icons/Hive.png"
    outputs = [("DataFrame", pyspark.sql.DataFrame, widget.Dynamic)]
    want_main_area = False
    resizing_enabled = True

    # Class-level defaults; __init__ gives every instance its own
    # `databases` list so that these are never mutated.
    databases = ['default']
    tables = list()
    out_df = None
    database = ''
    table = ''
    saved_gui_params = Setting(OrderedDict())

    def __init__(self):
        super().__init__()

        # Create parameters Box.
        box = gui.widgetBox(self.controlArea, "Spark SQL Table", addSpace=True)

        self.gui_parameters = OrderedDict()

        if self.hc:
            self.databases = self._query_databases()
        else:
            # Copy the class-level default: the list may be appended to
            # below, and appending to the class attribute itself would leak
            # the entry into every other instance of the widget.
            self.databases = list(self.databases)

        default_value = self.saved_gui_params.get('database', 'default')
        if default_value not in self.databases:
            self.databases.append(default_value)
        self.database = default_value

        self.refresh_databases_btn = gui.button(
            box, self, label='Refresh databases',
            callback=self.fill_database_list)
        self.gui_parameters['database'] = GuiParam(
            parent_widget=box, list_values=self.databases, label='Database',
            default_value=default_value, callback_func=self.refresh_database)

        default_value = self.saved_gui_params.get('table', '')
        self.gui_parameters['table'] = GuiParam(
            parent_widget=box, label='Table', default_value=default_value,
            list_values=[default_value])
        self.refresh_database(self.gui_parameters['database'].get_value())

        action_box = gui.widgetBox(box)
        # Action Button
        self.create_sc_btn = gui.button(
            action_box, self, label='Submit', callback=self.submit)

    def _query_databases(self):
        """Return the database names reported by "show databases"."""
        return [i.result for i in self.hc.sql("show databases").collect()]

    def fill_database_list(self):
        """Re-query the databases (when a Hive context exists) and refresh
        the database selector."""
        if self.hc:
            self.databases = self._query_databases()
        self.gui_parameters['database'].update(values=self.databases)

    def refresh_database(self, text):
        """Make `text` the current database and reload its table list."""
        if self.hc is None:
            return
        self.database = text
        # The original compared the `databases` list with '' (always true);
        # the check that matters is that a database name is actually set.
        if self.databases and self.database:
            self.tables = self.hc.tableNames(self.database)
            self.gui_parameters['table'].update(values=self.tables)

    def dummy_func(self):
        pass

    def submit(self):
        """Send the selected table as a DataFrame, save the chosen values
        and hide the widget."""
        if self.hc is None:
            return
        self.database = self.gui_parameters['database'].get_value()
        self.table = self.gui_parameters['table'].get_value()
        self.out_df = self.hc.table(self.database + '.' + self.table)
        self.send("DataFrame", self.out_df)
        self.update_saved_gui_parameters()
        self.hide()

    def update_saved_gui_parameters(self):
        """Copy the current value of every GUI parameter into the settings."""
        for k in self.gui_parameters:
            self.saved_gui_params[k] = self.gui_parameters[k].get_value()
class OWTreeLearner(OWBaseLearner):
    """Tree algorithm with forward pruning."""

    name = "Tree"
    description = "A tree algorithm with forward pruning."
    icon = "icons/Tree.svg"
    replaces = [
        "Orange.widgets.classify.owclassificationtree.OWClassificationTree",
        "Orange.widgets.regression.owregressiontree.OWRegressionTree",
        "Orange.widgets.classify.owclassificationtree.OWTreeLearner",
        "Orange.widgets.regression.owregressiontree.OWTreeLearner",
    ]
    priority = 30
    keywords = []

    LEARNER = TreeLearner

    binary_trees = Setting(True)
    limit_min_leaf = Setting(True)
    min_leaf = Setting(2)
    limit_min_internal = Setting(True)
    min_internal = Setting(5)
    limit_depth = Setting(True)
    max_depth = Setting(100)

    # Classification only settings
    limit_majority = Setting(True)
    sufficient_majority = Setting(95)

    # Each entry: (label, name of the enabling setting, name of the value
    # setting, minimum, maximum).
    spin_boxes = (
        ("Min. number of instances in leaves: ",
         "limit_min_leaf", "min_leaf", 1, 1000),
        ("Do not split subsets smaller than: ",
         "limit_min_internal", "min_internal", 1, 1000),
        ("Limit the maximal tree depth to: ",
         "limit_depth", "max_depth", 1, 1000))

    classification_spin_boxes = (
        ("Stop when majority reaches [%]: ",
         "limit_majority", "sufficient_majority", 51, 100),)

    def add_main_layout(self):
        """Build the "Parameters" box: binary-tree check box and spin boxes."""
        params_box = gui.widgetBox(self.controlArea, 'Parameters')
        # the checkbox is put into vBox for alignment with other checkboxes
        gui.checkBox(gui.vBox(params_box), self, "binary_trees",
                     "Induce binary tree", callback=self.settings_changed)
        for text, enabled_name, value_name, low, high in self.spin_boxes:
            gui.spin(params_box, self, value_name, low, high,
                     label=text, checked=enabled_name,
                     alignment=Qt.AlignRight,
                     callback=self.settings_changed,
                     checkCallback=self.settings_changed, controlWidth=80)

    def add_classification_layout(self, box):
        """Add the classification-only spin boxes to `box`."""
        for text, enabled_name, value_name, low, high in \
                self.classification_spin_boxes:
            gui.spin(box, self, value_name, low, high,
                     label=text, checked=enabled_name,
                     alignment=Qt.AlignRight,
                     callback=self.settings_changed, controlWidth=80,
                     checkCallback=self.settings_changed)

    def learner_kwargs(self):
        """Return the learner's keyword arguments; a limit that is switched
        off is replaced by its neutral value."""
        depth = self.max_depth if self.limit_depth else None
        min_split = self.min_internal if self.limit_min_internal else 2
        min_in_leaf = self.min_leaf if self.limit_min_leaf else 1
        majority = self.sufficient_majority / 100
        if not self.limit_majority:
            majority = 1
        return dict(
            max_depth=depth,
            min_samples_split=min_split,
            min_samples_leaf=min_in_leaf,
            binarize=self.binary_trees,
            preprocessors=self.preprocessors,
            sufficient_majority=majority)

    def create_learner(self):
        """Instantiate `LEARNER` with the current settings."""
        # pylint: disable=not-callable
        kwargs = self.learner_kwargs()
        return self.LEARNER(**kwargs)

    def get_learner_parameters(self):
        """Return an ordered mapping describing the settings for the report."""
        from Orange.widgets.report import plural_w

        leaf_text = plural_w(
            "at least {number} instance{s} in leaves", self.min_leaf)
        internal_text = plural_w(
            "at least {number} instance{s} in internal nodes",
            self.min_internal)
        depth_text = "maximum depth {}".format(self.max_depth)

        # Keep only the descriptions of the limits that are switched on.
        active = []
        if self.limit_min_leaf:
            active.append(leaf_text)
        if self.limit_min_internal:
            active.append(internal_text)
        if self.limit_depth:
            active.append(depth_text)

        items = OrderedDict()
        items["Pruning"] = ", ".join(active) or "None"
        if self.limit_majority:
            items["Splitting"] = "Stop splitting when majority reaches %d%% " \
                                 "(classification only)" % \
                                 self.sufficient_majority
        items["Binary trees"] = "Yes" if self.binary_trees else "No"
        return items
class OWNYT(OWWidget):
    """Widget that searches New York Times articles and outputs a corpus.

    The search itself is delegated to an ``NYT`` API object; this class
    builds the controls, starts and stops the search and sends the result
    on the ``Corpus`` output.
    """

    class APICredentialsDialog(OWWidget):
        """Dialog in which the user enters the New York Times API key."""
        name = "New York Times API key"
        want_main_area = False
        resizing_enabled = False
        # Storage for the key between sessions
        # NOTE(review): persistence details are in CredentialManager, which
        # is not visible here.
        cm_key = CredentialManager('NY Times API Key')
        # Attribute bound to the line edit created in __init__
        key_input = ''

        class Error(OWWidget.Error):
            invalid_credentials = Msg(
                'This credentials are invalid. '
                'Check the key and your internet connection.')

        def __init__(self, parent):
            """Build the form and pre-fill it with the stored key.

            :param parent: object whose ``update_api`` is called with the
                validated API object (see ``accept``).
            """
            super().__init__()
            self.parent = parent
            # Validated API object, or None while no valid key is known
            self.api = None

            form = QFormLayout()
            form.setContentsMargins(5, 5, 5, 5)
            self.key_edit = gui.lineEdit(self, self, 'key_input', controlWidth=400)
            form.addRow('Key:', self.key_edit)
            self.controlArea.layout().addLayout(form)
            self.submit_button = gui.button(self.controlArea, self, "OK", self.accept)

            self.load_credentials()

        def load_credentials(self):
            """Put the stored key into the line edit."""
            # presumably this also updates ``key_input`` through the
            # gui.lineEdit binding -- TODO confirm
            self.key_edit.setText(self.cm_key.key)

        def save_credentials(self):
            """Store the currently entered key in the credential manager."""
            self.cm_key.key = self.key_input

        def check_credentials(self):
            """Validate the entered key and set ``self.api`` accordingly.

            A valid key is saved and ``self.api`` is set to the API object
            built from it; otherwise ``self.api`` becomes None.
            """
            api = NYT(self.key_input)
            if api.api_key_valid():
                self.save_credentials()
            else:
                api = None
            self.api = api

        def accept(self, silent=False):
            """Check the key and, if it is valid, hand the API to the parent.

            :param silent: when true, the invalid-credentials error is
                neither cleared nor shown; the dialog is still accepted only
                if the key is valid.
            """
            if not silent: self.Error.invalid_credentials.clear()
            self.check_credentials()
            if self.api:
                self.parent.update_api(self.api)
                super().accept()
            elif not silent:
                self.Error.invalid_credentials()

    name = "NY Times"
    description = "Fetch articles from the New York Times search API."
    icon = "icons/NYTimes.svg"
    priority = 130

    class Outputs:
        corpus = Output("Corpus", Corpus)

    want_main_area = False
    resizing_enabled = False

    recent_queries = Setting([])
    # Both defaults are computed once, when the class body is executed.
    date_from = Setting((datetime.now().date() - timedelta(365)))
    date_to = Setting(datetime.now().date())

    # Names of the string-valued meta features offered in 'Text includes'
    attributes = [feat.name for feat, _ in NYT.metas if isinstance(feat, StringVariable)]
    text_includes = Setting([feat.name for feat in NYT.text_features])

    class Warning(OWWidget.Warning):
        no_text_fields = Msg('Text features are inferred when none are selected.')

    class Error(OWWidget.Error):
        no_api = Msg('Please provide a valid API key.')
        no_query = Msg('Please provide a query.')
        offline = Msg('No internet connection.')
        api_error = Msg('API error: {}')
        rate_limit = Msg('Rate limit exceeded. Please try again later.')

    def __init__(self):
        """Initialize the state and build the control area."""
        super().__init__()
        self.corpus = None
        self.nyt_api = None
        # Text shown after 'Articles:' in the output box
        self.output_info = ''
        self.num_retrieved = 0
        self.num_all = 0

        # API key
        self.api_dlg = self.APICredentialsDialog(self)
        # Try the stored key right away; no error is shown if it is invalid.
        self.api_dlg.accept(silent=True)
        gui.button(self.controlArea, self, 'Article API Key',
                   callback=self.api_dlg.exec_,
                   focusPolicy=Qt.NoFocus)

        # Query
        query_box = gui.widgetBox(self.controlArea, 'Query', addSpace=True)
        self.query_box = QueryBox(query_box, self, self.recent_queries,
                                  callback=self.new_query_input)

        # Date interval
        date_box = gui.hBox(query_box)
        DatePickerInterval(date_box, self, 'date_from', 'date_to',
                           min_date=MIN_DATE, max_date=date.today(),
                           margin=(0, 3, 0, 0))

        # Text includes features
        self.controlArea.layout().addWidget(
            CheckListLayout('Text includes', self, 'text_includes',
                            self.attributes,
                            cols=2, callback=self.set_text_features))

        # Output
        info_box = gui.hBox(self.controlArea, 'Output')
        gui.label(info_box, self, 'Articles: %(output_info)s')

        # Buttons
        self.button_box = gui.hBox(self.controlArea)

        self.search_button = gui.button(self.button_box, self, 'Search',
                                        self.start_stop,
                                        focusPolicy=Qt.NoFocus)

    def new_query_input(self):
        """Callback of the query box: stop the search and run a new one."""
        self.search.stop()
        self.run_search()

    def start_stop(self):
        """Callback of the search button: stop a running search or start one."""
        if self.search.running:
            self.search.stop()
        else:
            self.query_box.synchronize(silent=True)
            self.run_search()

    # NOTE(review): gui_require presumably shows the named error and skips
    # the call when the named attribute is empty -- confirm in its definition.
    @gui_require('nyt_api', 'no_api')
    @gui_require('recent_queries', 'no_query')
    def run_search(self):
        """Start the search."""
        self.search()

    @asynchronous
    def search(self):
        """Query the API with the first of ``recent_queries`` and the dates.

        Returns whatever ``nyt_api.search`` returns; the value is passed to
        ``on_result``.
        """
        return self.nyt_api.search(self.recent_queries[0], self.date_from, self.date_to,
                                   on_progress=self.progress_with_info,
                                   should_break=self.search.should_break)

    @search.callback(should_raise=False)
    def progress_with_info(self, n_retrieved, n_all):
        """Update the progress bar and the retrieved/all counters."""
        self.progressBarSet(100 * (n_retrieved / n_all if n_all else 1))  # prevent division by 0
        self.num_all = n_all
        self.num_retrieved = n_retrieved
        self.update_info_label()

    @search.on_start
    def on_start(self):
        """Reset errors, counters and output when a search starts."""
        self.Error.api_error.clear()
        self.Error.rate_limit.clear()
        self.Error.offline.clear()
        self.num_all, self.num_retrieved = 0, 0
        self.update_info_label()
        self.progressBarInit()
        self.search_button.setText('Stop')
        self.Outputs.corpus.send(None)

    @search.on_result
    def on_result(self, result):
        """Store the result of the search and send it to the output."""
        self.search_button.setText('Search')
        self.corpus = result
        self.set_text_features()
        self.progressBarFinished()

    def update_info_label(self):
        """Format ``output_info`` as '<retrieved>/<all>'."""
        self.output_info = '{}/{}'.format(self.num_retrieved, self.num_all)

    def set_text_features(self):
        """Apply the 'Text includes' selection to the corpus and send it.

        Shows a warning when nothing is selected. The corpus (possibly
        None) is sent in any case.
        """
        self.Warning.no_text_fields.clear()
        if not self.text_includes:
            self.Warning.no_text_fields()

        if self.corpus is not None:
            vars_ = [var for var in self.corpus.domain.metas if var.name in self.text_includes]
            # With no matching variable, None is passed instead of an
            # empty list.
            self.corpus.set_text_features(vars_ or None)
        self.Outputs.corpus.send(self.corpus)

    def update_api(self, api):
        """Install a validated API object and route its errors to the widget."""
        self.nyt_api = api
        self.Error.no_api.clear()
        self.nyt_api.on_error = self.Error.api_error
        self.nyt_api.on_rate_limit = self.Error.rate_limit
        self.nyt_api.on_no_connection = self.Error.offline

    def send_report(self):
        """Report the query, date range, text fields and article counts."""
        self.report_items([
            ('Query', self.recent_queries[0] if self.recent_queries else ''),
            ('Date from', self.date_from),
            ('Date to', self.date_to),
            ('Text includes', ', '.join(self.text_includes)),
            ('Output', self.output_info or 'Nothing'),
        ])
class OWAggregateColumns(widget.OWWidget):
    """Widget that appends a column aggregating the selected columns."""
    name = "Aggregate Columns"
    description = "Compute a sum, max, min ... of selected columns."
    category = "Transform"
    icon = "icons/AggregateColumns.svg"
    priority = 1200
    keywords = ["aggregate", "sum", "product", "max", "min",
                "mean", "median", "variance"]

    class Inputs:
        data = Input("Data", Table, default=True)

    class Outputs:
        data = Output("Data", Table)

    want_main_area = False

    settingsHandler = DomainContextHandler()
    variables: List[Variable] = ContextSetting([])
    operation = Setting("Sum")
    var_name = Setting("agg")
    auto_apply = Setting(True)

    # Operation name (shown in the combo box) -> NaN-ignoring numpy reducer
    Operations = {"Sum": np.nansum, "Product": np.nanprod,
                  "Min": np.nanmin, "Max": np.nanmax,
                  "Mean": np.nanmean, "Variance": np.nanvar,
                  "Median": np.nanmedian}
    # Operations for which the result is a TimeVariable when all inputs are
    TimePreserving = ("Min", "Max", "Mean", "Median")

    def __init__(self):
        """Build the variable list, the operation combo and the name edit."""
        super().__init__()
        self.data = None

        box = gui.vBox(self.controlArea, box=True)

        self.variable_model = DomainModel(
            order=DomainModel.MIXED, valid_types=(ContinuousVariable, ))
        var_list = gui.listView(
            box, self, "variables", model=self.variable_model,
            callback=self.commit.deferred
        )
        var_list.setSelectionMode(var_list.ExtendedSelection)

        combo = gui.comboBox(
            box, self, "operation",
            label="Operator: ", orientation=Qt.Horizontal,
            items=list(self.Operations), sendSelectedValue=True,
            callback=self.commit.deferred
        )
        combo.setSizePolicy(QSizePolicy.MinimumExpanding, QSizePolicy.Fixed)

        gui.lineEdit(
            box, self, "var_name",
            label="Variable name: ", orientation=Qt.Horizontal,
            callback=self.commit.deferred
        )

        gui.auto_apply(self.controlArea, self)

    @Inputs.data
    def set_data(self, data: Table = None):
        """Handle new input data: reset the selection and recompute output."""
        self.closeContext()
        self.variables.clear()
        self.data = data
        if self.data:
            self.variable_model.set_domain(data.domain)
            self.openContext(data)
        else:
            self.variable_model.set_domain(None)
        self.commit.now()

    @gui.deferred
    def commit(self):
        """Send the (possibly augmented) data to the output."""
        augmented = self._compute_data()
        self.Outputs.data.send(augmented)

    def _compute_data(self):
        """Return the data with the aggregate column added.

        The input is returned unchanged when there is no data or no
        variable is selected.
        """
        if not self.data or not self.variables:
            return self.data

        new_col = self._compute_column()
        new_var = self._new_var()
        return self.data.add_column(new_var, new_col)

    def _compute_column(self):
        """Apply the selected operation across the selected columns."""
        arr = np.empty((len(self.data), len(self.variables)))
        for i, var in enumerate(self.variables):
            arr[:, i] = self.data.get_column_view(var)[0].astype(float)
        func = self.Operations[self.operation]
        return func(arr, axis=1)

    def _new_var_name(self):
        """Return ``var_name`` made unique within the data domain."""
        return get_unique_names(self.data.domain, self.var_name)

    def _new_var(self):
        """Create the variable that describes the aggregate column."""
        name = self._new_var_name()
        if self.operation in self.TimePreserving \
                and all(isinstance(var, TimeVariable)
                        for var in self.variables):
            return TimeVariable(name)
        return ContinuousVariable(name)

    def send_report(self):
        """Report the new variable, the operation and the aggregated columns.

        At most 30 column names are listed; the remaining ones are only
        counted.
        """
        # fp for self.variables, pylint: disable=unsubscriptable-object
        if not self.data or not self.variables:
            return
        n_vars = len(self.variables)
        if n_vars == 1:
            # A single name must not be preceded by a dangling " and ".
            var_list = f"'{self.variables[0].name}'"
        else:
            var_list = ", ".join(f"'{var.name}'"
                                 for var in self.variables[:31][:-1])
            if n_vars > 30:
                var_list += f" and {n_vars - 30} others"
            else:
                var_list += f" and '{self.variables[-1].name}'"
        self.report_items((
            ("Output:",
             f"'{self._new_var_name()}' as {self.operation.lower()} of {var_list}"
             ),
        ))
class OWKNNLearner(OWProvidesLearner, widget.OWWidget):
    """Widget that outputs a k-nearest neighbors learner and classifier."""
    name = "Nearest Neighbors"
    description = "k-nearest neighbors classification algorithm."
    icon = "icons/KNN.svg"
    inputs = [("Data", Table, "set_data")] + OWProvidesLearner.inputs
    outputs = [("Learner", KNNLearner), ("Classifier", SklModel)]

    want_main_area = False
    resizing_enabled = False

    # Values passed to the learner; the settings below index into them
    weights = ["uniform", "distance"]
    metrics = ["euclidean", "manhattan", "chebyshev", "mahalanobis"]

    learner_name = Setting("kNN")
    n_neighbors = Setting(5)
    metric_index = Setting(0)
    weight_type = Setting(0)

    def __init__(self):
        """Build the controls and send the initial learner."""
        super().__init__()
        self.data = None
        self.preprocessors = None

        box = gui.widgetBox(self.controlArea, "Learner/Classifier Name")
        gui.lineEdit(box, self, "learner_name")

        box = gui.widgetBox(self.controlArea, "Neighbors")
        gui.spin(box, self, "n_neighbors", 1, 100, label="Number of neighbors",
                 alignment=Qt.AlignRight)
        gui.comboBox(box, self, "metric_index", label="Metric",
                     orientation="horizontal",
                     items=[i.capitalize() for i in self.metrics])
        gui.comboBox(box, self, "weight_type", label='Weight',
                     orientation="horizontal",
                     items=[i.capitalize() for i in self.weights])

        button_row = QHBoxLayout()
        self.controlArea.layout().addLayout(button_row)
        apply_button = gui.button(None, self, "Apply",
                                  callback=self.apply, default=True)
        button_row.layout().addWidget(self.report_button)
        button_row.layout().addWidget(apply_button)
        self.apply()

    @check_sql_input
    def set_data(self, data):
        """Store the training data and refresh both outputs.

        The outputs are refreshed even when ``data`` is None, so that a
        classifier fitted on data that is no longer connected is withdrawn.
        """
        self.data = data
        self.apply()

    LEARNER = KNNLearner

    def apply(self):
        """Construct the learner, fit it if data is present, send both.

        Without data, or when the learner cannot handle the data domain,
        None is sent as the classifier.
        """
        learner = self.LEARNER(
            n_neighbors=self.n_neighbors,
            metric=self.metrics[self.metric_index],
            weights=self.weights[self.weight_type],
            preprocessors=self.preprocessors
        )
        learner.name = self.learner_name
        classifier = None

        # Always drop the previous message; otherwise an adequacy error
        # would stay displayed after the offending data is removed.
        self.error(0)
        if self.data is not None:
            if not learner.check_learner_adequacy(self.data.domain):
                self.error(0, learner.learner_adequacy_err_msg)
            else:
                classifier = learner(self.data)
                classifier.name = self.learner_name

        self.send("Learner", learner)
        self.send("Classifier", classifier)

    def send_report(self):
        """Report the learner name, its parameters and the input data."""
        self.report_items((("Name", self.learner_name),))
        self.report_items("Model parameters", (
            ("Number of neighbours", self.n_neighbors),
            ("Metric", self.metrics[self.metric_index].capitalize()),
            ("Weight", self.weights[self.weight_type].capitalize())))
        if self.data:
            self.report_data("Data", self.data)
class OWSelectAttributes(widget.OWWidget):
    """Widget for assigning columns to features, targets and metas.

    Variables are kept in four list models (available, features, target,
    meta). The output domain is built from the latter three.
    """
    name = "Select Columns"
    description = "Select columns from the data table and assign them to " \
                  "data features, classes or meta variables."
    icon = "icons/SelectColumns.svg"
    priority = 100

    class Inputs:
        data = Input("Data", Table)

    class Outputs:
        data = Output("Data", Table)
        features = Output("Features", widget.AttributeList, dynamic=False)

    want_main_area = False
    want_control_area = True

    settingsHandler = SelectAttributesDomainContextHandler()
    # Maps (variable name, variable type) to (role, position in the role)
    domain_role_hints = ContextSetting({})
    auto_commit = Setting(True)

    def __init__(self):
        """Build the four list views and the buttons between them."""
        super().__init__()
        self.controlArea = QWidget(self.controlArea)
        self.layout().addWidget(self.controlArea)
        layout = QGridLayout()
        self.controlArea.setLayout(layout)
        layout.setContentsMargins(4, 4, 4, 4)
        box = gui.vBox(self.controlArea, "Available Variables",
                       addToLayout=False)
        self.available_attrs = VariableListModel(enable_dnd=True)
        filter_edit, self.available_attrs_view = variables_filter(
            parent=self, model=self.available_attrs)
        box.layout().addWidget(filter_edit)

        def dropcompleted(action):
            # Commit only after a completed move.
            if action == Qt.MoveAction:
                self.commit()

        # Connected exactly once; a second connection would run the
        # handler twice for every selection change.
        self.available_attrs_view.selectionModel().selectionChanged.connect(
            partial(self.update_interface_state, self.available_attrs_view))
        self.available_attrs_view.dragDropActionDidComplete.connect(
            dropcompleted)

        box.layout().addWidget(self.available_attrs_view)
        layout.addWidget(box, 0, 0, 3, 1)

        box = gui.vBox(self.controlArea, "Features", addToLayout=False)
        self.used_attrs = VariableListModel(enable_dnd=True)
        self.used_attrs_view = VariablesListItemView(
            acceptedType=(Orange.data.DiscreteVariable,
                          Orange.data.ContinuousVariable))

        self.used_attrs_view.setModel(self.used_attrs)
        self.used_attrs_view.selectionModel().selectionChanged.connect(
            partial(self.update_interface_state, self.used_attrs_view))
        self.used_attrs_view.dragDropActionDidComplete.connect(dropcompleted)
        box.layout().addWidget(self.used_attrs_view)
        layout.addWidget(box, 0, 2, 1, 1)

        box = gui.vBox(self.controlArea, "Target Variable", addToLayout=False)
        self.class_attrs = ClassVarListItemModel(enable_dnd=True)
        self.class_attrs_view = ClassVariableItemView(
            acceptedType=(Orange.data.DiscreteVariable,
                          Orange.data.ContinuousVariable))
        self.class_attrs_view.setModel(self.class_attrs)
        self.class_attrs_view.selectionModel().selectionChanged.connect(
            partial(self.update_interface_state, self.class_attrs_view))
        self.class_attrs_view.dragDropActionDidComplete.connect(dropcompleted)
        self.class_attrs_view.setMaximumHeight(72)
        box.layout().addWidget(self.class_attrs_view)
        layout.addWidget(box, 1, 2, 1, 1)

        box = gui.vBox(self.controlArea, "Meta Attributes", addToLayout=False)
        self.meta_attrs = VariableListModel(enable_dnd=True)
        self.meta_attrs_view = VariablesListItemView(
            acceptedType=Orange.data.Variable)
        self.meta_attrs_view.setModel(self.meta_attrs)
        self.meta_attrs_view.selectionModel().selectionChanged.connect(
            partial(self.update_interface_state, self.meta_attrs_view))
        self.meta_attrs_view.dragDropActionDidComplete.connect(dropcompleted)
        box.layout().addWidget(self.meta_attrs_view)
        layout.addWidget(box, 2, 2, 1, 1)

        # Up / move / down buttons, one column per destination view
        bbox = gui.vBox(self.controlArea, addToLayout=False, margin=0)
        layout.addWidget(bbox, 0, 1, 1, 1)

        self.up_attr_button = gui.button(
            bbox, self, "Up",
            callback=partial(self.move_up, self.used_attrs_view))
        self.move_attr_button = gui.button(
            bbox, self, ">",
            callback=partial(self.move_selected, self.used_attrs_view))
        self.down_attr_button = gui.button(
            bbox, self, "Down",
            callback=partial(self.move_down, self.used_attrs_view))

        bbox = gui.vBox(self.controlArea, addToLayout=False, margin=0)
        layout.addWidget(bbox, 1, 1, 1, 1)

        self.up_class_button = gui.button(
            bbox, self, "Up",
            callback=partial(self.move_up, self.class_attrs_view))
        self.move_class_button = gui.button(
            bbox, self, ">",
            callback=partial(self.move_selected, self.class_attrs_view,
                             exclusive=False))
        self.down_class_button = gui.button(
            bbox, self, "Down",
            callback=partial(self.move_down, self.class_attrs_view))

        bbox = gui.vBox(self.controlArea, addToLayout=False, margin=0)
        layout.addWidget(bbox, 2, 1, 1, 1)

        self.up_meta_button = gui.button(
            bbox, self, "Up",
            callback=partial(self.move_up, self.meta_attrs_view))
        self.move_meta_button = gui.button(
            bbox, self, ">",
            callback=partial(self.move_selected, self.meta_attrs_view))
        self.down_meta_button = gui.button(
            bbox, self, "Down",
            callback=partial(self.move_down, self.meta_attrs_view))

        autobox = gui.auto_commit(None, self, "auto_commit", "Send")
        layout.addWidget(autobox, 3, 0, 1, 3)
        reset = gui.button(None, self, "Reset", callback=self.reset,
                           width=120)
        autobox.layout().insertWidget(0, reset)
        autobox.layout().insertStretch(1, 20)

        layout.setRowStretch(0, 4)
        layout.setRowStretch(1, 0)
        layout.setRowStretch(2, 2)
        layout.setHorizontalSpacing(0)
        # The layout was already installed on controlArea above; it is not
        # set a second time.

        self.data = None
        self.output_data = None
        self.original_completer_items = []

        self.resize(500, 600)

    @Inputs.data
    def set_data(self, data=None):
        """Handle new input data and distribute its variables over the lists.

        Each variable gets a default role from the domain (attribute, class
        or meta); role hints restored from the context override it.
        """
        self.update_domain_role_hints()
        self.closeContext()
        self.data = data
        if data is not None:
            self.openContext(data)
            all_vars = data.domain.variables + data.domain.metas

            def var_sig(attr):
                # Key under which the role hints are stored
                return attr.name, vartype(attr)

            domain_hints = {var_sig(attr): ("attribute", i)
                            for i, attr in enumerate(data.domain.attributes)}

            domain_hints.update({var_sig(attr): ("meta", i)
                                 for i, attr in enumerate(data.domain.metas)})

            if data.domain.class_vars:
                domain_hints.update(
                    {var_sig(attr): ("class", i)
                     for i, attr in enumerate(data.domain.class_vars)})

            # update the hints from context settings
            domain_hints.update(self.domain_role_hints)

            def ordered_for_role(role):
                # Variables with the given role, ordered by hinted position
                placed = [(domain_hints[var_sig(attr)][1], attr)
                          for attr in all_vars
                          if domain_hints[var_sig(attr)][0] == role]
                return [attr
                        for _, attr in sorted(placed, key=lambda a: a[0])]

            attributes = ordered_for_role("attribute")
            classes = ordered_for_role("class")
            metas = ordered_for_role("meta")
            available = ordered_for_role("available")

            self.used_attrs[:] = attributes
            self.class_attrs[:] = classes
            self.meta_attrs[:] = metas
            self.available_attrs[:] = available
        else:
            self.used_attrs[:] = []
            self.class_attrs[:] = []
            self.meta_attrs[:] = []
            self.available_attrs[:] = []

        self.unconditional_commit()

    def update_domain_role_hints(self):
        """ Update the domain hints to be stored in the widgets settings.
        """
        def hints_from_model(role, model):
            return [((attr.name, vartype(attr)), (role, i))
                    for i, attr in enumerate(model)]

        hints = dict(hints_from_model("available", self.available_attrs))
        hints.update(hints_from_model("attribute", self.used_attrs))
        hints.update(hints_from_model("class", self.class_attrs))
        hints.update(hints_from_model("meta", self.meta_attrs))
        self.domain_role_hints = hints

    def selected_rows(self, view):
        """ Return the selected rows in the view.

        If the view shows a QSortFilterProxyModel, the rows are mapped to
        rows of the source model.
        """
        rows = view.selectionModel().selectedRows()
        model = view.model()
        if isinstance(model, QSortFilterProxyModel):
            rows = [model.mapToSource(r) for r in rows]
        return [r.row() for r in rows]

    def move_rows(self, view, rows, offset):
        """Shift the given rows of the view's model by ``offset`` and commit.

        Target positions are clamped to the model; the moved rows are
        selected afterwards.
        """
        model = view.model()
        newrows = [min(max(0, row + offset), len(model) - 1) for row in rows]

        # Swap starting from the end towards which the rows move.
        for row, newrow in sorted(zip(rows, newrows), reverse=offset > 0):
            model[row], model[newrow] = model[newrow], model[row]

        selection = QItemSelection()
        for nrow in newrows:
            index = model.index(nrow, 0)
            selection.select(index, index)
        view.selectionModel().select(
            selection, QItemSelectionModel.ClearAndSelect)

        self.commit()

    def move_up(self, view):
        """Move the rows selected in ``view`` one position up."""
        selected = self.selected_rows(view)
        self.move_rows(view, selected, -1)

    def move_down(self, view):
        """Move the rows selected in ``view`` one position down."""
        selected = self.selected_rows(view)
        self.move_rows(view, selected, 1)

    def move_selected(self, view, exclusive=False):
        """Move the selection between ``view`` and the available list.

        A selection in ``view`` is moved back to the available variables;
        otherwise the variables selected among the available ones are
        moved into ``view``.
        """
        if self.selected_rows(view):
            self.move_selected_from_to(view, self.available_attrs_view)
        elif self.selected_rows(self.available_attrs_view):
            self.move_selected_from_to(self.available_attrs_view, view,
                                       exclusive)

    def move_selected_from_to(self, src, dst, exclusive=False):
        """Move the rows selected in ``src`` to ``dst``."""
        self.move_from_to(src, dst, self.selected_rows(src), exclusive)

    def move_from_to(self, src, dst, rows, exclusive=False):
        """Move ``rows`` from the model of ``src`` to the end of ``dst``.

        ``exclusive`` is accepted for interface compatibility and is not
        used here.
        """
        src_model = source_model(src)
        attrs = [src_model[r] for r in rows]

        # Delete from the back so that earlier row indices stay valid.
        for s1, s2 in reversed(list(slices(rows))):
            del src_model[s1:s2]

        dst_model = source_model(dst)
        dst_model.extend(attrs)

        self.commit()

    def update_interface_state(self, focus=None, selected=None,
                               deselected=None):
        """Keep a single active selection and refresh the move buttons.

        Selections in views other than ``focus`` that do not have keyboard
        focus are cleared. Each move button is enabled when there is
        something to move and shows ">" or "<" for the direction.
        ``selected`` and ``deselected`` are ignored; they are what the
        ``selectionChanged`` signal passes along.
        """
        for view in [self.available_attrs_view, self.used_attrs_view,
                     self.class_attrs_view, self.meta_attrs_view]:
            if view is not focus and not view.hasFocus() \
                    and self.selected_rows(view):
                view.selectionModel().clear()

        def selected_vars(view):
            model = source_model(view)
            return [model[i] for i in self.selected_rows(view)]

        available_selected = selected_vars(self.available_attrs_view)
        attrs_selected = selected_vars(self.used_attrs_view)
        class_selected = selected_vars(self.class_attrs_view)
        meta_selected = selected_vars(self.meta_attrs_view)

        available_types = set(map(type, available_selected))
        all_primitive = all(var.is_primitive() for var in available_types)

        def refresh(button, enabled):
            button.setEnabled(bool(enabled))
            if enabled:
                button.setText(">" if available_selected else "<")

        # Only primitive variables can become features or targets; any
        # variable can become a meta.
        refresh(self.move_attr_button,
                (available_selected and all_primitive) or attrs_selected)
        refresh(self.move_class_button,
                (all_primitive and available_selected) or class_selected)
        refresh(self.move_meta_button,
                available_selected or meta_selected)

    def commit(self):
        """Store the role hints and send the transformed data and features."""
        self.update_domain_role_hints()
        if self.data is not None:
            attributes = list(self.used_attrs)
            class_var = list(self.class_attrs)
            metas = list(self.meta_attrs)

            domain = Orange.data.Domain(attributes, class_var, metas)
            newdata = self.data.transform(domain)
            self.output_data = newdata
            self.Outputs.data.send(newdata)
            self.Outputs.features.send(widget.AttributeList(attributes))
        else:
            self.output_data = None
            self.Outputs.data.send(None)
            self.Outputs.features.send(None)

    def reset(self):
        """Restore the roles of the input domain and commit."""
        if self.data is not None:
            self.available_attrs[:] = []
            self.used_attrs[:] = self.data.domain.attributes
            self.class_attrs[:] = self.data.domain.class_vars
            self.meta_attrs[:] = self.data.domain.metas
            self.update_domain_role_hints()
            self.commit()

    def send_report(self):
        """Report the input domain, the output domain and removed variables."""
        if not self.data or not self.output_data:
            return
        in_domain, out_domain = self.data.domain, self.output_data.domain
        self.report_domain("Input data", self.data.domain)
        if (in_domain.attributes, in_domain.class_vars, in_domain.metas) == (
                out_domain.attributes, out_domain.class_vars,
                out_domain.metas):
            self.report_paragraph("Output data", "No changes.")
        else:
            self.report_domain("Output data", self.output_data.domain)
            diff = list(set(in_domain.variables + in_domain.metas) -
                        set(out_domain.variables + out_domain.metas))
            if diff:
                text = "%i (%s)" % (len(diff), ", ".join(x.name for x in diff))
                self.report_items((("Removed", text),))
class OWBoxPlot(widget.OWWidget): """ Here's how the widget's functions call each other: - `set_data` is a signal handler fills the list boxes and calls `grouping_changed`. - `grouping_changed` handles changes of grouping attribute: it enables or disables the box for ordering, orders attributes and calls `attr_changed`. - `attr_changed` handles changes of attribute. It recomputes box data by calling `compute_box_data`, shows the appropriate display box (discrete/continuous) and then calls`layout_changed` - `layout_changed` constructs all the elements for the scene (as lists of QGraphicsItemGroup) and calls `display_changed`. It is called when the attribute or grouping is changed (by attr_changed) and on resize event. - `display_changed` puts the elements corresponding to the current display settings on the scene. It is called when the elements are reconstructed (layout is changed due to selection of attributes or resize event), or when the user changes display settings or colors. For discrete attributes, the flow is a bit simpler: the elements are not constructed in advance (by layout_changed). Instead, layout_changed and display_changed call display_changed_disc that draws everything. """ name = "Box Plot" description = "Visualize the distribution of feature values in a box plot." 
icon = "icons/BoxPlot.svg" priority = 100 class Inputs: data = Input("Data", Orange.data.Table) class Outputs: selected_data = Output("Selected Data", Orange.data.Table, default=True) annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Orange.data.Table) #: Comparison types for continuous variables CompareNone, CompareMedians, CompareMeans = 0, 1, 2 settingsHandler = DomainContextHandler() conditions = ContextSetting([]) attribute = ContextSetting(None) order_by_importance = Setting(False) group_var = ContextSetting(None) show_annotations = Setting(True) compare = Setting(CompareMeans) stattest = Setting(0) sig_threshold = Setting(0.05) stretched = Setting(True) show_labels = Setting(True) auto_commit = Setting(True) _sorting_criteria_attrs = { CompareNone: "", CompareMedians: "median", CompareMeans: "mean" } _pen_axis_tick = QPen(Qt.white, 5) _pen_axis = QPen(Qt.darkGray, 3) _pen_median = QPen(QBrush(QColor(0xff, 0xff, 0x00)), 2) _pen_paramet = QPen(QBrush(QColor(0x33, 0x00, 0xff)), 2) _pen_dotted = QPen(QBrush(QColor(0x33, 0x00, 0xff)), 1) _pen_dotted.setStyle(Qt.DotLine) _post_line_pen = QPen(Qt.lightGray, 2) _post_grp_pen = QPen(Qt.lightGray, 4) for pen in (_pen_paramet, _pen_median, _pen_dotted, _pen_axis, _pen_axis_tick, _post_line_pen, _post_grp_pen): pen.setCosmetic(True) pen.setCapStyle(Qt.RoundCap) pen.setJoinStyle(Qt.RoundJoin) _pen_axis_tick.setCapStyle(Qt.FlatCap) _box_brush = QBrush(QColor(0x33, 0x88, 0xff, 0xc0)) _axis_font = QFont() _axis_font.setPixelSize(12) _label_font = QFont() _label_font.setPixelSize(11) _attr_brush = QBrush(QColor(0x33, 0x00, 0xff)) graph_name = "box_scene" def __init__(self): super().__init__() self.stats = [] self.dataset = None self.posthoc_lines = [] self.label_txts = self.mean_labels = self.boxes = self.labels = \ self.label_txts_all = self.attr_labels = self.order = [] self.p = -1.0 self.scale_x = self.scene_min_x = self.scene_width = 0 self.label_width = 0 order = (DomainModel.CLASSES, DomainModel.METAS, 
DomainModel.ATTRIBUTES) self.attrs = DomainModel(order=order, valid_types=DomainModel.PRIMITIVE) view = gui.listView(self.controlArea, self, "attribute", box="Variable", model=self.attrs, callback=self.attr_changed) view.setMinimumSize(QSize(30, 30)) # Any other policy than Ignored will let the QListBox's scrollbar # set the minimal height (see the penultimate paragraph of # http://doc.qt.io/qt-4.8/qabstractscrollarea.html#addScrollBarWidget) view.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Ignored) gui.separator(view.box, 6, 6) self.cb_order = gui.checkBox( view.box, self, "order_by_importance", "Order by relevance", tooltip="Order by 𝜒² or ANOVA over the subgroups", callback=self.apply_sorting) self.group_vars = DomainModel(order=order, placeholder="None", valid_types=Orange.data.DiscreteVariable) self.group_vars.clear() # Remove 'None' from the list view view = gui.listView(self.controlArea, self, "group_var", box="Subgroups", model=self.group_vars, callback=self.grouping_changed) view.setMinimumSize(QSize(30, 30)) # See the comment above view.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Ignored) # TODO: move Compare median/mean to grouping box # The vertical size policy is needed to let only the list views expand self.display_box = gui.vBox(self.controlArea, "Display", sizePolicy=(QSizePolicy.Minimum, QSizePolicy.Maximum)) gui.checkBox(self.display_box, self, "show_annotations", "Annotate", callback=self.display_changed) self.compare_rb = gui.radioButtonsInBox( self.display_box, self, 'compare', btnLabels=["No comparison", "Compare medians", "Compare means"], callback=self.layout_changed) # The vertical size policy is needed to let only the list views expand self.stretching_box = box = gui.vBox(self.controlArea, box="Display", sizePolicy=(QSizePolicy.Minimum, QSizePolicy.Maximum)) gui.checkBox(box, self, 'stretched', "Stretch bars", callback=self.display_changed) gui.checkBox(box, self, 'show_labels', "Show box labels", callback=self.display_changed) 
gui.auto_commit(self.controlArea, self, "auto_commit", "Send Selection", "Send Automatically") gui.vBox(self.mainArea, addSpace=True) self.box_scene = QGraphicsScene() self.box_scene.selectionChanged.connect(self.commit) self.box_view = QGraphicsView(self.box_scene) self.box_view.setRenderHints(QPainter.Antialiasing | QPainter.TextAntialiasing | QPainter.SmoothPixmapTransform) self.box_view.viewport().installEventFilter(self) self.mainArea.layout().addWidget(self.box_view) e = gui.hBox(self.mainArea, addSpace=False) self.infot1 = gui.widgetLabel(e, "<center>No test results.</center>") self.mainArea.setMinimumWidth(600) self.stats = self.dist = self.conts = [] self.is_continuous = False self.update_display_box() def sizeHint(self): return QSize(100, 500) # Vertical size is regulated by mainArea def eventFilter(self, obj, event): if obj is self.box_view.viewport() and \ event.type() == QEvent.Resize: self.layout_changed() return super().eventFilter(obj, event) # noinspection PyTypeChecker @Inputs.data def set_data(self, dataset): if dataset is not None and (not bool(dataset) or not len(dataset.domain)): dataset = None self.closeContext() self.dataset = dataset self.dist = self.stats = self.conts = [] self.group_var = None self.attribute = None if dataset: domain = dataset.domain self.group_vars.set_domain(domain) self.attrs.set_domain(domain) self.select_default_variables(domain) self.openContext(self.dataset) self.grouping_changed() else: self.reset_all_data() self.commit() def select_default_variables(self, domain): # visualize first non-class variable, group by class (if present) if len(self.attrs) > len(domain.class_vars): self.attribute = self.attrs[len(domain.class_vars)] elif self.attrs: self.attribute = self.attrs[0] if domain.class_var and domain.class_var.is_discrete: self.group_var = domain.class_var else: self.group_var = None # Reset to trigger selection via callback def apply_sorting(self): def compute_score(attr): if attr is group_var: return 3 if 
attr.is_continuous: # One-way ANOVA col = data.get_column_view(attr)[0].astype(float) groups = (col[group_col == i] for i in range(n_groups)) groups = (col[~np.isnan(col)] for col in groups) groups = [group for group in groups if len(group)] p = f_oneway(*groups)[1] if len(groups) > 1 else 2 else: # Chi-square with the given distribution into groups # (see degrees of freedom in computation of the p-value) if not attr.values or not group_var.values: return 2 observed = np.array( contingency.get_contingency(data, group_var, attr)) observed = observed[observed.sum(axis=1) != 0, :] observed = observed[:, observed.sum(axis=0) != 0] if min(observed.shape) < 2: return 2 expected = \ np.outer(observed.sum(axis=1), observed.sum(axis=0)) / \ np.sum(observed) p = chisquare(observed.ravel(), f_exp=expected.ravel(), ddof=n_groups - 1)[1] if math.isnan(p): return 2 return p data = self.dataset if data is None: return domain = data.domain attribute = self.attribute group_var = self.group_var if self.order_by_importance and group_var is not None: n_groups = len(group_var.values) group_col = data.get_column_view(group_var)[0] if \ domain.has_continuous_attributes( include_class=True, include_metas=True) else None self.attrs.sort(key=compute_score) else: self.attrs.set_domain(domain) self.attribute = attribute def reset_all_data(self): self.clear_scene() self.infot1.setText("") self.attrs.set_domain(None) self.group_vars.set_domain(None) self.group_vars.clear() # Remove 'None' from the list view self.is_continuous = False self.update_display_box() def grouping_changed(self): self.cb_order.setEnabled(self.group_var is not None) self.apply_sorting() self.attr_changed() def select_box_items(self): temp_cond = self.conditions.copy() for box in self.box_scene.items(): if isinstance(box, FilterGraphicsRectItem): box.setSelected( box.filter.conditions in [c.conditions for c in temp_cond]) def attr_changed(self): self.compute_box_data() self.update_display_box() self.layout_changed() if 
self.is_continuous: heights = 90 if self.show_annotations else 60 self.box_view.centerOn(self.scene_min_x + self.scene_width / 2, -30 - len(self.stats) * heights / 2 + 45) else: self.box_view.centerOn(self.scene_width / 2, -30 - len(self.boxes) * 40 / 2 + 45) def compute_box_data(self): attr = self.attribute if not attr: return dataset = self.dataset self.is_continuous = attr.is_continuous if dataset is None or not self.is_continuous and not attr.values or \ self.group_var and not self.group_var.values: self.stats = self.dist = self.conts = [] return if self.group_var: self.dist = [] self.conts = contingency.get_contingency(dataset, attr, self.group_var) if self.is_continuous: self.stats = [ BoxData(cont, attr, i, self.group_var) for i, cont in enumerate(self.conts) ] self.label_txts_all = self.group_var.values else: self.dist = distribution.get_distribution(dataset, attr) self.conts = [] if self.is_continuous: self.stats = [BoxData(self.dist, attr, None)] self.label_txts_all = [""] self.label_txts = [ txts for stat, txts in zip(self.stats, self.label_txts_all) if stat.n > 0 ] self.stats = [stat for stat in self.stats if stat.n > 0] def update_display_box(self): if self.is_continuous: self.stretching_box.hide() self.display_box.show() self.compare_rb.setEnabled(self.group_var is not None) else: self.stretching_box.show() self.display_box.hide() def clear_scene(self): self.closeContext() self.box_scene.clearSelection() self.box_scene.clear() self.attr_labels = [] self.labels = [] self.boxes = [] self.mean_labels = [] self.posthoc_lines = [] self.openContext(self.dataset) def layout_changed(self): attr = self.attribute if not attr: return self.clear_scene() if self.dataset is None or len(self.conts) == len(self.dist) == 0: return if not self.is_continuous: return self.display_changed_disc() self.mean_labels = [ self.mean_label(stat, attr, lab) for stat, lab in zip(self.stats, self.label_txts) ] self.draw_axis() self.boxes = [self.box_group(stat) for stat in 
self.stats] self.labels = [ self.label_group(stat, attr, mean_lab) for stat, mean_lab in zip(self.stats, self.mean_labels) ] self.attr_labels = [ QGraphicsSimpleTextItem(lab) for lab in self.label_txts ] for it in chain(self.labels, self.attr_labels): self.box_scene.addItem(it) self.display_changed() def display_changed(self): if self.dataset is None: return if not self.is_continuous: return self.display_changed_disc() self.order = list(range(len(self.stats))) criterion = self._sorting_criteria_attrs[self.compare] if criterion: vals = [getattr(stat, criterion) for stat in self.stats] overmax = max((val for val in vals if val is not None), default=0) \ + 1 vals = [val if val is not None else overmax for val in vals] self.order = sorted(self.order, key=vals.__getitem__) heights = 90 if self.show_annotations else 60 for row, box_index in enumerate(self.order): y = (-len(self.stats) + row) * heights + 10 for item in self.boxes[box_index]: self.box_scene.addItem(item) item.setY(y) labels = self.labels[box_index] if self.show_annotations: labels.show() labels.setY(y) else: labels.hide() label = self.attr_labels[box_index] label.setY(y - 15 - label.boundingRect().height()) if self.show_annotations: label.hide() else: stat = self.stats[box_index] if self.compare == OWBoxPlot.CompareMedians and \ stat.median is not None: pos = stat.median + 5 / self.scale_x elif self.compare == OWBoxPlot.CompareMeans or stat.q25 is None: pos = stat.mean + 5 / self.scale_x else: pos = stat.q25 label.setX(pos * self.scale_x) label.show() r = QRectF(self.scene_min_x, -30 - len(self.stats) * heights, self.scene_width, len(self.stats) * heights + 90) self.box_scene.setSceneRect(r) self.compute_tests() self.show_posthoc() self.select_box_items() def display_changed_disc(self): self.clear_scene() self.attr_labels = [ QGraphicsSimpleTextItem(lab) for lab in self.label_txts_all ] if not self.stretched: if self.group_var: self.labels = [ QGraphicsTextItem("{}".format(int(sum(cont)))) for cont in 
self.conts ] else: self.labels = [QGraphicsTextItem(str(int(sum(self.dist))))] self.draw_axis_disc() if self.group_var: self.boxes = [ self.strudel(cont, i) for i, cont in enumerate(self.conts) ] else: self.boxes = [self.strudel(self.dist)] for row, box in enumerate(self.boxes): y = (-len(self.boxes) + row) * 40 + 10 label = self.attr_labels[row] b = label.boundingRect() label.setPos(-b.width() - 10, y - b.height() / 2) self.box_scene.addItem(label) if not self.stretched: label = self.labels[row] b = label.boundingRect() if self.group_var: right = self.scale_x * sum(self.conts[row]) else: right = self.scale_x * sum(self.dist) label.setPos(right + 10, y - b.height() / 2) self.box_scene.addItem(label) if self.show_labels and self.attribute is not self.group_var: for text_item, bar_part in zip(box[1::2], box[::2]): label = QGraphicsSimpleTextItem(text_item.toPlainText()) label.setPos(bar_part.boundingRect().x(), y - label.boundingRect().height() - 8) self.box_scene.addItem(label) for item in box: if isinstance(item, QGraphicsTextItem): continue self.box_scene.addItem(item) item.setPos(0, y) self.box_scene.setSceneRect(-self.label_width - 5, -30 - len(self.boxes) * 40, self.scene_width, len(self.boxes * 40) + 90) self.infot1.setText("") self.select_box_items() # noinspection PyPep8Naming def compute_tests(self): # The t-test and ANOVA are implemented here since they efficiently use # the widget-specific data in self.stats. 
# The non-parametric tests can't do this, so we use statistics.tests def stat_ttest(): d1, d2 = self.stats if d1.n == 0 or d2.n == 0: return np.nan, np.nan pooled_var = d1.var / d1.n + d2.var / d2.n df = pooled_var ** 2 / \ ((d1.var / d1.n) ** 2 / (d1.n - 1) + (d2.var / d2.n) ** 2 / (d2.n - 1)) if pooled_var == 0: return np.nan, np.nan t = abs(d1.mean - d2.mean) / math.sqrt(pooled_var) p = 2 * (1 - scipy.special.stdtr(df, t)) return t, p # TODO: Check this function # noinspection PyPep8Naming def stat_ANOVA(): if any(stat.n == 0 for stat in self.stats): return np.nan, np.nan n = sum(stat.n for stat in self.stats) grand_avg = sum(stat.n * stat.mean for stat in self.stats) / n var_between = sum(stat.n * (stat.mean - grand_avg)**2 for stat in self.stats) df_between = len(self.stats) - 1 var_within = sum(stat.n * stat.var for stat in self.stats) df_within = n - len(self.stats) F = (var_between / df_between) / (var_within / df_within) p = 1 - scipy.special.fdtr(df_between, df_within, F) return F, p if self.compare == OWBoxPlot.CompareNone or len(self.stats) < 2: t = "" elif any(s.n <= 1 for s in self.stats): t = "At least one group has just one instance, " \ "cannot compute significance" elif len(self.stats) == 2: if self.compare == OWBoxPlot.CompareMedians: t = "" # z, self.p = tests.wilcoxon_rank_sum( # self.stats[0].dist, self.stats[1].dist) # t = "Mann-Whitney's z: %.1f (p=%.3f)" % (z, self.p) else: t, self.p = stat_ttest() t = "Student's t: %.3f (p=%.3f)" % (t, self.p) else: if self.compare == OWBoxPlot.CompareMedians: t = "" # U, self.p = -1, -1 # t = "Kruskal Wallis's U: %.1f (p=%.3f)" % (U, self.p) else: F, self.p = stat_ANOVA() t = "ANOVA: %.3f (p=%.3f)" % (F, self.p) self.infot1.setText("<center>%s</center>" % t) def mean_label(self, stat, attr, val_name): label = QGraphicsItemGroup() t = QGraphicsSimpleTextItem( "%.*f" % (attr.number_of_decimals + 1, stat.mean), label) t.setFont(self._label_font) bbox = t.boundingRect() w2, h = bbox.width() / 2, bbox.height() 
t.setPos(-w2, -h) tpm = QGraphicsSimpleTextItem( " \u00b1 " + "%.*f" % (attr.number_of_decimals + 1, stat.dev), label) tpm.setFont(self._label_font) tpm.setPos(w2, -h) if val_name: vnm = QGraphicsSimpleTextItem(val_name + ": ", label) vnm.setFont(self._label_font) vnm.setBrush(self._attr_brush) vb = vnm.boundingRect() label.min_x = -w2 - vb.width() vnm.setPos(label.min_x, -h) else: label.min_x = -w2 return label def draw_axis(self): """Draw the horizontal axis and sets self.scale_x""" misssing_stats = not self.stats stats = self.stats or [BoxData(np.array([[0.], [1.]]), self.attribute)] mean_labels = self.mean_labels or [ self.mean_label(stats[0], self.attribute, "") ] bottom = min(stat.a_min for stat in stats) top = max(stat.a_max for stat in stats) first_val, step = compute_scale(bottom, top) while bottom <= first_val: first_val -= step bottom = first_val no_ticks = math.ceil((top - first_val) / step) + 1 top = max(top, first_val + no_ticks * step) gbottom = min(bottom, min(stat.mean - stat.dev for stat in stats)) gtop = max(top, max(stat.mean + stat.dev for stat in stats)) bv = self.box_view viewrect = bv.viewport().rect().adjusted(15, 15, -15, -30) self.scale_x = scale_x = viewrect.width() / (gtop - gbottom) # In principle we should repeat this until convergence since the new # scaling is too conservative. (No chance am I doing this.) 
mlb = min(stat.mean + mean_lab.min_x / scale_x for stat, mean_lab in zip(stats, mean_labels)) if mlb < gbottom: gbottom = mlb self.scale_x = scale_x = viewrect.width() / (gtop - gbottom) self.scene_min_x = gbottom * scale_x self.scene_width = (gtop - gbottom) * scale_x val = first_val while True: l = self.box_scene.addLine(val * scale_x, -1, val * scale_x, 1, self._pen_axis_tick) l.setZValue(100) t = self.box_scene.addSimpleText( self.attribute.repr_val(val) if not misssing_stats else "?", self._axis_font) t.setFlags(t.flags() | QGraphicsItem.ItemIgnoresTransformations) r = t.boundingRect() t.setPos(val * scale_x - r.width() / 2, 8) if val >= top: break val += step self.box_scene.addLine(bottom * scale_x - 4, 0, top * scale_x + 4, 0, self._pen_axis) def draw_axis_disc(self): """ Draw the horizontal axis and sets self.scale_x for discrete attributes """ if self.stretched: step = steps = 10 else: if self.group_var: max_box = max(float(np.sum(dist)) for dist in self.conts) else: max_box = float(np.sum(self.dist)) if max_box == 0: self.scale_x = 1 return _, step = compute_scale(0, max_box) step = int(step) if step > 1 else 1 steps = int(math.ceil(max_box / step)) max_box = step * steps bv = self.box_view viewrect = bv.viewport().rect().adjusted(15, 15, -15, -30) self.scene_width = viewrect.width() lab_width = max(lab.boundingRect().width() for lab in self.attr_labels) lab_width = max(lab_width, 40) lab_width = min(lab_width, self.scene_width / 3) self.label_width = lab_width right_offset = 0 # offset for the right label if not self.stretched and self.labels: if self.group_var: rows = list(zip(self.conts, self.labels)) else: rows = [(self.dist, self.labels[0])] # available space left of the 'group labels' available = self.scene_width - lab_width - 10 scale_x = (available - right_offset) / max_box max_right = max( sum(dist) * scale_x + 10 + lbl.boundingRect().width() for dist, lbl in rows) right_offset = max(0, max_right - max_box * scale_x) self.scale_x = scale_x = \ 
(self.scene_width - lab_width - 10 - right_offset) / max_box self.box_scene.addLine(0, 0, max_box * scale_x, 0, self._pen_axis) for val in range(0, step * steps + 1, step): l = self.box_scene.addLine(val * scale_x, -1, val * scale_x, 1, self._pen_axis_tick) l.setZValue(100) t = self.box_scene.addSimpleText(str(val), self._axis_font) t.setPos(val * scale_x - t.boundingRect().width() / 2, 8) if self.stretched: self.scale_x *= 100 def label_group(self, stat, attr, mean_lab): def centered_text(val, pos): t = QGraphicsSimpleTextItem( "%.*f" % (attr.number_of_decimals + 1, val), labels) t.setFont(self._label_font) bbox = t.boundingRect() t.setPos(pos - bbox.width() / 2, 22) return t def line(x, down=1): QGraphicsLineItem(x, 12 * down, x, 20 * down, labels) def move_label(label, frm, to): label.setX(to) to += t_box.width() / 2 path = QPainterPath() path.lineTo(0, 4) path.lineTo(to - frm, 4) path.lineTo(to - frm, 8) p = QGraphicsPathItem(path) p.setPos(frm, 12) labels.addToGroup(p) labels = QGraphicsItemGroup() labels.addToGroup(mean_lab) m = stat.mean * self.scale_x mean_lab.setPos(m, -22) line(m, -1) if stat.median is not None: msc = stat.median * self.scale_x med_t = centered_text(stat.median, msc) med_box_width2 = med_t.boundingRect().width() line(msc) if stat.q25 is not None: x = stat.q25 * self.scale_x t = centered_text(stat.q25, x) t_box = t.boundingRect() med_left = msc - med_box_width2 if x + t_box.width() / 2 >= med_left - 5: move_label(t, x, med_left - t_box.width() - 5) else: line(x) if stat.q75 is not None: x = stat.q75 * self.scale_x t = centered_text(stat.q75, x) t_box = t.boundingRect() med_right = msc + med_box_width2 if x - t_box.width() / 2 <= med_right + 5: move_label(t, x, med_right + 5) else: line(x) return labels def box_group(self, stat, height=20): def line(x0, y0, x1, y1, *args): return QGraphicsLineItem(x0 * scale_x, y0, x1 * scale_x, y1, *args) scale_x = self.scale_x box = [] whisker1 = line(stat.a_min, -1.5, stat.a_min, 1.5) whisker2 = 
line(stat.a_max, -1.5, stat.a_max, 1.5) vert_line = line(stat.a_min, 0, stat.a_max, 0) mean_line = line(stat.mean, -height / 3, stat.mean, height / 3) for it in (whisker1, whisker2, mean_line): it.setPen(self._pen_paramet) vert_line.setPen(self._pen_dotted) var_line = line(stat.mean - stat.dev, 0, stat.mean + stat.dev, 0) var_line.setPen(self._pen_paramet) box.extend([whisker1, whisker2, vert_line, mean_line, var_line]) if stat.q25 is not None and stat.q75 is not None: mbox = FilterGraphicsRectItem(stat.conditions, stat.q25 * scale_x, -height / 2, (stat.q75 - stat.q25) * scale_x, height) mbox.setBrush(self._box_brush) mbox.setPen(QPen(Qt.NoPen)) mbox.setZValue(-200) box.append(mbox) if stat.median is not None: median_line = line(stat.median, -height / 2, stat.median, height / 2) median_line.setPen(self._pen_median) median_line.setZValue(-150) box.append(median_line) return box def strudel(self, dist, group_val_index=None): attr = self.attribute ss = np.sum(dist) box = [] if ss < 1e-6: cond = [FilterDiscrete(attr, None)] if group_val_index is not None: cond.append(FilterDiscrete(self.group_var, [group_val_index])) box.append(FilterGraphicsRectItem(cond, 0, -10, 1, 10)) cum = 0 for i, v in enumerate(dist): if v < 1e-6: continue if self.stretched: v /= ss v *= self.scale_x cond = [FilterDiscrete(attr, [i])] if group_val_index is not None: cond.append(FilterDiscrete(self.group_var, [group_val_index])) rect = FilterGraphicsRectItem(cond, cum + 1, -6, v - 2, 12) rect.setBrush(QBrush(QColor(*attr.colors[i]))) rect.setPen(QPen(Qt.NoPen)) if self.stretched: tooltip = "{}: {:.2f}%".format(attr.values[i], 100 * dist[i] / sum(dist)) else: tooltip = "{}: {}".format(attr.values[i], int(dist[i])) rect.setToolTip(tooltip) text = QGraphicsTextItem(attr.values[i]) box.append(rect) box.append(text) cum += v return box def commit(self): self.conditions = [ item.filter for item in self.box_scene.selectedItems() if item.filter ] selected, selection = None, [] if self.conditions: 
selected = Values(self.conditions, conjunction=False)(self.dataset) selection = np.in1d(self.dataset.ids, selected.ids, assume_unique=True).nonzero()[0] self.Outputs.selected_data.send(selected) self.Outputs.annotated_data.send( create_annotated_table(self.dataset, selection)) def show_posthoc(self): def line(y0, y1): it = self.box_scene.addLine(x, y0, x, y1, self._post_line_pen) it.setZValue(-100) self.posthoc_lines.append(it) while self.posthoc_lines: self.box_scene.removeItem(self.posthoc_lines.pop()) if self.compare == OWBoxPlot.CompareNone or len(self.stats) < 2: return if self.compare == OWBoxPlot.CompareMedians: crit_line = "median" else: crit_line = "mean" xs = [] height = 90 if self.show_annotations else 60 y_up = -len(self.stats) * height + 10 for pos, box_index in enumerate(self.order): stat = self.stats[box_index] x = getattr(stat, crit_line) if x is None: continue x *= self.scale_x xs.append(x * self.scale_x) by = y_up + pos * height line(by + 12, 3) line(by - 12, by - 25) used_to = [] last_to = to = 0 for frm, frm_x in enumerate(xs[:-1]): for to in range(frm + 1, len(xs)): if xs[to] - frm_x > 1.5: to -= 1 break if last_to == to or frm == to: continue for rowi, used in enumerate(used_to): if used < frm: used_to[rowi] = to break else: rowi = len(used_to) used_to.append(to) y = -6 - rowi * 6 it = self.box_scene.addLine(frm_x - 2, y, xs[to] + 2, y, self._post_grp_pen) self.posthoc_lines.append(it) last_to = to def get_widget_name_extension(self): if self.attribute: return self.attribute.name def send_report(self): self.report_plot() text = "" if self.attribute: text += "Box plot for attribute '{}' ".format(self.attribute.name) if self.group_var: text += "grouped by '{}'".format(self.group_var.name) if text: self.report_caption(text)
class OWSaveBase(widget.OWWidget, openclass=True):
    """
    Base class for Save widgets

    A derived class must provide, at minimum:

    - class `Inputs` and the corresponding handler that:

      - saves the input to an attribute `data`, and
      - calls `self.on_new_input`.

    - a class attribute `filters` with a list of filters or a dictionary whose
      keys are filters OR a class method `get_filters` that returns such a
      list or dictionary
    - method `do_save` that saves `self.data` into `self.filename`

    Alternatively, instead of defining `do_save` a derived class can make
    `filters` a dictionary that maps filters to classes that define a method
    `write` (like e.g. `TabReader`). Method `do_save` defined in the base
    class calls the writer corresponding to the currently chosen filter.

    A minimum example of derived class is
    `Orange.widgets.model.owsavemodel.OWSaveModel`.
    A more advanced widget that overrides a lot of base class behaviour is
    `Orange.widgets.data.owsave.OWSave`.
    """

    class Information(widget.OWWidget.Information):
        empty_input = widget.Msg("Empty input; nothing was saved.")

    class Error(widget.OWWidget.Error):
        no_file_name = widget.Msg("File name is not set.")
        general_error = widget.Msg("{}")

    want_main_area = False
    resizing_enabled = False

    last_dir = Setting("")
    filter = Setting("")  # default will be provided in __init__
    filename = Setting("", schema_only=True)
    auto_save = Setting(False)

    filters = []

    def __init__(self, start_row=0):
        """
        Set up the gui.

        The gui consists of a checkbox for auto save and two buttons put on a
        grid layout. Derived widgets that want to place controls above the auto
        save widget can set the `start_row` argument to the first free row,
        and this constructor will start filling the grid there.

        Args:
            start_row (int): the row at which to start filling the gui
        """
        super().__init__()
        self.data = None
        # This cannot be done outside because `filters` is defined by subclass.
        # `filter` is a persisted setting: besides being empty, it may hold a
        # value that is not among the filters this class offers, in which case
        # the lookup in `writer` would fail. Fall back to the first filter in
        # both cases; the "" default keeps a class without any filters from
        # raising StopIteration here.
        known_filters = self.get_filters()
        if not self.filter or self.filter not in known_filters:
            self.filter = next(iter(known_filters), "")

        self.grid = grid = QGridLayout()
        gui.widgetBox(self.controlArea, orientation=grid)
        grid.addWidget(
            gui.checkBox(
                None, self, "auto_save", "Autosave when receiving new data",
                callback=self.update_messages),
            start_row, 0, 1, 2)
        # Empty row that separates the checkbox from the buttons
        grid.setRowMinimumHeight(start_row + 1, 8)
        self.bt_save = gui.button(None, self, "Save", callback=self.save_file)
        grid.addWidget(self.bt_save, start_row + 2, 0)
        grid.addWidget(
            gui.button(None, self, "Save as ...", callback=self.save_file_as),
            start_row + 2, 1)

        self.adjustSize()
        self.update_messages()

    @classmethod
    def get_filters(cls):
        """Return the class attribute `filters` (a list or a dictionary)."""
        return cls.filters

    @property
    def writer(self):
        """
        Return the active writer

        The base class uses this property only in `do_save` to find the writer
        corresponding to the filter. Derived classes (e.g. OWSave) may also use
        it elsewhere.
        """
        return self.get_filters()[self.filter]

    def on_new_input(self):
        """
        This method must be called from input signal handler.

        - It clears errors, warnings and information and calls
          `self.update_messages` to set them as needed.
        - It also calls `update_status`, which can be overridden in derived
          classes to set the status (e.g. the number of input rows)
        - Calls `self.save_file` if `self.auto_save` is enabled and
          `self.filename` is provided.
        """
        self.Error.clear()
        self.Warning.clear()
        self.Information.clear()
        self.update_messages()
        self.update_status()
        if self.auto_save and self.filename:
            self.save_file()

    def save_file_as(self):
        """
        Ask the user for the filename and try saving the file

        Does nothing if the dialog returns an empty file name. Otherwise it
        stores the chosen file name, filter and directory, updates the text
        of the save button and the messages, and tries to save.
        """
        filename, selected_filter = self.get_save_filename()
        if not filename:
            return
        self.filename = filename
        self.filter = selected_filter
        self.last_dir = os.path.split(self.filename)[0]
        self.bt_save.setText(f"Save as {os.path.split(filename)[1]}")
        self.update_messages()
        self._try_save()

    def save_file(self):
        """
        If file name is provided, try saving, else call save_file_as
        """
        if not self.filename:
            self.save_file_as()
        else:
            self._try_save()

    def _try_save(self):
        """
        Private method that calls do_save within try-except that catches and
        shows IOError. Do nothing if there is no data or no file name.
        """
        self.Error.general_error.clear()
        if self.data is None or not self.filename:
            return
        try:
            self.do_save()
        except IOError as err_value:
            self.Error.general_error(str(err_value))

    def do_save(self):
        """
        Do the saving.

        Default implementation calls the write method of the writer
        corresponding to the current filter. This requires that get_filters()
        returns a dictionary that maps filters to writer classes.

        Derived classes may simplify this by providing a list of filters and
        override do_save. This is particularly handy if the widget supports only
        a single format.
        """
        # This method is separated out because it will usually be overridden
        self.writer.write(self.filename, self.data)

    def update_messages(self):
        """
        Update errors, warnings and information.

        Default method sets no_file_name if auto_save is enabled but file name
        is not provided; and empty_input if file name is given but there is no
        data.

        Derived classes that define further messages will typically set them
        in this method.
        """
        self.Error.no_file_name(shown=not self.filename and self.auto_save)
        # bool() so that `shown` receives a flag and not the file name string
        self.Information.empty_input(
            shown=bool(self.filename) and self.data is None)

    def update_status(self):
        """
        Update the input/output indicator. Default method does nothing.
        """

    def initial_start_dir(self):
        """
        Provide initial start directory

        Return either the current file's path, the last directory or home.
        """
        if self.filename and os.path.exists(os.path.split(self.filename)[0]):
            return self.filename
        else:
            return self.last_dir or _userhome

    @staticmethod
    def suggested_name():
        """
        Suggest the name for the output file or return an empty string.
        """
        return ""

    @classmethod
    def _replace_extension(cls, filename, extension):
        """
        Remove all extensions that appear in any filter.

        Double extensions are broken in different weird ways across all systems,
        including omitting some, like turning iris.tab.gz to iris.gz. This
        function removes anything that can appear anywhere, and then appends
        the given `extension`.
        """
        known_extensions = set()
        for filt in cls.get_filters():
            known_extensions |= set(
                cls._extension_from_filter(filt).split("."))
        # Splitting ".tab" on "." gives a leading empty string; it must not
        # count as an extension or the loop below would not stop on a file
        # name without any extension.
        known_extensions.discard("")
        while True:
            base, ext = os.path.splitext(filename)
            if ext[1:] not in known_extensions:
                break
            filename = base
        return filename + extension

    @staticmethod
    def _extension_from_filter(selected_filter):
        """
        Return the extension (with the leading dot) from the parenthesized
        part at the end of the filter, e.g. ".tab" for "... (*.tab)".
        """
        return re.search(r".*\(\*?(\..*)\)$", selected_filter).group(1)

    def valid_filters(self):
        """Return filters to offer in the dialog; by default, all filters."""
        return self.get_filters()

    def default_valid_filter(self):
        """Return the filter to preselect in the dialog; by default, the
        current one."""
        return self.filter

    # As of Qt 5.9, QFileDialog.setDefaultSuffix does not support double
    # suffixes, not even in non-native dialogs. We handle each OS separately.
    if sys.platform in ("darwin", "win32"):
        # macOS and Windows native dialogs do not correctly handle double
        # extensions. We thus don't pass any suffixes to the dialog and add
        # the correct suffix after closing the dialog and only then check
        # if the file exists and ask whether to override.
        # It is a bit confusing that the user does not see the final name in the
        # dialog, but I see no better solution.
        def get_save_filename(self):  # pragma: no cover
            """
            Show the save dialog until the user cancels or confirms a name.

            Returns:
                a tuple with the file name and the selected filter, or two
                empty strings if the dialog was rejected
            """
            if sys.platform == "darwin":
                def remove_star(filt):
                    return filt.replace(" (*.", " (.")
            else:
                def remove_star(filt):
                    return filt

            # Maps filters as shown in the dialog to the original filters
            no_ext_filters = {remove_star(f): f for f in self.valid_filters()}
            filename = self.initial_start_dir()
            while True:
                dlg = QFileDialog(
                    None, "Save File", filename, ";;".join(no_ext_filters))
                dlg.setAcceptMode(dlg.AcceptSave)
                dlg.selectNameFilter(remove_star(self.default_valid_filter()))
                dlg.setOption(QFileDialog.DontConfirmOverwrite)
                if dlg.exec() == QFileDialog.Rejected:
                    return "", ""
                filename = dlg.selectedFiles()[0]
                selected_filter = no_ext_filters[dlg.selectedNameFilter()]
                filename = self._replace_extension(
                    filename, self._extension_from_filter(selected_filter))
                # If the user declines to overwrite, the dialog is shown again
                if not os.path.exists(filename) or QMessageBox.question(
                        self, "Overwrite file?",
                        f"File {os.path.split(filename)[1]} already exists.\n"
                        "Overwrite?") == QMessageBox.Yes:
                    return filename, selected_filter

    else:  # Linux and any unknown platforms
        # Qt does not use a native dialog on Linux, so we can connect to
        # filterSelected and to overload selectFile to change the extension
        # while the dialog is open.
        # For unknown platforms (which?), we also use the non-native dialog to
        # be sure we know what happens.
        class SaveFileDialog(QFileDialog):
            # pylint: disable=protected-access
            def __init__(self, save_cls, *args, **kwargs):
                super().__init__(*args, **kwargs)
                self.save_cls = save_cls
                self.suffix = ""
                self.setAcceptMode(QFileDialog.AcceptSave)
                self.setOption(QFileDialog.DontUseNativeDialog)
                self.filterSelected.connect(self.updateDefaultExtension)

            def selectNameFilter(self, selected_filter):
                super().selectNameFilter(selected_filter)
                self.updateDefaultExtension(selected_filter)

            def updateDefaultExtension(self, selected_filter):
                self.suffix = \
                    self.save_cls._extension_from_filter(selected_filter)
                files = self.selectedFiles()
                if files and not os.path.isdir(files[0]):
                    # Re-select the file so that selectFile fixes its extension
                    self.selectFile(files[0])

            def selectFile(self, filename):
                filename = \
                    self.save_cls._replace_extension(filename, self.suffix)
                super().selectFile(filename)

        def get_save_filename(self):
            """
            Show the save dialog.

            Returns:
                a tuple with the file name and the selected filter, or two
                empty strings if the dialog was rejected
            """
            dlg = self.SaveFileDialog(
                type(self),
                None, "Save File", self.initial_start_dir(),
                ";;".join(self.valid_filters()))
            dlg.selectNameFilter(self.default_valid_filter())
            if dlg.exec() == QFileDialog.Rejected:
                return "", ""
            else:
                return dlg.selectedFiles()[0], dlg.selectedNameFilter()
class OWLouvainClustering(widget.OWWidget): name = "Louvain Clustering" description = "Detects communities in a network of nearest neighbors." icon = "icons/LouvainClustering.svg" priority = 2110 want_main_area = False settingsHandler = DomainContextHandler() class Inputs: data = Input("Data", Table, default=True) class Outputs: annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table, default=True) if Network is not None: graph = Output("Network", Network) apply_pca = ContextSetting(True) pca_components = ContextSetting(_DEFAULT_PCA_COMPONENTS) normalize = ContextSetting(True) metric_idx = ContextSetting(0) k_neighbors = ContextSetting(_DEFAULT_K_NEIGHBORS) resolution = ContextSetting(1.) auto_commit = Setting(False) class Information(widget.OWWidget.Information): modified = Msg("Press commit to recompute clusters and send new data") class Error(widget.OWWidget.Error): empty_dataset = Msg("No features in data") def __init__(self): super().__init__() self.data = None # type: Optional[Table] self.preprocessed_data = None # type: Optional[Table] self.pca_projection = None # type: Optional[Table] self.graph = None # type: Optional[nx.Graph] self.partition = None # type: Optional[np.array] # Use a executor with a single worker, to limit CPU overcommitment for # cancelled tasks. The method does not have a fine cancellation # granularity so we assure that there are not N - 1 jobs executing # for no reason only to be thrown away. It would be better to use the # global pool but implement a limit on jobs from this source. 
self.__executor = futures.ThreadPoolExecutor(max_workers=1) self.__task = None # type: Optional[TaskState] self.__invalidated = False # coalescing commit timer self.__commit_timer = QTimer(self, singleShot=True) self.__commit_timer.timeout.connect(self.commit) # Set up UI info_box = gui.vBox(self.controlArea, "Info") self.info_label = gui.widgetLabel(info_box, "No data on input.") # type: QLabel preprocessing_box = gui.vBox(self.controlArea, "Preprocessing") self.normalize_cbx = gui.checkBox( preprocessing_box, self, "normalize", label="Normalize data", callback=self._invalidate_preprocessed_data, ) # type: QCheckBox self.apply_pca_cbx = gui.checkBox( preprocessing_box, self, "apply_pca", label="Apply PCA preprocessing", callback=self._apply_pca_changed, ) # type: QCheckBox self.pca_components_slider = gui.hSlider( preprocessing_box, self, "pca_components", label="PCA Components: ", minValue=2, maxValue=_MAX_PCA_COMPONENTS, callback=self._invalidate_pca_projection, tracking=False) # type: QSlider graph_box = gui.vBox(self.controlArea, "Graph parameters") self.metric_combo = gui.comboBox( graph_box, self, "metric_idx", label="Distance metric", items=[m[0] for m in METRICS], callback=self._invalidate_graph, orientation=Qt.Horizontal, ) # type: gui.OrangeComboBox self.k_neighbors_spin = gui.spin( graph_box, self, "k_neighbors", minv=1, maxv=_MAX_K_NEIGBOURS, label="k neighbors", controlWidth=80, alignment=Qt.AlignRight, callback=self._invalidate_graph, ) # type: gui.SpinBoxWFocusOut self.resolution_spin = gui.hSlider( graph_box, self, "resolution", minValue=0, maxValue=5., step=1e-1, label="Resolution", intOnly=False, labelFormat="%.1f", callback=self._invalidate_partition, tracking=False, ) # type: QSlider self.resolution_spin.parent().setToolTip( "The resolution parameter affects the number of clusters to find. 
" "Smaller values tend to produce more clusters and larger values " "retrieve less clusters.") self.apply_button = gui.auto_apply( self.controlArea, self, "auto_commit", box=None, commit=lambda: self.commit(), callback=lambda: self._on_auto_commit_changed()) # type: QWidget def _preprocess_data(self): if self.preprocessed_data is None: if self.normalize: normalizer = preprocess.Normalize(center=False) self.preprocessed_data = normalizer(self.data) else: self.preprocessed_data = self.data def _apply_pca_changed(self): self.controls.pca_components.setEnabled(self.apply_pca) self._invalidate_graph() def _invalidate_preprocessed_data(self): self.preprocessed_data = None self._invalidate_pca_projection() # If we don't apply PCA, this still invalidates the graph, otherwise # this change won't be propagated further if not self.apply_pca: self._invalidate_graph() def _invalidate_pca_projection(self): self.pca_projection = None if not self.apply_pca: return self._invalidate_graph() self._set_modified(True) def _invalidate_graph(self): self.graph = None self._invalidate_partition() self._set_modified(True) def _invalidate_partition(self): self.partition = None self._invalidate_output() self.Information.modified() self._set_modified(True) def _invalidate_output(self): self.__invalidated = True if self.__task is not None: self.__cancel_task(wait=False) if self.auto_commit: self.__commit_timer.start() else: self.__set_state_ready() def _set_modified(self, state): """ Mark the widget (GUI) as containing modified state. 
""" if self.data is None: # does not apply when we have no data state = False elif self.auto_commit: # does not apply when auto commit is on state = False self.Information.modified(shown=state) def _on_auto_commit_changed(self): if self.auto_commit and self.__invalidated: self.commit() def cancel(self): """Cancel any running jobs.""" self.__cancel_task(wait=False) self.__set_state_ready() def commit(self): self.__commit_timer.stop() self.__invalidated = False self._set_modified(False) # Cancel current running task self.__cancel_task(wait=False) if self.data is None: self.__set_state_ready() return self.Error.clear() if self.partition is not None: self.__set_state_ready() self._send_data() return self._preprocess_data() state = TaskState(self) # Prepare/assemble the task(s) to run; reuse partial results if self.apply_pca: if self.pca_projection is not None: data = self.pca_projection pca_components = None else: data = self.preprocessed_data pca_components = self.pca_components else: data = self.preprocessed_data pca_components = None if self.graph is not None: # run on graph only; no need to do PCA and k-nn search ... 
graph = self.graph k_neighbors = metric = None else: k_neighbors, metric = self.k_neighbors, METRICS[self.metric_idx][1] graph = None if graph is None: task = partial( run_on_data, data, pca_components=pca_components, normalize=self.normalize, k_neighbors=k_neighbors, metric=metric, resolution=self.resolution, state=state, ) else: task = partial(run_on_graph, graph, resolution=self.resolution, state=state) self.info_label.setText("Running...") self.__set_state_busy() self.__start_task(task, state) @Slot(object) def __set_partial_results(self, result): # type: (Tuple[str, Any]) -> None which, res = result if which == "pca_projection": assert isinstance(res, Table) and len(res) == len(self.data) self.pca_projection = res elif which == "graph": assert isinstance(res, nx.Graph) self.graph = res elif which == "partition": assert isinstance(res, np.ndarray) self.partition = res else: assert False, which @Slot(object) def __on_done(self, future): # type: (Future["Results"]) -> None assert future.done() assert self.__task is not None assert self.__task.future is future assert self.__task.watcher.future() is future self.__task, task = None, self.__task task.deleteLater() self.__set_state_ready() result = future.result() self.__set_results(result) @Slot(str) def setStatusMessage(self, text): super().setStatusMessage(text) @Slot(float) def progressBarSet(self, value, *a, **kw): super().progressBarSet(value, *a, **kw) def __set_state_ready(self): self.progressBarFinished() self.setInvalidated(False) self.setStatusMessage("") def __set_state_busy(self): self.progressBarInit() self.setInvalidated(True) def __start_task(self, task, state): # type: (Callable[[], Any], TaskState) -> None assert self.__task is None state.status_changed.connect(self.setStatusMessage) state.progress_changed.connect(self.progressBarSet) state.partial_result_ready.connect(self.__set_partial_results) state.watcher.done.connect(self.__on_done) state.start(self.__executor, task) state.setParent(self) 
self.__task = state def __cancel_task(self, wait=True): # Cancel and dispose of the current task if self.__task is not None: state, self.__task = self.__task, None state.cancel() state.partial_result_ready.disconnect(self.__set_partial_results) state.status_changed.disconnect(self.setStatusMessage) state.progress_changed.disconnect(self.progressBarSet) state.watcher.done.disconnect(self.__on_done) if wait: futures.wait([state.future]) state.deleteLater() else: w = FutureWatcher(state.future, parent=state) w.done.connect(state.deleteLater) def __set_results(self, results): # type: ("Results") -> None # NOTE: All of these have already been set by __set_partial_results, # we double check that they are aliases if results.pca_projection is not None: assert self.pca_components == results.pca_components assert self.pca_projection is results.pca_projection self.pca_projection = results.pca_projection if results.graph is not None: assert results.metric == METRICS[self.metric_idx][1] assert results.k_neighbors == self.k_neighbors assert self.graph is results.graph self.graph = results.graph if results.partition is not None: assert results.resolution == self.resolution assert self.partition is results.partition self.partition = results.partition # Display the number of found clusters in the UI num_clusters = len(np.unique(self.partition)) self.info_label.setText("%d clusters found." 
% num_clusters) self._send_data() def _send_data(self): if self.partition is None or self.data is None: return domain = self.data.domain # Compute the frequency of each cluster index counts = np.bincount(self.partition) indices = np.argsort(counts)[::-1] index_map = {n: o for n, o in zip(indices, range(len(indices)))} new_partition = list(map(index_map.get, self.partition)) cluster_var = DiscreteVariable( get_unique_names(domain, "Cluster"), values=[ "C%d" % (i + 1) for i, _ in enumerate(np.unique(new_partition)) ]) new_domain = add_columns(domain, metas=[cluster_var]) new_table = self.data.transform(new_domain) new_table.get_column_view(cluster_var)[0][:] = new_partition self.Outputs.annotated_data.send(new_table) if Network is not None: n_edges = self.graph.number_of_edges() edges = sp.coo_matrix( (np.ones(n_edges), np.array(self.graph.edges()).T), shape=(n_edges, n_edges)) graph = Network(new_table, edges) self.Outputs.graph.send(graph) @Inputs.data def set_data(self, data): self.closeContext() self.Error.clear() prev_data, self.data = self.data, data self.openContext(self.data) # Make sure to properly enable/disable slider based on `apply_pca` setting self.controls.pca_components.setEnabled(self.apply_pca) if prev_data and self.data and array_equal(prev_data.X, self.data.X): if self.auto_commit and not self.isInvalidated(): self._send_data() return self.cancel() # Clear the outputs self.Outputs.annotated_data.send(None) if Network is not None: self.Outputs.graph.send(None) # Clear internal state self.clear() self._invalidate_pca_projection() # Make sure the dataset is ok if self.data is not None and len(self.data.domain.attributes) < 1: self.Error.empty_dataset() self.data = None if self.data is None: return # Can't have more PCA components than the number of attributes n_attrs = len(data.domain.attributes) self.pca_components_slider.setMaximum(min(_MAX_PCA_COMPONENTS, n_attrs)) # Can't have more k neighbors than there are data points 
self.k_neighbors_spin.setMaximum(min(_MAX_K_NEIGBOURS, len(data) - 1)) self.info_label.setText("Clustering not yet run.") self.commit() def clear(self): self.__cancel_task(wait=False) self.preprocessed_data = None self.pca_projection = None self.graph = None self.partition = None self.Error.clear() self.Information.modified.clear() self.info_label.setText("No data on input.") def onDeleteWidget(self): self.__cancel_task(wait=True) self.__executor.shutdown(True) self.clear() self.data = None super().onDeleteWidget() def send_report(self): pca = report.bool_str(self.apply_pca) if self.apply_pca: pca += report.plural(", {number} component{s}", self.pca_components) self.report_items(( ("Normalize data", report.bool_str(self.normalize)), ("PCA preprocessing", pca), ("Metric", METRICS[self.metric_idx][0]), ("k neighbors", self.k_neighbors), ("Resolution", self.resolution), ))
class OWScatterPlotGraph(gui.OWComponent, ScaleScatterPlotData):
    """Scatter plot component drawn with pyqtgraph.

    The component is owned by a "master" widget (``scatter_widget``) and
    reports problems through the master's ``Warning``/``Information``
    message groups. It keeps the plot items (points, selection overlay,
    density image, regression line, labels, legends) and the current
    selection as a per-row ``uint8`` group index array.
    """

    attr_color = ContextSetting(None, required=ContextSetting.OPTIONAL)
    attr_label = ContextSetting(None, required=ContextSetting.OPTIONAL)
    attr_shape = ContextSetting(None, required=ContextSetting.OPTIONAL)
    attr_size = ContextSetting(None, required=ContextSetting.OPTIONAL)
    label_only_selected = Setting(False)

    point_width = Setting(10)
    alpha_value = Setting(128)
    show_grid = Setting(False)
    show_legend = Setting(True)
    tooltip_shows_all = Setting(False)
    class_density = Setting(False)
    show_reg_line = Setting(False)
    resolution = 256

    # The last symbol ("?") is used for points with an undefined shape value
    CurveSymbols = np.array("o x t + d s t2 t3 p h star ?".split())
    MinShapeSize = 6
    DarkerValue = 120
    UnknownColor = (168, 50, 168)

    def __init__(self, scatter_widget, parent=None, _="None",
                 view_box=InteractiveViewBox):
        """Create the plot widget and register messages on the master.

        :param scatter_widget: owning widget; stored as ``self.master``
        :param parent: Qt parent passed to ``pg.PlotWidget``
        :param _: unused positional placeholder
        :param view_box: callable/class invoked as ``view_box(self)`` to
            build the view box
        """
        gui.OWComponent.__init__(self, scatter_widget)
        self.view_box = view_box(self)
        self.plot_widget = pg.PlotWidget(viewBox=self.view_box, parent=parent,
                                         background="w")
        self.plot_widget.getPlotItem().buttonsHidden = True
        self.plot_widget.setAntialiasing(True)
        self.plot_widget.sizeHint = lambda: QSize(500, 500)
        scene = self.plot_widget.scene()
        self._create_drag_tooltip(scene)
        # Original Table as passed from widget to new_data before
        # transformations
        self._data = None

        self.replot = self.plot_widget.replot
        ScaleScatterPlotData.__init__(self)
        self.density_img = None
        self.scatterplot_item = None
        self.scatterplot_item_sel = None
        self.reg_line_item = None

        self.labels = []

        self.master = scatter_widget
        self.master.Warning.add_message(
            "missing_coords",
            "Plot cannot be displayed because '{}' or '{}' is missing for "
            "all data points")
        self.master.Information.add_message(
            "missing_coords",
            "Points with missing '{}' or '{}' are not displayed")
        self.master.Information.add_message(
            "missing_size",
            "Points with undefined '{}' are shown in smaller size")
        self.master.Information.add_message(
            "missing_shape",
            "Points with undefined '{}' are shown as crossed circles")

        self.shown_attribute_indices = []
        self.shown_x = self.shown_y = None
        self.pen_colors = self.brush_colors = None

        self.valid_data = None  # np.ndarray
        self.selection = None  # np.ndarray
        self.n_points = 0

        self.gui = OWPlotGUI(self)
        self.continuous_palette = ContinuousPaletteGenerator(
            QColor(255, 255, 0), QColor(0, 0, 255), True)
        self.discrete_palette = ColorPaletteGenerator()

        self.selection_behavior = 0

        self.legend = self.color_legend = None
        self.__legend_anchor = (1, 0), (1, 0)
        self.__color_legend_anchor = (1, 1), (1, 1)

        self.scale = None  # DiscretizedScale

        self.subset_indices = None

        self.update_grid()

        self._tooltip_delegate = HelpEventDelegate(self.help_event)
        self.plot_widget.scene().installEventFilter(self._tooltip_delegate)

    def _create_drag_tooltip(self, scene):
        """Build the hidden modifier-key hint group and attach it to `scene`.

        ``self.tiptexts`` maps an integer modifier combination to the hint
        text in which the matching part is shown in bold; key 0 holds the
        plain text.
        """
        tip_parts = [
            (Qt.ShiftModifier, "Shift: Add group"),
            (Qt.ShiftModifier + Qt.ControlModifier,
             "Shift-{}: Append to group".format(
                 "Cmd" if sys.platform == "darwin" else "Ctrl")),
            (Qt.AltModifier, "Alt: Remove")
        ]
        all_parts = ", ".join(part for _, part in tip_parts)
        self.tiptexts = {
            int(modifier): all_parts.replace(part, "<b>{}</b>".format(part))
            for modifier, part in tip_parts
        }
        self.tiptexts[0] = all_parts

        self.tip_textitem = text = QGraphicsTextItem()
        # Set to the longest text
        text.setHtml(self.tiptexts[Qt.ShiftModifier + Qt.ControlModifier])
        text.setPos(4, 2)
        r = text.boundingRect()
        rect = QGraphicsRectItem(0, 0, r.width() + 8, r.height() + 4)
        rect.setBrush(QColor(224, 224, 224, 212))
        rect.setPen(QPen(Qt.NoPen))
        self.update_tooltip(Qt.NoModifier)

        scene.drag_tooltip = scene.createItemGroup([rect, text])
        scene.drag_tooltip.hide()

    def update_tooltip(self, modifiers):
        """Show the hint text matching the pressed keyboard `modifiers`."""
        modifiers &= Qt.ShiftModifier + Qt.ControlModifier + Qt.AltModifier
        text = self.tiptexts.get(int(modifiers), self.tiptexts[0])
        self.tip_textitem.setHtml(text)

    def new_data(self, data, subset_data=None, new=True, **args):
        """Store new input data and pass its dense form to ``set_data``.

        When `new` is true, the plot and all cached plot state (including
        the selection) are reset first. The ids of `subset_data` rows, if
        given, are remembered in ``self.subset_indices``.
        """
        if new:
            self.plot_widget.clear()
            self.remove_legend()

            self.density_img = None
            self.scatterplot_item = None
            self.scatterplot_item_sel = None
            self.reg_line_item = None
            self.labels = []
            self.selection = None
            self.valid_data = None

        self.subset_indices = set(
            e.id for e in subset_data) if subset_data else None

        self._data = data
        data = self.sparse_to_dense()
        self.set_data(data, **args)

    def sparse_to_dense(self):
        """Return the stored data, densified if it is sparse.

        ``self._data`` is returned unchanged when it is ``None`` or not
        sparse. Otherwise only the columns currently used for axes, color,
        shape, size and label are selected and converted to dense.
        """
        data = self._data
        if data is None or not data.is_sparse():
            return data
        attrs = {self.shown_x, self.shown_y,
                 self.attr_color, self.attr_shape,
                 self.attr_size, self.attr_label}
        domain = data.domain
        all_attrs = domain.variables + domain.metas
        attrs = list(set(all_attrs) & attrs)
        selected_data = data[:, attrs].to_dense()
        return selected_data

    def _clear_plot_widget(self):
        """Remove all plot items created by this component and clear titles."""
        self.remove_legend()
        if self.density_img:
            self.plot_widget.removeItem(self.density_img)
            self.density_img = None
        if self.scatterplot_item:
            self.plot_widget.removeItem(self.scatterplot_item)
            self.scatterplot_item = None
        if self.scatterplot_item_sel:
            self.plot_widget.removeItem(self.scatterplot_item_sel)
            self.scatterplot_item_sel = None
        if self.reg_line_item:
            self.plot_widget.removeItem(self.reg_line_item)
            self.reg_line_item = None
        for label in self.labels:
            self.plot_widget.removeItem(label)
        self.labels = []
        self.set_axis_title("bottom", "")
        self.set_axis_title("left", "")

    def update_data(self, attr_x, attr_y, reset_view=True):
        """Redraw the whole plot for the axes `attr_x` and `attr_y`.

        If no row has both coordinates defined, the selection is dropped, a
        warning is shown on the master and nothing is drawn. With
        `reset_view` the view range is fitted to the data; otherwise the
        current view range is kept.
        """
        self.master.Warning.missing_coords.clear()
        self.master.Information.missing_coords.clear()
        self._clear_plot_widget()

        if self.shown_y != attr_y:
            # 'reset' the axis text width estimation. Without this the left
            # axis tick labels space only ever expands
            yaxis = self.plot_widget.getAxis("left")
            yaxis.textWidth = 30

        self.shown_x, self.shown_y = attr_x, attr_y

        if attr_x not in self.data.domain or attr_y not in self.data.domain:
            data = self.sparse_to_dense()
            self.set_data(data)

        if self.jittered_data is None or not len(self.jittered_data):
            self.valid_data = None
        else:
            self.valid_data = self.get_valid_list([attr_x, attr_y])
            if not np.any(self.valid_data):
                self.valid_data = None
        if self.valid_data is None:
            self.selection = None
            self.n_points = 0
            self.master.Warning.missing_coords(
                self.shown_x.name, self.shown_y.name)
            return

        x_data, y_data = self.get_xy_data_positions(
            attr_x, attr_y, self.valid_data)
        self.n_points = len(x_data)

        if reset_view:
            min_x, max_x = np.nanmin(x_data), np.nanmax(x_data)
            min_y, max_y = np.nanmin(y_data), np.nanmax(y_data)
            self.view_box.setRange(
                QRectF(min_x, min_y, max_x - min_x, max_y - min_y),
                padding=0.025)
            self.view_box.init_history()
            self.view_box.tag_history()
        # Always read the limits back from the view: they are needed below
        # even when the view was not reset
        [min_x, max_x], [min_y, max_y] = self.view_box.viewRange()

        for axis, var in (("bottom", attr_x), ("left", attr_y)):
            self.set_axis_title(axis, var)
            if var.is_discrete:
                self.set_labels(axis, get_variable_values_sorted(var))
            else:
                self.set_labels(axis, None)

        color_data, brush_data = self.compute_colors()
        color_data_sel, brush_data_sel = self.compute_colors_sel()
        size_data = self.compute_sizes()
        shape_data = self.compute_symbols()

        if self.should_draw_density():
            rgb_data = [pen.color().getRgb()[:3] for pen in color_data]
            self.density_img = classdensity.class_density_image(
                min_x, max_x, min_y, max_y, self.resolution,
                x_data, y_data, rgb_data)
            self.plot_widget.addItem(self.density_img)

        self.data_indices = np.flatnonzero(self.valid_data)
        if len(self.data_indices) != len(self.data):
            self.master.Information.missing_coords(
                self.shown_x.name, self.shown_y.name)

        self.scatterplot_item = ScatterPlotItem(
            x=x_data, y=y_data, data=self.data_indices,
            symbol=shape_data, size=size_data,
            pen=color_data, brush=brush_data)
        self.scatterplot_item_sel = ScatterPlotItem(
            x=x_data, y=y_data, data=self.data_indices,
            symbol=shape_data, size=size_data + SELECTION_WIDTH,
            pen=color_data_sel, brush=brush_data_sel)
        # The selection overlay is added first, the points after it
        self.plot_widget.addItem(self.scatterplot_item_sel)
        self.plot_widget.addItem(self.scatterplot_item)

        self.scatterplot_item.selected_points = []
        self.scatterplot_item.sigClicked.connect(self.select_by_click)

        self.draw_regression_line(x_data, y_data, min_x, max_x)
        self.update_labels()
        self.make_legend()
        self.plot_widget.replot()

    def draw_regression_line(self, x_data, y_data, min_x, max_x):
        """Add a labelled regression line if enabled and both axes allow it."""
        if self.show_reg_line and self.can_draw_regresssion_line():
            slope, intercept, rvalue, _, _ = linregress(x_data, y_data)
            start_y = min_x * slope + intercept
            end_y = max_x * slope + intercept
            angle = np.degrees(np.arctan((end_y - start_y) / (max_x - min_x)))
            rotate = ((angle + 45) % 180) - 45 > 90
            color = QColor("#505050")
            l_opts = dict(color=color, position=abs(int(rotate) - 0.85),
                          rotateAxis=(1, 0), movable=True)
            self.reg_line_item = InfiniteLine(
                pos=QPointF(min_x, start_y),
                pen=pg.mkPen(color=color, width=1),
                angle=angle, label="r = {:.2f}".format(rvalue),
                labelOpts=l_opts)
            if rotate:
                self.reg_line_item.label.angle = 180
                self.reg_line_item.label.updateTransform()
            self.plot_widget.addItem(self.reg_line_item)

    def can_draw_density(self):
        """Density needs a discrete color attribute and continuous axes."""
        return self.domain is not None and \
            self.attr_color is not None and \
            self.attr_color.is_discrete and \
            self.shown_x.is_continuous and \
            self.shown_y.is_continuous

    def should_draw_density(self):
        """True if density is enabled, possible and there are >1 points."""
        return self.class_density and self.n_points > 1 and \
            self.can_draw_density()

    def can_draw_regresssion_line(self):
        """A regression line needs both shown axes to be continuous."""
        return self.domain is not None and \
            self.shown_x.is_continuous and \
            self.shown_y.is_continuous

    def set_labels(self, axis, labels):
        """Set tick labels of the named axis; falsy `labels` resets ticks."""
        axis = self.plot_widget.getAxis(axis)
        if labels:
            ticks = [[(i, labels[i]) for i in range(len(labels))]]
            axis.setTicks(ticks)
        else:
            axis.setTicks(None)

    def set_axis_title(self, axis, title):
        """Set the label of the named axis."""
        self.plot_widget.setLabel(axis=axis, text=title)

    def compute_sizes(self):
        """Return an array of point sizes for the valid rows.

        Without a size attribute every point gets ``point_width``. Points
        whose size is NaN get ``MinShapeSize - 2`` and an information
        message is shown on the master.
        """
        self.master.Information.missing_size.clear()
        if self.attr_size is None:
            size_data = np.full((self.n_points,), self.point_width,
                                dtype=float)
        else:
            size_data = \
                self.MinShapeSize + \
                self.scaled_data.get_column_view(
                    self.attr_size)[0][self.valid_data] * \
                self.point_width
        nans = np.isnan(size_data)
        if np.any(nans):
            size_data[nans] = self.MinShapeSize - 2
            self.master.Information.missing_size(self.attr_size)
        return size_data

    def update_sizes(self):
        """Reload the data and refresh point sizes."""
        self.set_data(self.sparse_to_dense())
        self.update_point_size()

    def update_point_size(self):
        """Apply freshly computed sizes to the points and selection overlay."""
        if self.scatterplot_item:
            size_data = self.compute_sizes()
            self.scatterplot_item.setSize(size_data)
            self.scatterplot_item_sel.setSize(size_data + SELECTION_WIDTH)

    def get_color(self):
        """Refresh the palette for the color attribute and return it.

        Returns ``None`` when no color attribute is set.
        """
        if self.attr_color is None:
            return None
        colors = self.attr_color.colors
        if self.attr_color.is_discrete:
            self.discrete_palette = ColorPaletteGenerator(
                number_of_colors=min(len(colors), MAX),
                rgb_colors=colors if len(colors) <= MAX
                else DefaultRGBColors)
        else:
            self.continuous_palette = ContinuousPaletteGenerator(*colors)
        return self.attr_color

    def compute_colors_sel(self, keep_colors=False):
        """Return ``(pens, brushes)`` for the selection overlay item.

        Unselected points get no pen; each selection group gets its own
        pen color. Brushes are always fully transparent.
        """
        if not keep_colors:
            self.pen_colors_sel = self.brush_colors_sel = None

        nopen = QPen(Qt.NoPen)
        if self.selection is not None:
            sels = np.max(self.selection)
            if sels == 1:
                pens = [nopen,
                        _make_pen(QColor(255, 190, 0, 255),
                                  SELECTION_WIDTH + 1.)]
            else:
                # Start with the first color so that the colors of the
                # additional attribute in annotation (which start with 0,
                # unselected) will match these colors
                palette = ColorPaletteGenerator(number_of_colors=sels + 1)
                pens = [nopen] + \
                       [_make_pen(palette[i + 1], SELECTION_WIDTH + 1.)
                        for i in range(sels)]
            pen = [pens[a] for a in self.selection[self.valid_data]]
        else:
            pen = [nopen] * self.n_points
        brush = [QBrush(QColor(255, 255, 255, 0))] * self.n_points
        return pen, brush

    def _reduce_values(self, attr):
        """
        If discrete variable has more than maximium allowed values,
        less used values are joined as "Other"

        Returns ``(values_by_frequency, column)``; the first element is
        ``None`` when no reduction is needed.
        """
        c_data = self.data.get_column_view(attr)[0][self.valid_data]
        if attr.is_continuous or len(attr.values) <= MAX:
            return None, c_data
        values_to_replace = Counter(c_data)
        values_to_replace = sorted(
            values_to_replace, key=values_to_replace.get, reverse=True)
        return values_to_replace, c_data

    def _get_values(self, attr):
        """Return value names of `attr`, capped at MAX with a final "Other"."""
        if len(attr.values) <= MAX:
            return attr.values
        values_to_replace, _ = self._reduce_values(attr)
        return [attr.values[int(i)] for i in values_to_replace
                if not np.isnan(i)][:MAX - 1] + ["Other"]

    def _get_data(self, attr):
        """Return the column of `attr` for valid rows, with rare values
        re-indexed by frequency rank and merged into index ``MAX - 1``."""
        values_to_replace, c_data = self._reduce_values(attr)
        if values_to_replace is not None:
            # Compare against an untouched copy while overwriting c_data
            c_data_2 = c_data.copy()
            for i, v in enumerate(values_to_replace):
                c_data[c_data_2 == v] = i if i < MAX - 1 else MAX - 1
        return c_data

    def compute_colors(self, keep_colors=False):
        """Return ``(pens, brushes)`` for the data points.

        With `keep_colors` the cached ``pen_colors``/``brush_colors`` are
        reused and only the alpha/subset handling is reapplied. When a
        subset is present, points in the subset are opaque and all others
        have a transparent brush.
        """
        if not keep_colors:
            self.pen_colors = self.brush_colors = None
        self.get_color()

        subset = None
        if self.subset_indices:
            subset = np.array([ex.id in self.subset_indices
                               for ex in self.data[self.valid_data]])

        if self.attr_color is None:  # same color
            color = self.plot_widget.palette().color(OWPalette.Data)
            pen = [_make_pen(color, 1.5)] * self.n_points
            if subset is not None:
                brush = [(QBrush(QColor(128, 128, 128, 0)),
                          QBrush(QColor(128, 128, 128, 255)))[s]
                         for s in subset]
            else:
                brush = [QBrush(QColor(128, 128, 128, self.alpha_value))] \
                        * self.n_points
            return pen, brush

        c_data = self._get_data(self.attr_color)
        if self.attr_color.is_continuous:
            if self.pen_colors is None:
                self.scale = DiscretizedScale(np.nanmin(c_data),
                                              np.nanmax(c_data))
                c_data -= self.scale.offset
                c_data /= self.scale.width
                c_data = np.floor(c_data) + 0.5
                c_data /= self.scale.bins
                c_data = np.clip(c_data, 0, 1)
                palette = self.continuous_palette
                self.pen_colors = palette.getRGB(c_data)
                self.brush_colors = np.hstack(
                    [self.pen_colors,
                     np.full((self.n_points, 1), self.alpha_value,
                             dtype=int)])
                # Darken the outline color relative to the fill
                self.pen_colors *= 100
                self.pen_colors //= self.DarkerValue
                self.pen_colors = [_make_pen(QColor(*col), 1.5)
                                   for col in self.pen_colors.tolist()]
            if subset is not None:
                self.brush_colors[:, 3] = 0
                self.brush_colors[subset, 3] = 255
            else:
                self.brush_colors[:, 3] = self.alpha_value
            pen = self.pen_colors
            brush = np.array([QBrush(QColor(*col))
                              for col in self.brush_colors.tolist()])
        else:
            if self.pen_colors is None:
                palette = self.discrete_palette
                n_colors = palette.number_of_colors
                c_data = c_data.copy()
                # Undefined values map to the extra gray color appended below
                c_data[np.isnan(c_data)] = n_colors
                c_data = c_data.astype(int)
                colors = np.r_[palette.getRGB(np.arange(n_colors)),
                               [[128, 128, 128]]]
                pens = np.array(
                    [_make_pen(QColor(*col).darker(self.DarkerValue), 1.5)
                     for col in colors])
                self.pen_colors = pens[c_data]
                alpha = self.alpha_value if subset is None else 255
                # Column 0: transparent brush, column 1: colored brush
                self.brush_colors = np.array(
                    [[QBrush(QColor(0, 0, 0, 0)),
                      QBrush(QColor(col[0], col[1], col[2], alpha))]
                     for col in colors])
                self.brush_colors = self.brush_colors[c_data]
            if subset is not None:
                brush = np.where(subset,
                                 self.brush_colors[:, 1],
                                 self.brush_colors[:, 0])
            else:
                brush = self.brush_colors[:, 1]
            pen = self.pen_colors
        return pen, brush

    def update_colors(self, keep_colors=False):
        """Notify the master, reload the data and recolor the points."""
        self.master.update_colors()
        self.set_data(self.sparse_to_dense())
        self.update_alpha_value(keep_colors)

    def update_alpha_value(self, keep_colors=False):
        """Recompute pens/brushes and apply them to both scatter items.

        Unless `keep_colors` is set, the legend is rebuilt and the density
        image is either redrawn (via ``update_data``) or removed.
        """
        if self.scatterplot_item:
            pen_data, brush_data = self.compute_colors(keep_colors)
            pen_data_sel, brush_data_sel = self.compute_colors_sel(
                keep_colors)
            self.scatterplot_item.setPen(pen_data, update=False, mask=None)
            self.scatterplot_item.setBrush(brush_data, mask=None)
            self.scatterplot_item_sel.setPen(
                pen_data_sel, update=False, mask=None)
            self.scatterplot_item_sel.setBrush(brush_data_sel, mask=None)
            if not keep_colors:
                self.make_legend()

                if self.should_draw_density():
                    self.update_data(self.shown_x, self.shown_y)
                elif self.density_img:
                    self.plot_widget.removeItem(self.density_img)

    def create_labels(self):
        """Create one empty ``TextItem`` at the position of every point."""
        for x, y in zip(*self.scatterplot_item.getData()):
            ti = TextItem()
            self.plot_widget.addItem(ti)
            ti.setPos(x, y)
            self.labels.append(ti)

    def _create_label_column(self):
        """Return label values for the plotted rows.

        The column is read from ``self.data`` when it contains the label
        attribute, otherwise from the master's data.
        """
        if self.attr_label in self.data.domain:
            label_column = self.data.get_column_view(self.attr_label)[0]
        else:
            label_column = self.master.data.get_column_view(
                self.attr_label)[0]
        return label_column[self.data_indices]

    def update_labels(self):
        """Set the text of point labels according to the label settings."""
        if self.attr_label is None or \
                self.label_only_selected and self.selection is None:
            for label in self.labels:
                label.setText("")
            return
        self.assure_attribute_present(self.attr_label)
        if not self.labels:
            self.create_labels()
        label_column = self._create_label_column()
        formatter = self.attr_label.str_val
        label_data = map(formatter, label_column)
        black = pg.mkColor(0, 0, 0)
        selection = self.selection[self.valid_data] \
            if self.selection is not None else []
        if self.label_only_selected:
            for label, text, selected \
                    in zip(self.labels, label_data, selection):
                label.setText(text if selected else "", black)
        else:
            for label, text in zip(self.labels, label_data):
                label.setText(text, black)

    def compute_symbols(self):
        """Return an array of symbol names, one per plotted point.

        Points with an undefined shape value get the last symbol in
        ``CurveSymbols`` and an information message is shown.
        """
        self.master.Information.missing_shape.clear()
        if self.attr_shape is None:
            shape_data = self.CurveSymbols[np.zeros(self.n_points, dtype=int)]
        else:
            shape_data = self._get_data(self.attr_shape)
            nans = np.isnan(shape_data)
            if np.any(nans):
                shape_data[nans] = len(self.CurveSymbols) - 1
                self.master.Information.missing_shape(self.attr_shape)
            shape_data = self.CurveSymbols[shape_data.astype(int)]
        return shape_data

    def update_shapes(self):
        """Recompute point symbols and rebuild the legend."""
        self.assure_attribute_present(self.attr_shape)
        if self.scatterplot_item:
            shape_data = self.compute_symbols()
            self.scatterplot_item.setSymbol(shape_data)
        self.make_legend()

    def assure_attribute_present(self, attr):
        """Reload the data if `attr` is not in the current data's domain."""
        if self.data is not None and attr not in self.data.domain:
            self.set_data(self.sparse_to_dense())

    def update_grid(self):
        """Show or hide the grid according to ``show_grid``."""
        self.plot_widget.showGrid(x=self.show_grid, y=self.show_grid)

    def update_legend(self):
        """Apply the ``show_legend`` setting to the existing legend."""
        if self.legend:
            self.legend.setVisible(self.show_legend)

    def create_legend(self):
        """Create the legend in the view box at the remembered anchor."""
        self.legend = LegendItem()
        self.legend.setParentItem(self.plot_widget.getViewBox())
        self.legend.restoreAnchor(self.__legend_anchor)

    def remove_legend(self):
        """Remove both legends, remembering their anchors for re-creation."""
        if self.legend:
            anchor = legend_anchor_pos(self.legend)
            if anchor is not None:
                self.__legend_anchor = anchor
            self.legend.setParent(None)
            self.legend = None
        if self.color_legend:
            anchor = legend_anchor_pos(self.color_legend)
            if anchor is not None:
                self.__color_legend_anchor = anchor
            self.color_legend.setParent(None)
            self.color_legend = None

    def make_legend(self):
        """Rebuild the color and shape legends from scratch."""
        self.remove_legend()
        self.make_color_legend()
        self.make_shape_legend()
        self.update_legend()

    def make_color_legend(self):
        """Add color entries to the legend.

        A discrete color attribute adds one entry per value (using the
        shape symbols too when color and shape are the same attribute); a
        continuous one creates a separate palette legend.
        """
        if self.attr_color is None:
            return
        use_shape = self.attr_shape == self.get_color()
        if self.attr_color.is_discrete:
            if not self.legend:
                self.create_legend()
            palette = self.discrete_palette
            for i, value in enumerate(self._get_values(self.attr_color)):
                color = QColor(*palette.getRGB(i))
                pen = _make_pen(color.darker(self.DarkerValue), 1.5)
                color.setAlpha(
                    self.alpha_value if self.subset_indices is None else 255)
                brush = QBrush(color)
                self.legend.addItem(
                    ScatterPlotItem(
                        pen=pen, brush=brush, size=10,
                        symbol=self.CurveSymbols[i] if use_shape else "o"),
                    escape(value))
        else:
            legend = self.color_legend = LegendItem()
            legend.setParentItem(self.plot_widget.getViewBox())
            legend.restoreAnchor(self.__color_legend_anchor)

            label = PaletteItemSample(self.continuous_palette, self.scale)
            legend.addItem(label, "")
            legend.setGeometry(label.boundingRect())

    def make_shape_legend(self):
        """Add shape entries unless shape is unset or equals the color."""
        if self.attr_shape is None or self.attr_shape == self.get_color():
            return
        if not self.legend:
            self.create_legend()
        color = QColor(0, 0, 0)
        color.setAlpha(self.alpha_value)
        for i, value in enumerate(self._get_values(self.attr_shape)):
            self.legend.addItem(
                ScatterPlotItem(pen=color, brush=color, size=10,
                                symbol=self.CurveSymbols[i]),
                escape(value))

    def zoom_button_clicked(self):
        """Switch the view box to rectangle mouse mode."""
        self.plot_widget.getViewBox().setMouseMode(
            self.plot_widget.getViewBox().RectMode)

    def pan_button_clicked(self):
        """Switch the view box to pan mouse mode."""
        self.plot_widget.getViewBox().setMouseMode(
            self.plot_widget.getViewBox().PanMode)

    def select_button_clicked(self):
        """Switch the view box to rectangle mouse mode."""
        self.plot_widget.getViewBox().setMouseMode(
            self.plot_widget.getViewBox().RectMode)

    def reset_button_clicked(self):
        """Redraw the plot with the view reset to the data range."""
        # also redraw density image
        self.update_data(self.shown_x, self.shown_y, reset_view=True)
        # self.view_box.autoRange()

    def select_by_click(self, _, points):
        """Slot for point clicks: select the clicked `points`."""
        if self.scatterplot_item is not None:
            self.select(points)

    def select_by_rectangle(self, value_rect):
        """Select all points whose position lies inside `value_rect`."""
        if self.scatterplot_item is not None:
            points = [point for point in self.scatterplot_item.points()
                      if value_rect.contains(QPointF(point.pos()))]
            self.select(points)

    def unselect_all(self):
        """Clear the selection and notify the master."""
        self.selection = None
        self.update_colors(keep_colors=True)
        if self.label_only_selected:
            self.update_labels()
        self.master.selection_changed()

    def select(self, points):
        """Update the selection with `points` based on keyboard modifiers.

        Alt removes the points, Shift+Ctrl adds them to the last group,
        Shift starts a new group, and no modifier replaces the selection.
        """
        # noinspection PyArgumentList
        if self.data is None:
            return
        if self.selection is None:
            self.selection = np.zeros(len(self.data), dtype=np.uint8)
        indices = [p.data() for p in points]
        keys = QApplication.keyboardModifiers()
        # Remove from selection
        if keys & Qt.AltModifier:
            self.selection[indices] = 0
        # Append to the last group
        elif keys & Qt.ShiftModifier and keys & Qt.ControlModifier:
            self.selection[indices] = np.max(self.selection)
        # Create a new group
        elif keys & Qt.ShiftModifier:
            self.selection[indices] = np.max(self.selection) + 1
        # No modifiers: new selection
        else:
            self.selection = np.zeros(len(self.data), dtype=np.uint8)
            self.selection[indices] = 1
        self.update_colors(keep_colors=True)
        if self.label_only_selected:
            self.update_labels()
        self.master.selection_changed()

    def get_selection(self):
        """Return indices of selected rows (empty array if none)."""
        if self.selection is None:
            return np.array([], dtype=np.uint8)
        else:
            return np.flatnonzero(self.selection)

    def set_palette(self, p):
        """Set the Qt palette of the plot widget."""
        self.plot_widget.setPalette(p)

    def save_to_file(self, size):
        """No-op placeholder."""
        pass

    def help_event(self, event):
        """Show a tooltip for the points under the cursor.

        Returns ``True`` if a tooltip was shown, ``False`` otherwise.
        """
        if self.scatterplot_item is None:
            return False

        domain = self.data.domain
        PARTS = (("Class", "Classes", 4, domain.class_vars),
                 ("Meta", "Metas", 4, domain.metas),
                 ("Feature", "Features", 10, domain.attributes))

        def format_val(var, point_data, bold=False):
            text = escape('{} = {}'.format(var.name, point_data[var]))
            if bold:
                text = "<b>{}</b>".format(text)
            return text

        def show_part(point_data, singular, plural, max_shown, vars):
            # Axis variables are already shown first, so skip them here
            # (except among class variables)
            cols = [format_val(var, point_data)
                    for var in vars[:max_shown + 2]
                    if vars == domain.class_vars
                    or var not in (self.shown_x, self.shown_y)][:max_shown]
            if not cols:
                return ""
            n_vars = len(vars)
            if n_vars > max_shown:
                cols[-1] = "... and {} others".format(n_vars - max_shown + 1)
            return \
                "<br/><b>{}</b>:<br/>".format(
                    singular if n_vars < 2 else plural) \
                + "<br/>".join(cols)

        def point_data(p):
            point_data = self.data[p.data()]
            text = "<br/>".join(
                format_val(var, point_data, bold=self.tooltip_shows_all)
                for var in (self.shown_x, self.shown_y))
            if self.tooltip_shows_all:
                text += "<br/>" + \
                        "".join(show_part(point_data, *columns)
                                for columns in PARTS)
            return text

        act_pos = self.scatterplot_item.mapFromScene(event.scenePos())
        points = self.scatterplot_item.pointsAt(act_pos)
        if len(points):
            text = "<hr/>".join(point_data(point) for point in points)
            QToolTip.showText(event.screenPos(), text,
                              widget=self.plot_widget)
            return True
        else:
            return False

    def box_zoom_select(self, parent):
        """Create the "Zoom/Select" toolbar box and wire up its buttons."""
        g = self.gui
        box_zoom_select = gui.vBox(parent, "Zoom/Select")
        zoom_select_toolbar = g.zoom_select_toolbar(
            box_zoom_select, nomargin=True,
            buttons=[g.StateButtonsBegin, g.SimpleSelect, g.Pan, g.Zoom,
                     g.StateButtonsEnd, g.ZoomReset]
        )
        buttons = zoom_select_toolbar.buttons
        buttons[g.Zoom].clicked.connect(self.zoom_button_clicked)
        buttons[g.Pan].clicked.connect(self.pan_button_clicked)
        buttons[g.SimpleSelect].clicked.connect(self.select_button_clicked)
        buttons[g.ZoomReset].clicked.connect(self.reset_button_clicked)
        return box_zoom_select

    def zoom_actions(self, parent):
        """Add "Zoom in", "Zoom out" and "Fit in view" actions to `parent`."""
        def zoom(s):
            """
            Zoom in/out by factor `s`.
            scaleBy scales the view's bounds (the axis range)
            """
            self.view_box.scaleBy((1 / s, 1 / s))

        def fit_to_view():
            # The view box is stored as `view_box` (see __init__); the
            # previous `self.viewbox` raised AttributeError when triggered.
            self.view_box.autoRange()

        zoom_in = QAction(
            "Zoom in", parent, triggered=lambda: zoom(1.25)
        )
        zoom_in.setShortcuts([QKeySequence(QKeySequence.ZoomIn),
                              QKeySequence(parent.tr("Ctrl+="))])
        zoom_out = QAction(
            "Zoom out", parent, shortcut=QKeySequence.ZoomOut,
            triggered=lambda: zoom(1 / 1.25)
        )
        zoom_fit = QAction(
            "Fit in view", parent,
            shortcut=QKeySequence(Qt.ControlModifier | Qt.Key_0),
            triggered=fit_to_view
        )
        parent.addActions([zoom_in, zoom_out, zoom_fit])
class OWPivot(OWWidget): name = "Pivot Table" description = "Reshape data table based on column values." icon = "icons/Pivot.svg" priority = 1000 keywords = ["pivot", "group", "aggregate"] class Inputs: data = Input("Data", Table, default=True) class Outputs: pivot_table = Output("Pivot Table", Table, default=True) filtered_data = Output("Filtered Data", Table) grouped_data = Output("Grouped Data", Table) class Warning(OWWidget.Warning): # TODO - inconsistent for different variable types no_col_feature = Msg("Column feature should be selected.") cannot_aggregate = Msg("Some aggregations ({}) cannot be performed.") settingsHandler = DomainContextHandler() row_feature = ContextSetting(None) col_feature = ContextSetting(None) val_feature = ContextSetting(None) sel_agg_functions = Setting(set([Pivot.Count])) selection = ContextSetting(set()) auto_commit = Setting(True) AGGREGATIONS = (Pivot.Count, Pivot.Count_defined, None, Pivot.Sum, Pivot.Mean, Pivot.Mode, Pivot.Min, Pivot.Max, Pivot.Median, Pivot.Var, None, Pivot.Majority) def __init__(self): super().__init__() self.data = None # type: Table self.pivot = None # type: Pivot self._add_control_area_controls() self._add_main_area_controls() def _add_control_area_controls(self): box = gui.vBox(self.controlArea, "Rows") gui.comboBox(box, self, "row_feature", contentsLength=12, model=DomainModel(valid_types=DomainModel.PRIMITIVE), callback=self.__feature_changed) box = gui.vBox(self.controlArea, "Columns") gui.comboBox(box, self, "col_feature", contentsLength=12, model=DomainModel(placeholder="(Same as rows)", valid_types=DiscreteVariable), callback=self.__feature_changed) box = gui.vBox(self.controlArea, "Values") gui.comboBox(box, self, "val_feature", contentsLength=12, model=DomainModel(placeholder="(None)"), orientation=Qt.Horizontal, callback=self.__val_feature_changed) self.__add_aggregation_controls() gui.rubber(self.controlArea) gui.auto_apply(self.controlArea, self, "auto_commit") def 
__add_aggregation_controls(self): box = gui.vBox(self.controlArea, "Aggregations") for agg in self.AGGREGATIONS: if agg is None: gui.separator(box, height=1) line = QFrame() line.setFrameShape(QFrame.HLine) line.setLineWidth(1) line.setFrameShadow(QFrame.Sunken) box.layout().addWidget(line) continue check_box = QCheckBox(str(agg), box) check_box.setChecked(agg in self.sel_agg_functions) check_box.clicked.connect( lambda *args, a=agg: self.__aggregation_cb_clicked(a, args[0])) box.layout().addWidget(check_box) def _add_main_area_controls(self): self.table_view = PivotTableView() self.table_view.selection_changed.connect(self.__invalidate_filtered) self.mainArea.layout().addWidget(self.table_view) @property def no_col_feature(self): return self.col_feature is None and self.row_feature is not None \ and self.row_feature.is_continuous @property def skipped_aggs(self): def add(fun): data, var = self.data, self.val_feature return data and not var and fun not in Pivot.AutonomousFunctions \ or var and var.is_discrete and fun in Pivot.ContVarFunctions \ or var and var.is_continuous and fun in Pivot.DiscVarFunctions skipped = [str(fun) for fun in self.sel_agg_functions if add(fun)] return ", ".join(sorted(skipped)) def __feature_changed(self): self.selection = set() self.pivot = None self.commit() def __val_feature_changed(self): self.selection = set() if self.no_col_feature: return self.pivot.update_pivot_table(self.val_feature) self.commit() def __aggregation_cb_clicked(self, agg_fun: Pivot.Functions, checked: bool): self.selection = set() if checked: self.sel_agg_functions.add(agg_fun) else: self.sel_agg_functions.remove(agg_fun) if self.no_col_feature or not self.pivot or not self.data: return self.pivot.update_group_table(self.sel_agg_functions, self.val_feature) self.commit() def __invalidate_filtered(self): self.selection = self.table_view.get_selection() self.commit() @Inputs.data @check_sql_input def set_data(self, data): self.closeContext() self.data = data 
self.pivot = None self.check_data() self.init_attr_values() self.openContext(self.data) self.unconditional_commit() def check_data(self): self.clear_messages() if not self.data: self.table_view.clear() def init_attr_values(self): domain = self.data.domain if self.data and len(self.data) else None for attr in ("row_feature", "col_feature", "val_feature"): getattr(self.controls, attr).model().set_domain(domain) setattr(self, attr, None) model = self.controls.row_feature.model() if model: self.row_feature = model[0] model = self.controls.val_feature.model() if model and len(model) > 2: self.val_feature = domain.variables[0] \ if domain.variables[0] in model else model[2] def commit(self): if self.pivot is None: self.Warning.no_col_feature.clear() if self.no_col_feature: self.Warning.no_col_feature() return self.pivot = Pivot(self.data, self.sel_agg_functions, self.row_feature, self.col_feature, self.val_feature) self.Warning.cannot_aggregate.clear() if self.skipped_aggs: self.Warning.cannot_aggregate(self.skipped_aggs) self._update_graph() self.Outputs.grouped_data.send(self.pivot.group_table) self.Outputs.pivot_table.send(self.pivot.pivot_table) self.Outputs.filtered_data.send(self.get_filtered_data()) def _update_graph(self): self.table_view.clear() if self.pivot.pivot_table: col_feature = self.col_feature or self.row_feature self.table_view.update_table(col_feature.name, self.row_feature.name, *self.pivot.pivot_tables) self.table_view.set_selection(self.selection) def get_filtered_data(self): if not self.data or not self.selection or not self.pivot.pivot_table: return None cond = [] for i, j in self.selection: f = [] for at, val in [(self.row_feature, self.pivot.pivot_table.X[i, 0]), (self.col_feature, j)]: if isinstance(at, DiscreteVariable): f.append(FilterDiscrete(at, [val])) elif isinstance(at, ContinuousVariable): f.append(FilterContinuous(at, FilterContinuous.Equal, val)) cond.append(Values(f)) return Values([f for f in cond], conjunction=False)(self.data) 
def sizeHint(self):
    """Return the preferred widget size, 640 x 525."""
    return QSize(640, 525)

def send_report(self):
    """Add the selected features and the table view to the report.

    The row, column and value features are always listed.  The table view
    is reported when there is data and a value feature is set.
    """
    self.report_items((("Row feature", self.row_feature),
                       ("Column feature", self.col_feature),
                       ("Value feature", self.val_feature)))
    if self.data and self.val_feature is not None:
        self.report_table("", self.table_view)
    # NOTE(review): the nesting of the final report_table call could not be
    # recovered from the collapsed source; it is assumed to belong to this
    # branch -- confirm against the original layout.
    if not self.data:
        self.report_items((("Group by", self.row_feature), ))
        self.report_table(self.table_view)
class OWTranspose(OWWidget):
    """Widget that sends the transposed input table to its output.

    Names of the new features are either generated from a prefix
    (``GENERIC``) or taken from the values of a chosen variable
    (``FROM_VAR``).
    """

    name = "Transpose"
    description = "Transpose data table."
    icon = "icons/Transpose.svg"
    priority = 2000
    keywords = []

    class Inputs:
        data = Input("Data", Table)

    class Outputs:
        data = Output("Data", Table, dynamic=False)

    # Values of the ``feature_type`` setting
    GENERIC, FROM_VAR = range(2)

    resizing_enabled = False
    want_main_area = False

    # Prefix used when the user leaves the prefix line edit empty
    DEFAULT_PREFIX = "Feature"

    settingsHandler = DomainContextHandler()
    feature_type = ContextSetting(GENERIC)
    feature_name = ContextSetting("")
    feature_names_column = ContextSetting(None)
    auto_apply = Setting(True)

    class Warning(OWWidget.Warning):
        duplicate_names = Msg("Values are not unique.\nTo avoid multiple "
                              "features with the same name, values \nof "
                              "'{}' have been augmented with indices.")

    class Error(OWWidget.Error):
        value_error = Msg("{}")

    def __init__(self):
        """Build the control area: naming options and the apply button."""
        super().__init__()
        self.data = None

        # self.apply is changed later, pylint: disable=unnecessary-lambda
        names_box = gui.radioButtons(
            self.controlArea, self, "feature_type",
            box="Feature names", callback=lambda: self.apply())

        # Option 1: generic names built from a user-provided prefix
        generic_button = gui.appendRadioButton(names_box, "Generic")
        prefix_box = gui.indentedBox(
            names_box, gui.checkButtonOffsetHint(generic_button))
        prefix_edit = gui.lineEdit(
            prefix_box, self, "feature_name",
            placeholderText="Type a prefix ...",
            toolTip="Custom feature name")
        prefix_edit.editingFinished.connect(self._apply_editing)

        # Option 2: names taken from the values of a variable
        self.meta_button = gui.appendRadioButton(names_box, "From variable:")
        self.feature_model = DomainModel(
            valid_types=(ContinuousVariable, StringVariable),
            alphabetical=False)
        combo_box = gui.indentedBox(
            names_box, gui.checkButtonOffsetHint(generic_button))
        self.feature_combo = gui.comboBox(
            combo_box, self, "feature_names_column",
            contentsLength=12,
            callback=self._feature_combo_changed,
            model=self.feature_model)

        self.apply_button = gui.auto_commit(
            self.controlArea, self, "auto_apply", "&Apply",
            box=False, commit=self.apply)
        self.apply_button.button.setAutoDefault(False)

        self.set_controls()

    def _apply_editing(self):
        """Switch to generic naming with the stripped prefix and apply."""
        self.feature_type = self.GENERIC
        self.feature_name = self.feature_name.strip()
        self.apply()

    def _feature_combo_changed(self):
        """Switch to naming from the chosen variable and apply."""
        self.feature_type = self.FROM_VAR
        self.apply()

    @Inputs.data
    def set_data(self, data):
        """Store the new input, refresh the controls and apply."""
        # Skip the context if the combo is empty: a context with
        # feature_model == None would then match all domains
        if self.feature_model:
            self.closeContext()
        self.data = data
        self.set_controls()
        # The model is re-tested on purpose: set_controls() has just
        # repopulated it from the new data.
        if self.feature_model:
            self.openContext(data)
        self.apply()

    def set_controls(self):
        """Populate the variable model and reset the naming settings."""
        domain = self.data and self.data.domain
        self.feature_model.set_domain(domain)
        self.meta_button.setEnabled(bool(self.feature_model))
        if not self.feature_model:
            self.feature_names_column = None
            return
        self.feature_names_column = self.feature_model[0]
        self.feature_type = self.FROM_VAR

    def apply(self):
        """Transpose the data and send the result (``None`` if no data)."""
        self.clear_messages()
        result = None
        if self.data:
            # ``variable`` is False for generic naming, otherwise the
            # variable chosen in the combo (which may be None).
            naming_from_variable = self.feature_type == self.FROM_VAR
            variable = naming_from_variable and self.feature_names_column
            prefix = self.feature_name or self.DEFAULT_PREFIX
            try:
                result = Table.transpose(
                    self.data, variable, feature_name=prefix)
                if variable:
                    column = self.data.get_column_view(variable)[0]
                    if len(set(column)) != len(column):
                        self.Warning.duplicate_names(variable)
            except ValueError as err:
                self.Error.value_error(err)
        self.Outputs.data.send(result)

    def send_report(self):
        """Report how feature names are chosen and, if present, the data."""
        if self.feature_type != self.GENERIC:
            names = "from variable"
            column = self.feature_names_column
            if column:
                names += " '{}'".format(column.name)
        else:
            names = self.feature_name or self.DEFAULT_PREFIX
        self.report_items("", [("Feature names", names)])
        if self.data:
            self.report_data("Data", self.data)
class OWManifoldLearning(OWWidget):
    """Widget that embeds the input data with a chosen manifold method.

    The method is picked from ``MANIFOLD_METHODS``; each method has its own
    parameter editor, of which only the one for the current method is shown.
    """

    name = "Manifold Learning"
    description = "Nonlinear dimensionality reduction."
    icon = "icons/Manifold.svg"
    priority = 2200
    keywords = []
    settings_version = 2

    class Inputs:
        data = Input("Data", Table)

    class Outputs:
        transformed_data = Output("Transformed Data", Table, dynamic=False,
                                  replaces=["Transformed data"])

    # Order must match the order of ``parameter_editors`` built in __init__
    MANIFOLD_METHODS = (TSNE, MDS, Isomap, LocallyLinearEmbedding,
                        SpectralEmbedding)

    tsne_editor = SettingProvider(TSNEParametersEditor)
    mds_editor = SettingProvider(MDSParametersEditor)
    isomap_editor = SettingProvider(IsomapParametersEditor)
    lle_editor = SettingProvider(LocallyLinearEmbeddingParametersEditor)
    spectral_editor = SettingProvider(SpectralEmbeddingParametersEditor)

    resizing_enabled = False
    want_main_area = False

    manifold_method_index = Setting(0)
    n_components = Setting(2)
    auto_apply = Setting(True)

    class Error(OWWidget.Error):
        n_neighbors_too_small = Msg("For chosen method and components, "
                                    "neighbors must be greater than {}")
        manifold_error = Msg("{}")
        sparse_not_supported = Msg("Sparse data is not supported.")
        out_of_memory = Msg("Out of memory")

    class Warning(OWWidget.Warning):
        graph_not_connected = Msg("Disconnected graph, embedding may not work")

    @classmethod
    def migrate_settings(cls, settings, version):
        """Upgrade stored t-SNE settings from versions older than 2.

        ``init_index`` is renamed to ``initialization_index`` and both it and
        ``metric_index`` are clamped to the last valid position of the
        corresponding value list.
        """
        if version < 2:
            tsne_settings = settings.get('tsne_editor', {})
            # Fixup initialization index
            if 'init_index' in tsne_settings:
                idx = tsne_settings.pop('init_index')
                # The largest valid index is len - 1, not len
                last = len(TSNEParametersEditor.initialization_values) - 1
                tsne_settings['initialization_index'] = min(idx, last)
            # We removed several metrics here
            if 'metric_index' in tsne_settings:
                idx = tsne_settings['metric_index']
                last = len(TSNEParametersEditor.metric_values) - 1
                tsne_settings['metric_index'] = min(idx, last)

    def __init__(self):
        """Build the method selector, parameter editors and output box."""
        self.data = None

        # GUI
        method_box = gui.vBox(self.controlArea, "Method")
        self.manifold_methods_combo = gui.comboBox(
            method_box, self, "manifold_method_index",
            items=[m.name for m in self.MANIFOLD_METHODS],
            callback=self.manifold_method_changed)

        self._set_input_summary()
        self._set_output_summary(None)

        self.params_box = gui.vBox(self.controlArea, "Parameters")

        self.tsne_editor = TSNEParametersEditor(self)
        self.mds_editor = MDSParametersEditor(self)
        self.isomap_editor = IsomapParametersEditor(self)
        self.lle_editor = LocallyLinearEmbeddingParametersEditor(self)
        self.spectral_editor = SpectralEmbeddingParametersEditor(self)

        self.parameter_editors = [
            self.tsne_editor, self.mds_editor, self.isomap_editor,
            self.lle_editor, self.spectral_editor
        ]

        # All editors live in the same box; only the active one is visible
        for editor in self.parameter_editors:
            self.params_box.layout().addWidget(editor)
            editor.hide()
        self.params_widget = self.parameter_editors[self.manifold_method_index]
        self.params_widget.show()

        output_box = gui.vBox(self.controlArea, "Output")
        self.n_components_spin = gui.spin(
            output_box, self, "n_components", 1, 10,
            label="Components:", alignment=Qt.AlignRight,
            callbackOnReturn=True, callback=self.settings_changed)
        self.apply_button = gui.auto_apply(self.controlArea, self,
                                           box=False, commit=self.apply)

    def manifold_method_changed(self):
        """Show the editor of the newly selected method and re-apply."""
        self.params_widget.hide()
        self.params_widget = self.parameter_editors[self.manifold_method_index]
        self.params_widget.show()
        self.apply()

    def settings_changed(self):
        """Re-apply after a parameter change."""
        self.apply()

    @Inputs.data
    def set_data(self, data):
        """Store the input, limit the component count and apply."""
        self.data = data
        self._set_input_summary()
        self.n_components_spin.setMaximum(
            len(self.data.domain.attributes) if self.data else 10)
        self.unconditional_apply()

    def apply(self):
        """Compute the embedding and send it (``None`` on failure/no data).

        While the projector runs, ``warnings.warn`` is temporarily replaced
        so that the "Graph is not fully connected" warning is shown as a
        widget warning; the original function is always restored.
        """
        builtin_warn = warnings.warn

        def _handle_disconnected_graph_warning(msg, *args, **kwargs):
            # ``msg`` may be a Warning instance rather than a str
            if str(msg).startswith("Graph is not fully connected"):
                self.Warning.graph_not_connected()
            else:
                builtin_warn(msg, *args, **kwargs)

        out = None
        data = self.data
        method = self.MANIFOLD_METHODS[self.manifold_method_index]
        have_data = data is not None and len(data)
        self.Error.clear()
        self.Warning.clear()

        if have_data and data.is_sparse():
            self.Error.sparse_not_supported()
        elif have_data:
            # Component names must not clash with class/meta variable names
            names = [var.name
                     for var in chain(data.domain.class_vars,
                                      data.domain.metas)
                     if var]
            proposed = ["C{}".format(i) for i in range(self.n_components)]
            unique = get_unique_names(names, proposed)
            domain = Domain([ContinuousVariable(name) for name in unique],
                            data.domain.class_vars,
                            data.domain.metas)
            try:
                warnings.warn = _handle_disconnected_graph_warning
                projector = method(**self.get_method_parameters(data, method))
                model = projector(data)
                if isinstance(model, TSNEModel):
                    out = model.embedding
                else:
                    X = model.embedding_
                    out = Table(domain, X, data.Y, data.metas)
            except ValueError as e:
                # An argument-less ValueError must not raise IndexError here
                message = e.args[0] if e.args else ""
                if message == "for method='hessian', n_neighbors " \
                              "must be greater than [n_components" \
                              " * (n_components + 3) / 2]":
                    n = self.n_components * (self.n_components + 3) / 2
                    self.Error.n_neighbors_too_small("{}".format(n))
                else:
                    self.Error.manifold_error(message)
            except MemoryError:
                self.Error.out_of_memory()
            except np.linalg.LinAlgError as e:
                self.Error.manifold_error(str(e))
            finally:
                warnings.warn = builtin_warn

        self._set_output_summary(out)
        self.Outputs.transformed_data.send(out)

    def _set_input_summary(self):
        """Show the input size in the status bar, or the no-input marker."""
        summary = len(self.data) if self.data else self.info.NoInput
        details = format_summary_details(self.data) if self.data else ""
        self.info.set_input_summary(summary, details)

    def _set_output_summary(self, output):
        """Show the output size in the status bar, or the no-output marker."""
        summary = len(output) if output else self.info.NoOutput
        details = format_summary_details(output) if output else ""
        self.info.set_output_summary(summary, details)

    def get_method_parameters(self, data, method):
        """Return the keyword arguments for the current method.

        ``data`` and ``method`` are accepted but not used here.
        """
        parameters = dict(n_components=self.n_components)
        parameters.update(self.params_widget.get_parameters())
        return parameters

    def send_report(self):
        """Report the method, its parameters and, if present, the data."""
        method = self.MANIFOLD_METHODS[self.manifold_method_index]
        self.report_items((("Method", method.name), ))
        parameters = self.get_method_parameters(self.data, method)
        self.report_items("Method parameters", tuple(parameters.items()))
        if self.data:
            self.report_data("Data", self.data)
class OWExplainFeatureBase(OWWidget, ConcurrentWidgetMixin, openclass=True):
    """Base class for widgets that plot per-feature explanation scores.

    Subclasses provide ``run`` (executed as a concurrent task), plus
    ``update_selection``, ``get_selected_data`` and ``get_scores_table``;
    the base versions of those raise ``NotImplementedError``.
    """

    class Inputs:
        data = Input("Data", Table, default=True)
        model = Input("Model", Model)

    class Outputs:
        selected_data = Output("Selected Data", Table)
        scores = Output("Scores", Table)

    class Error(OWWidget.Error):
        domain_transform_err = Msg("{}")
        unknown_err = Msg("An error occurred.\n{}")

    class Information(OWWidget.Information):
        data_sampled = Msg("Data has been sampled.")

    settingsHandler = NotImplemented
    n_attributes = Setting(10)
    zoom_level = Setting(0)
    selection = Setting((), schema_only=True)
    auto_send = Setting(True)
    visual_settings = Setting({}, schema_only=True)

    graph_name = "scene"
    PLOT_CLASS = FeaturesPlot

    def __init__(self):
        """Initialise state, build the GUI and the visual settings dialog."""
        OWWidget.__init__(self)
        ConcurrentWidgetMixin.__init__(self)
        self.results: Optional[BaseResults] = None
        self.data: Optional[Table] = None
        self.model: Optional[Model] = None
        self.plot: Optional[FeaturesPlot] = None
        self.scene: Optional[GraphicsScene] = None
        self.view: Optional[GraphicsView] = None
        self.setup_gui()
        # Selection stored in the settings; passed to select_pending()
        # once the task has finished (see on_done).
        self.__pending_selection = self.selection

        initial = self.PLOT_CLASS().parameter_setter.initial_settings
        VisualSettingsDialog(self, initial)

    # GUI setup
    def setup_gui(self):
        """Create controls, plot view and buttons; reset the summaries."""
        self._add_controls()
        self._add_plot()
        self._add_buttons()
        self.info.set_input_summary(self.info.NoInput)
        self.info.set_output_summary(self.info.NoOutput)

    def _add_plot(self):
        """Create the graphics scene and view and add them to main area."""
        self.scene = GraphicsScene()
        self.view = GraphicsView(self.scene)
        self.view.resized.connect(self._update_plot)
        self.view.setRenderHint(QPainter.Antialiasing, True)
        self.view.setAlignment(Qt.AlignTop | Qt.AlignLeft)
        self.mainArea.layout().addWidget(self.view)

    def _add_controls(self):
        """Add the "Display" box with the feature count and zoom controls."""
        self.display_box = box = gui.vBox(self.controlArea, "Display")
        gui.spin(box, self, "n_attributes", 1, MAX_N_ITEMS,
                 label="Top features:", controlWidth=50,
                 callback=self.__n_spin_changed)
        gui.hSlider(box, self, "zoom_level", minValue=0, maxValue=200,
                    width=150, label="Zoom:", createLabel=False,
                    callback=self.__zoom_slider_changed)

    def _add_buttons(self):
        """Add a stretch to the control area and the auto-send button."""
        gui.rubber(self.controlArea)
        gui.auto_send(self.buttonsArea, self, "auto_send")

    def __zoom_slider_changed(self):
        if self.plot is not None:
            self.plot.set_height(self.zoom_level)

    def __n_spin_changed(self):
        if self.plot is not None:
            self.plot.set_n_visible(self.n_attributes)

    # Inputs
    @Inputs.data
    @check_sql_input
    def set_data(self, data: Optional[Table]):
        """Store the data, update the input summary and validate it."""
        self.data = data
        summary = len(data) if data else self.info.NoInput
        details = format_summary_details(data) if data else ""
        self.info.set_input_summary(summary, details)
        self._check_data()

    def _check_data(self):
        """Hook for subclasses; does nothing here."""

    @Inputs.model
    def set_model(self, model: Optional[Model]):
        """Store the model and reopen the settings context for it."""
        self.closeContext()
        self.model = model
        self.setup_controls()
        self.openContext(self.model)

    def setup_controls(self):
        """Hook for subclasses; does nothing here."""

    def handleNewSignals(self):
        """Reset the widget and start the task with the current inputs."""
        self.clear()
        self.start(self.run, *self.get_runner_parameters())

    def get_runner_parameters(self) -> Tuple:
        """Return the positional arguments passed to ``run``."""
        return self.data, self.model

    def clear(self):
        """Drop results, cancel the task and clear selection/scene/messages."""
        self.results = None
        self.cancel()
        self._clear_selection()
        self._clear_scene()
        self.Error.domain_transform_err.clear()
        self.Error.unknown_err.clear()
        self.Information.data_sampled.clear()

    # Plot setup
    def _clear_scene(self):
        """Empty the scene, reset all scene rectangles and drop the plot."""
        self.scene.clear()
        self.scene.setSceneRect(QRectF())
        self.view.setSceneRect(QRectF())
        self.view.setHeaderSceneRect(QRectF())
        self.view.setFooterSceneRect(QRectF())
        self.plot = None

    def update_scene(self):
        """Clear the scene; subclasses extend this to draw the plot."""
        self._clear_scene()

    def setup_plot(self, values: np.ndarray, names: List[str], *plot_args):
        """Create the plot, wire its signals and add it to the scene."""
        width = int(self.view.viewport().rect().width())
        self.plot = self.PLOT_CLASS()
        self.plot.set_data(values, names, self.n_attributes, width,
                           *plot_args)
        self.plot.apply_visual_settings(self.visual_settings)
        self.plot.selection_cleared.connect(self._clear_selection)
        self.plot.selection_changed.connect(self.update_selection)
        self.plot.layout().activate()
        self.plot.geometryChanged.connect(self._update_scene_rect)
        self.plot.resized.connect(self._update_plot)
        self.scene.addItem(self.plot)
        self.scene.mouse_clicked.connect(self.plot.deselect)
        self._update_scene_rect()
        self._update_plot()

    def _update_scene_rect(self):
        """Fit the scene/view rectangles and the footer to the plot."""
        # Same guard as in _update_plot: nothing to fit without a plot
        if self.plot is None:
            return

        geom = self.plot.geometry()

        def extend_horizontal(rect):
            # Stretch ``rect`` to the full width of the plot geometry
            rect = QRectF(rect)
            rect.setLeft(geom.left())
            rect.setRight(geom.right())
            return rect

        self.scene.setSceneRect(geom)
        self.view.setSceneRect(geom)

        footer_geom = self.plot.bottom_axis.geometry()
        footer = extend_horizontal(footer_geom.adjusted(0, -3, 0, 10))
        self.view.setFooterSceneRect(footer)

    def _update_plot(self):
        """Rescale the plot to the current viewport width."""
        if self.plot is not None:
            width = int(self.view.viewport().rect().width())
            self.plot.rescale(width)

    # Selection
    def _clear_selection(self):
        """Reset a non-empty selection and commit."""
        if self.selection:
            self.selection = ()
            self.commit()

    def update_selection(self, *_):
        raise NotImplementedError

    def select_pending(self, pending_selection: Tuple):
        """Remember ``pending_selection`` and commit unconditionally."""
        self.__pending_selection = pending_selection
        self.unconditional_commit()

    # Outputs
    def commit(self):
        """Send the selected data and update the output summary."""
        selected_data = self.get_selected_data()
        if not selected_data:
            self.info.set_output_summary(self.info.NoOutput)
        else:
            detail = format_summary_details(selected_data)
            self.info.set_output_summary(len(selected_data), detail)
        self.Outputs.selected_data.send(selected_data)

    def get_selected_data(self) -> Optional[Table]:
        raise NotImplementedError

    def update_scores(self):
        """Send the scores table, or ``None`` when there are no results."""
        scores = None
        if self.results is not None:
            scores = self.get_scores_table()
        self.Outputs.scores.send(scores)

    def get_scores_table(self) -> Table:
        raise NotImplementedError

    # Concurrent
    def on_partial_result(self, _):
        """Partial results are ignored."""

    def on_done(self, results: Optional[BaseResults]):
        """Store the results, then refresh scene, scores and selection."""
        self.results = results
        # A mask with any false entry is reported as sampled data
        if self.data and results is not None and not all(results.mask):
            self.Information.data_sampled()
        self.update_scene()
        self.update_scores()
        self.select_pending(self.__pending_selection)

    def on_exception(self, ex: Exception):
        """Show the task's exception as a widget error."""
        if isinstance(ex, DomainTransformationError):
            self.Error.domain_transform_err(ex)
        else:
            self.Error.unknown_err(ex)

    def onDeleteWidget(self):
        """Shut the task machinery down before the widget is deleted."""
        self.shutdown()
        super().onDeleteWidget()

    # Misc
    def sizeHint(self) -> QSize:
        """Return the control area's size hint, at least 800 x 520."""
        sh = self.controlArea.sizeHint()
        return sh.expandedTo(QSize(800, 520))

    def send_report(self):
        """Report the plot when both data and model are present."""
        if not self.data or not self.model:
            return
        self.report_plot()

    def set_visual_settings(self, key, value):
        """Store a visual setting and forward it to the plot, if any."""
        self.visual_settings[key] = value
        if self.plot is not None:
            self.plot.parameter_setter.set_parameter(key, value)

    @staticmethod
    def run(data: Table, model: Model, *, state: TaskState) -> BaseResults:
        raise NotImplementedError
class OWTreeGraph(OWTreeViewer2D):
    """Graphical visualization of tree models"""

    name = "树查看器"
    icon = "icons/TreeViewer.svg"
    priority = 35
    keywords = []

    class Inputs:
        # Had different input names before merging from
        # Classification/Regression tree variants
        tree = Input("树", TreeModel,
                     replaces=["Classification Tree", "Regression Tree"])

    class Outputs:
        selected_data = Output("被选数据", Table, default=True,
                               id="selected-data")
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table,
                                id="annotated-data")

    settingsHandler = ClassValuesContextHandler()
    target_class_index = ContextSetting(0)
    regression_colors = Setting(0)

    replaces = [
        "Orange.widgets.classify.owclassificationtreegraph.OWClassificationTreeGraph",
        "Orange.widgets.classify.owregressiontreegraph.OWRegressionTreeGraph"
    ]

    # Coloring options for regression trees; indices match COL_* below
    COL_OPTIONS = ["Default", "Number of instances", "Mean value", "Variance"]
    COL_DEFAULT, COL_INSTANCE, COL_MEAN, COL_VARIANCE = range(4)

    def __init__(self):
        """Initialise state and add the coloring combo to the display box."""
        super().__init__()
        self.domain = None
        self.dataset = None
        self.clf_dataset = None
        self.tree_adapter = None

        self.color_label = QLabel("目标类: ")
        combo = self.color_combo = gui.OrangeComboBox()
        combo.setSizePolicy(QSizePolicy.MinimumExpanding, QSizePolicy.Fixed)
        combo.setSizeAdjustPolicy(
            QComboBox.AdjustToMinimumContentsLengthWithIcon)
        combo.setMinimumContentsLength(8)
        combo.activated[int].connect(self.color_changed)
        self.display_box.layout().addRow(self.color_label, combo)

    def set_node_info(self):
        """Set the content of the node"""
        for node in self.scene.nodes():
            node.set_rect(QRectF())
            self.update_node_info(node)
        # All nodes get the same width: the widest one, capped at the maximum
        w = max([n.rect().width() for n in self.scene.nodes()] + [0])
        if w > self.max_node_width:
            w = self.max_node_width
        for node in self.scene.nodes():
            rect = node.rect()
            node.set_rect(QRectF(rect.x(), rect.y(), w, rect.height()))
        self.scene.fix_pos(self.root_node, 10, 10)

    def _update_node_info_attr_name(self, node, text):
        """Append the node's attribute name (if any) to ``text``."""
        attr = self.tree_adapter.attribute(node.node_inst)
        if attr is not None:
            text += "<hr/>{}".format(attr.name)
        return text

    def activate_loaded_settings(self):
        """Apply loaded settings to the combo, node colors and node text."""
        if not self.model:
            return
        super().activate_loaded_settings()
        if self.domain.class_var.is_discrete:
            self.color_combo.setCurrentIndex(self.target_class_index)
            self.toggle_node_color_cls()
        else:
            self.color_combo.setCurrentIndex(self.regression_colors)
            self.toggle_node_color_reg()
        self.set_node_info()

    def color_changed(self, i):
        """Store combo index ``i`` in the matching setting and recolor."""
        if self.domain.class_var.is_discrete:
            self.target_class_index = i
            self.toggle_node_color_cls()
            self.set_node_info()
        else:
            self.regression_colors = i
            self.toggle_node_color_reg()

    def toggle_node_size(self):
        """Refresh node contents and repaint the scene."""
        self.set_node_info()
        self.scene.update()
        self.scene_view.repaint()

    def toggle_color_cls(self):
        """Recolor and refresh nodes of a classification tree."""
        self.toggle_node_color_cls()
        self.set_node_info()
        self.scene.update()

    def toggle_color_reg(self):
        """Recolor and refresh nodes of a regression tree."""
        self.toggle_node_color_reg()
        self.set_node_info()
        self.scene.update()

    @Inputs.tree
    def ctree(self, model=None):
        """Input signal handler"""
        self.clear_scene()
        self.color_combo.clear()
        self.closeContext()
        self.model = model
        self.target_class_index = 0
        if model is None:
            self.info.setText('没有树。')
            self.root_node = None
            self.dataset = None
            self.tree_adapter = None
        else:
            self.tree_adapter = self._get_tree_adapter(model)
            self.domain = model.domain
            self.dataset = model.instances
            if self.dataset is not None and \
                    self.dataset.domain != self.domain:
                self.clf_dataset = self.dataset.transform(model.domain)
            else:
                self.clf_dataset = self.dataset
            class_var = self.domain.class_var
            if class_var.is_discrete:
                self.scene.colors = [QColor(*col) for col in class_var.colors]
                self.color_label.setText("目标类: ")
                self.color_combo.addItem("无")
                self.color_combo.addItems(self.domain.class_vars[0].values)
                self.color_combo.setCurrentIndex(self.target_class_index)
            else:
                self.scene.colors = \
                    ContinuousPaletteGenerator(*model.domain.class_var.colors)
                self.color_label.setText("调色: ")
                self.color_combo.addItems(self.COL_OPTIONS)
                self.color_combo.setCurrentIndex(self.regression_colors)
            self.openContext(self.domain.class_var)
            self.root_node = self.walkcreate(self.tree_adapter.root)
            self.info.setText('{} nodes, {} leaves'.format(
                self.tree_adapter.num_nodes,
                len(self.tree_adapter.leaves(self.tree_adapter.root))))
        self.setup_scene()
        self.Outputs.selected_data.send(None)
        self.Outputs.annotated_data.send(
            create_annotated_table(self.dataset, []))

    def walkcreate(self, node, parent=None):
        """Create a structure of tree nodes from the given model"""
        node_obj = TreeNode(self.tree_adapter, node, parent)
        self.scene.addItem(node_obj)
        if parent:
            edge = GraphicsEdge(node1=parent, node2=node_obj)
            self.scene.addItem(edge)
            parent.graph_add_edge(edge)
        for child_inst in self.tree_adapter.children(node):
            if child_inst is not None:
                self.walkcreate(child_inst, node_obj)
        return node_obj

    def node_tooltip(self, node):
        """Return the rules of ``node`` as HTML, one per line."""
        return "<br>".join(
            to_html(str(rule))
            for rule in self.tree_adapter.rules(node.node_inst))

    def update_selection(self):
        """Send the instances of the selected nodes and the annotated data."""
        if self.model is None:
            return
        nodes = [
            item.node_inst for item in self.scene.selectedItems()
            if isinstance(item, TreeNode)
        ]
        data = self.tree_adapter.get_instances_in_nodes(nodes)
        self.Outputs.selected_data.send(data)
        self.Outputs.annotated_data.send(
            create_annotated_table(self.dataset,
                                   self.tree_adapter.get_indices(nodes)))

    def send_report(self):
        """Report tree size, edge widths, coloring and the plot."""
        if not self.model:
            return
        items = [
            ("Tree size", self.info.text()),
            ("Edge widths",
             ("固定", "相对于根", "相对于父")[
                 # pylint: disable=invalid-sequence-index
                 self.line_width_method])
        ]
        if self.domain.class_var.is_discrete:
            items.append(("Target class", self.color_combo.currentText()))
        elif self.regression_colors != self.COL_DEFAULT:
            items.append(
                ("Color by", self.COL_OPTIONS[self.regression_colors]))
        self.report_items(items)
        self.report_plot(self.scene)

    def update_node_info(self, node):
        """Update the node text according to the class variable type."""
        if self.domain.class_var.is_discrete:
            self.update_node_info_cls(node)
        else:
            self.update_node_info_reg(node)

    def update_node_info_cls(self, node):
        """Update the printed contents of the node for classification trees"""
        node_inst = node.node_inst
        distr = self.tree_adapter.get_distribution(node_inst)[0]
        total = self.tree_adapter.num_samples(node_inst)
        # An all-zero distribution would otherwise give NaN proportions
        # and make int(total * tabs) below fail.
        distr = distr / (np.sum(distr) or 1)
        if self.target_class_index:
            # Combo index 0 is the "no target" entry, hence the -1 shift
            tabs = distr[self.target_class_index - 1]
            text = ""
        else:
            modus = np.argmax(distr)
            tabs = distr[modus]
            text = f"{self.domain.class_vars[0].values[int(modus)]}<br/>"
        if tabs > 0.999:
            text += f"100%, {total}/{total}"
        else:
            text += f"{100 * tabs:2.1f}%, {int(total * tabs)}/{total}"

        text = self._update_node_info_attr_name(node, text)
        node.setHtml(
            f'<p style="line-height: 120%; margin-bottom: 0">{text}</p>')

    def update_node_info_reg(self, node):
        """Update the printed contents of the node for regression trees"""
        node_inst = node.node_inst
        mean, var = self.tree_adapter.get_distribution(node_inst)[0]
        insts = self.tree_adapter.num_samples(node_inst)
        text = f"{mean:.1f} ± {var:.1f}<br/>"
        text += f"{insts} instances"
        text = self._update_node_info_attr_name(node, text)
        node.setHtml(
            f'<p style="line-height: 120%; margin-bottom: 0">{text}</p>')

    def toggle_node_color_cls(self):
        """Update the node color for classification trees"""
        colors = self.scene.colors
        for node in self.scene.nodes():
            distr = node.tree_adapter.get_distribution(node.node_inst)[0]
            # Guard both branches against an all-zero distribution
            total = sum(distr) or 1
            if self.target_class_index:
                p = distr[self.target_class_index - 1] / total
                # QColor.lighter takes an int factor
                color = colors[self.target_class_index - 1].lighter(
                    int(200 - 100 * p))
            else:
                modus = np.argmax(distr)
                p = distr[modus] / total
                color = colors[int(modus)].lighter(int(300 - 200 * p))
            node.backgroundBrush = QBrush(color)
        self.scene.update()

    def toggle_node_color_reg(self):
        """Update the node color for regression trees"""
        def_color = QColor(192, 192, 255)
        if self.regression_colors == self.COL_DEFAULT:
            brush = QBrush(def_color.lighter(100))
            for node in self.scene.nodes():
                node.backgroundBrush = brush
        elif self.regression_colors == self.COL_INSTANCE:
            # ``or 1`` avoids division by zero for a root without instances
            max_insts = len(
                self.tree_adapter.get_instances_in_nodes(
                    [self.tree_adapter.root])) or 1
            for node in self.scene.nodes():
                node_insts = len(
                    self.tree_adapter.get_instances_in_nodes(
                        [node.node_inst]))
                node.backgroundBrush = QBrush(
                    def_color.lighter(int(120 - 20 * node_insts / max_insts)))
        elif self.regression_colors == self.COL_MEAN:
            minv = np.nanmin(self.dataset.Y)
            maxv = np.nanmax(self.dataset.Y)
            fact = 1 / (maxv - minv) if minv != maxv else 1
            colors = self.scene.colors
            for node in self.scene.nodes():
                node_mean = self.tree_adapter.get_distribution(
                    node.node_inst)[0][0]
                node.backgroundBrush = QBrush(colors[fact * (node_mean - minv)])
        else:
            nodes = list(self.scene.nodes())
            variances = [
                self.tree_adapter.get_distribution(node.node_inst)[0][1]
                for node in nodes
            ]
            # Tolerate an empty scene and all-zero variances
            max_var = max(variances, default=0) or 1
            for node, var in zip(nodes, variances):
                node.backgroundBrush = QBrush(
                    def_color.lighter(int(120 - 20 * var / max_var)))
        self.scene.update()

    def _get_tree_adapter(self, model):
        """Return the adapter matching the type of ``model``."""
        if isinstance(model, SklModel):
            return SklTreeAdapter(model)
        return TreeAdapter(model)
class RadvizVizRank(VizRankDialog, OWComponent):
    """VizRank dialog that scores Radviz projections of attribute subsets.

    Scores are the negated mean 3-fold cross-validation score of a
    3-nearest-neighbours model fitted on the projected points.
    """

    captionTitle = "Score Plots"
    n_attrs = Setting(3)
    minK = 10

    attrsSelected = Signal([])
    _AttrRole = next(gui.OrangeUserRole)

    percent_data_used = Setting(100)

    def __init__(self, master):
        """Add the spin box for maximal number of attributes"""
        VizRankDialog.__init__(self, master)
        OWComponent.__init__(self, master)

        self.master = master
        self.n_neighbors = 10
        max_n_attrs = len(master.model_selected) + len(master.model_other) - 1

        box = gui.hBox(self)
        self.n_attrs_spin = gui.spin(
            box, self, "n_attrs", 3, max_n_attrs,
            label="Maximum number of variables: ",
            controlWidth=50, alignment=Qt.AlignRight,
            callback=self._n_attrs_changed)
        gui.rubber(box)
        self.last_run_n_attrs = None
        self.attr_color = master.attr_color
        self.attr_ordering = None
        self.data = None
        self.valid_data = None

    def initialize(self):
        """Reset the dialog and pick up the master's color attribute."""
        super().initialize()
        self.attr_color = self.master.attr_color

    def _compute_attr_order(self):
        """Return the attributes ordered by decreasing ReliefF weight.

        Also stores the transformed data and the mask of defined values.
        Ties are broken by attribute name.
        """
        master = self.master
        attrs = [v for v in chain(master.model_selected[:],
                                  master.model_other[:])
                 if v is not self.attr_color]
        data = self.master.data.transform(
            Domain(attributes=attrs, class_vars=self.attr_color))
        self.data = data
        self.valid_data = np.hstack(
            (~np.isnan(data.X), ~np.isnan(data.Y.reshape(len(data.Y), 1))))
        relief = ReliefF if self.attr_color.is_discrete else RReliefF
        weights = relief(n_iterations=100, k_nearest=self.minK)(data)
        attrs = sorted(zip(weights, attrs), key=lambda x: (-x[0], x[1].name))
        self.attr_ordering = attr_ordering = [a for _, a in attrs]
        return attr_ordering

    def _evaluate_projection(self, x, y):
        """Evaluate class separation in a projection with a k-NN model.

        Parameters
        ----------
        x : projected points to evaluate
        y : class (target) values

        Returns
        -------
        Mean of the 3-fold cross-validation scores.
        """
        if self.percent_data_used != 100:
            # Evaluate on a random subsample of the requested size
            rand = np.random.choice(
                len(x), int(len(x) * self.percent_data_used / 100),
                replace=False)
            x = x[rand]
            y = y[rand]
        neigh = KNeighborsClassifier(n_neighbors=3) \
            if self.attr_color.is_discrete else \
            KNeighborsRegressor(n_neighbors=3)
        # Neither the points nor the target may contain missing values
        assert not (np.isnan(x).any(axis=None) or np.isnan(y).any(axis=None))
        neigh.fit(x, y)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UserWarning)
            scores = cross_val_score(neigh, x, y, cv=3)
        return scores.mean()

    def _n_attrs_changed(self):
        """
        Change the button label when the number of attributes changes. The
        method does not reset anything so the user can still see the results
        until actually restarting the search.
        """
        if self.n_attrs != self.last_run_n_attrs or self.saved_state is None:
            self.button.setText("Start")
        else:
            self.button.setText("Continue")
        self.button.setEnabled(self.check_preconditions())

    def progressBarSet(self, value, processEvents=None):
        """Show the number of evaluated permutations in the window title."""
        self.setWindowTitle(
            self.captionTitle + " Evaluated {} permutations".format(value))
        if processEvents is not None and processEvents is not False:
            qApp.processEvents(processEvents)

    def check_preconditions(self):
        """Return True if ranking can run; also caps the spin at 20."""
        master = self.master
        if not super().check_preconditions():
            return False
        elif not master.btn_vizrank.isEnabled():
            return False
        self.n_attrs_spin.setMaximum(20)  # all primitive vars except color one
        return True

    def on_selection_changed(self, selected, deselected):
        """Emit the attributes stored in the first selected row."""
        indexes = selected.indexes()
        # Clearing the selection gives no indexes; there is nothing to emit
        if not indexes:
            return
        attrs = indexes[0].data(self._AttrRole)
        self.selectionChanged.emit([attrs])

    def iterate_states(self, state):
        """Yield index tuples of attribute subsets to evaluate.

        Subsets start with three attributes and grow up to ``n_attrs``.  For
        each subset the first index is fixed and only half of the
        permutations of the remaining ones are produced.
        """
        if state is None:  # on the first call, compute order
            self.attrs = self._compute_attr_order()
            state = list(range(3))
        else:
            state = list(state)

        def combinations(n, s):
            while True:
                yield s
                for up, _ in enumerate(s):
                    s[up] += 1
                    if up + 1 == len(s) or s[up] < s[up + 1]:
                        break
                    s[up] = up
                if s[-1] == n:
                    if len(s) < self.n_attrs:
                        s = list(range(len(s) + 1))
                    else:
                        break

        for c in combinations(len(self.attrs), state):
            for p in islice(permutations(c[1:]),
                            factorial(len(c) - 1) // 2):
                yield (c[0],) + p

    def compute_score(self, state):
        """Return the negated projection score for the given state."""
        attrs = [self.attrs[i] for i in state]
        domain = Domain(attributes=attrs, class_vars=[self.attr_color])
        data = self.data.transform(domain)
        radviz_xy, _, mask = radviz(data, attrs)
        y = data.Y[mask]
        return -self._evaluate_projection(radviz_xy, y)

    def bar_length(self, score):
        """Return the bar length: the score with its sign restored."""
        return -score

    def row_for_state(self, score, state):
        """Return the list-model row (a single item) for a scored state."""
        attrs = [self.attrs[s] for s in state]
        item = QStandardItem("[{:0.6f}] ".format(-score) +
                             ", ".join(a.name for a in attrs))
        item.setData(attrs, self._AttrRole)
        return [item]

    def _update_progress(self):
        """Show the saved progress in the window title."""
        self.progressBarSet(int(self.saved_progress))

    def before_running(self):
        """
        Disable the spin for number of attributes before running and
        enable afterwards. Also, if the number of attributes is different than
        in the last run, reset the saved state (if it was paused).
        """
        if self.n_attrs != self.last_run_n_attrs:
            self.saved_state = None
            self.saved_progress = 0
        if self.saved_state is None:
            self.scores = []
            self.rank_model.clear()
        self.last_run_n_attrs = self.n_attrs
        self.n_attrs_spin.setDisabled(True)

    def stopped(self):
        """Re-enable the spin box once the search has stopped."""
        self.n_attrs_spin.setDisabled(False)
class OWCorpus(OWWidget):
    """Widget that loads a corpus from a file or builds it from an input table.

    The user chooses which string meta attributes are used as text features;
    the resulting corpus is sent on the ``Corpus`` output. Corpora without
    documents, variables or text features are not sent (``None`` is sent
    instead).
    """
    name = "Corpus"
    description = "Load a corpus of text documents."
    icon = "icons/TextFile.svg"
    priority = 100
    replaces = ["orangecontrib.text.widgets.owloadcorpus.OWLoadCorpus"]

    class Inputs:
        data = Input('Data', Table)

    class Outputs:
        corpus = Output('Corpus', Corpus)

    want_main_area = False
    resizing_enabled = True

    # File-dialog filter: one "all readable files" entry followed by one
    # entry per distinct reader, in the order the readers are registered.
    dlgFormats = (
        "All readable files ({});;".format(
            '*' + ' *'.join(FileFormat.readers.keys())) +
        ";;".join("{} (*{})".format(f.DESCRIPTION, ' *'.join(f.EXTENSIONS))
                  for f in sorted(set(FileFormat.readers.values()),
                                  key=list(FileFormat.readers.values()).index)))

    settingsHandler = PerfectDomainContextHandler(
        match_values=PerfectDomainContextHandler.MATCH_VALUES_ALL)

    recent_files = Setting([
        "book-excerpts.tab",
        "grimm-tales-selected.tab",
        "election-tweets-2016.tab",
        "friends-transcripts.tab",
        "andersen.tab",
    ])
    used_attrs = ContextSetting([])

    class Error(OWWidget.Error):
        read_file = Msg("Can't read file {} ({})")
        no_text_features_used = Msg("At least one text feature must be used.")
        corpus_without_text_features = Msg(
            "Corpus doesn't have any textual features.")

    def __init__(self):
        super().__init__()

        self.corpus = None

        # Browse file box
        fbox = gui.widgetBox(self.controlArea, "Corpus file", orientation=0)
        self.file_widget = widgets.FileWidget(
            recent_files=self.recent_files,
            icon_size=(16, 16),
            on_open=self.open_file,
            dialog_format=self.dlgFormats,
            dialog_title='Open Orange Document Corpus',
            reload_label='Reload',
            browse_label='Browse',
            allow_empty=False,
            minimal_width=250,
        )
        fbox.layout().addWidget(self.file_widget)

        # Corpus info
        ibox = gui.widgetBox(self.controlArea, "Corpus info", addSpace=True)
        self.info_label = gui.label(ibox, self, "")
        self.update_info()

        # Used Text Features
        fbox = gui.widgetBox(self.controlArea, orientation=0)
        ubox = gui.widgetBox(fbox, "Used text features", addSpace=False)
        self.used_attrs_model = VariableListModel(enable_dnd=True)
        self.used_attrs_view = VariablesListItemView()
        self.used_attrs_view.setModel(self.used_attrs_model)
        ubox.layout().addWidget(self.used_attrs_view)

        # Any change of the "used" list re-computes the text features
        # and re-sends the corpus.
        aa = self.used_attrs_model
        aa.dataChanged.connect(self.update_feature_selection)
        aa.rowsInserted.connect(self.update_feature_selection)
        aa.rowsRemoved.connect(self.update_feature_selection)

        # Ignored Text Features
        ibox = gui.widgetBox(fbox, "Ignored text features", addSpace=False)
        self.unused_attrs_model = VariableListModel(enable_dnd=True)
        self.unused_attrs_view = VariablesListItemView()
        self.unused_attrs_view.setModel(self.unused_attrs_model)
        ibox.layout().addWidget(self.unused_attrs_view)

        # Documentation Data Sets & Report
        box = gui.hBox(self.controlArea)
        self.browse_documentation = gui.button(
            box, self, "Browse documentation corpora",
            callback=lambda: self.file_widget.browse(
                get_sample_corpora_dir()),
            autoDefault=False,
        )
        box.layout().addWidget(self.report_button)

        # load first file
        self.file_widget.select(0)

    @Inputs.data
    def set_data(self, data):
        """Use the input table as the corpus, or fall back to the file.

        While input data is present the file controls are disabled; when
        the input is removed, the currently selected file is reloaded.
        """
        have_data = data is not None

        # Enable/Disable command when data from input
        self.file_widget.setEnabled(not have_data)
        self.browse_documentation.setEnabled(not have_data)

        if have_data:
            self.open_file(data=data)
        else:
            self.file_widget.reload()

    def open_file(self, path=None, data=None):
        """Load a corpus from `data` (preferred) or from the file at `path`.

        Does nothing beyond clearing the state when neither is given.
        A read failure is reported through ``Error.read_file``; if no
        corpus is available afterwards, ``None`` is sent and the method
        returns instead of dereferencing the missing corpus.
        """
        self.closeContext()
        self.Error.clear()
        self.unused_attrs_model[:] = []
        self.used_attrs_model[:] = []
        if data:
            self.corpus = Corpus.from_table(data.domain, data)
        elif path:
            try:
                self.corpus = Corpus.from_file(path)
                self.corpus.name = os.path.splitext(os.path.basename(path))[0]
            # Exception (not BaseException): do not swallow KeyboardInterrupt
            # or SystemExit while reading a file.
            except Exception as err:
                self.Error.read_file(path, str(err))
        else:
            return

        if self.corpus is None:
            # Reading failed and there is no previously loaded corpus.
            self.update_info()
            self.Outputs.corpus.send(None)
            return

        self.update_info()
        self.used_attrs = list(self.corpus.text_features)
        if not self.corpus.text_features:
            self.Error.corpus_without_text_features()
            self.Outputs.corpus.send(None)
            return
        self.openContext(self.corpus)
        # Extending the model emits rowsInserted, which triggers
        # update_feature_selection and thereby sends the corpus.
        self.used_attrs_model.extend(self.used_attrs)
        self.unused_attrs_model.extend([
            f for f in self.corpus.domain.metas
            if f.is_string and f not in self.used_attrs_model
        ])

    def update_info(self):
        """Show a short description of the current corpus in the info label."""
        def describe(corpus):
            dom = corpus.domain
            text_feats = sum(m.is_string for m in dom.metas)
            other_feats = len(dom.attributes) + len(dom.metas) - text_feats
            text = \
                "{} document(s), {} text features(s), {} other feature(s)ok.". \
                replace("ok", "").format(len(corpus), text_feats, other_feats) \
                if False else \
                "{} document(s), {} text features(s), {} other feature(s).". \
                format(len(corpus), text_feats, other_feats)
            if dom.has_continuous_class:
                text += "<br/>Regression; numerical class."
            elif dom.has_discrete_class:
                text += "<br/>Classification; discrete class with {} values.". \
                    format(len(dom.class_var.values))
            elif corpus.domain.class_vars:
                text += "<br/>Multi-target; {} target variables.".format(
                    len(corpus.domain.class_vars))
            else:
                text += "<br/>Data has no target variable."
            text += "</p>"
            return text

        if self.corpus is None:
            self.info_label.setText("No corpus loaded.")
        else:
            self.info_label.setText(describe(self.corpus))

    def update_feature_selection(self):
        """Apply the "used" list as the corpus' text features and send it.

        Nothing is sent while no corpus is loaded.
        """
        self.Error.no_text_features_used.clear()

        # TODO fix VariablesListItemView so it does not emit
        # duplicated data when reordering inside a single window
        def remove_duplicates(items):
            unique = []
            for item in items:
                if item not in unique:
                    unique.append(item)
            return unique

        if self.corpus is None:
            return

        self.corpus.set_text_features(
            remove_duplicates(self.used_attrs_model))
        self.used_attrs = list(self.used_attrs_model)

        if len(self.unused_attrs_model) > 0 \
                and not self.corpus.text_features:
            self.Error.no_text_features_used()

        # prevent sending "empty" corpora
        dom = self.corpus.domain
        empty = not (dom.variables or dom.metas) \
            or len(self.corpus) == 0 \
            or not self.corpus.text_features
        self.Outputs.corpus.send(self.corpus if not empty else None)

    def send_report(self):
        """Report the file name, document count and feature usage."""
        def describe(features):
            if len(features):
                return ', '.join([f.name for f in features])
            else:
                return '(none)'

        if self.corpus is not None:
            domain = self.corpus.domain
            self.report_items('Corpus', (
                ("File", self.file_widget.get_selected_filename()),
                ("Documents", len(self.corpus)),
                ("Used text features", describe(self.used_attrs_model)),
                ("Ignored text features", describe(self.unused_attrs_model)),
                ('Other features', describe(domain.attributes)),
                ('Target', describe(domain.class_vars)),
            ))
class OWPaintData(OWWidget):
    """Widget for creating a data set by painting points on a plane.

    The painted points are kept in a private (N, 3) buffer whose columns
    are x, y and the class index; every modification goes through the
    undo stack and is then published on the ``Data`` output.
    """
    TOOLS = [
        ("Brush", "Create multiple instances", AirBrushTool,
         _icon("brush.svg")),
        ("Put", "Put individual instances", PutInstanceTool,
         _icon("put.svg")),
        ("Select", "Select and move instances", SelectTool,
         _icon("select-transparent_42px.png")),
        ("Jitter", "Jitter instances", JitterTool,
         _icon("jitter.svg")),
        ("Magnet", "Attract multiple instances", MagnetTool,
         _icon("magnet.svg")),
        ("Clear", "Clear the plot", ClearTool,
         _icon("../../../icons/Dlg_clear.png")),
    ]

    name = "Paint Data"
    description = "Create data by painting data points on a plane."
    icon = "icons/PaintData.svg"
    priority = 60
    keywords = ["create", "draw"]

    class Inputs:
        data = Input("Data", Table)

    class Outputs:
        data = Output("Data", Table)

    autocommit = Setting(True)
    table_name = Setting("Painted data")
    attr1 = Setting("x")
    attr2 = Setting("y")
    hasAttr2 = Setting(True)

    brushRadius = Setting(75)
    density = Setting(7)
    symbol_size = Setting(10)

    #: current data array (shape=(N, 3)) as presented on the output
    data = Setting(None, schema_only=True)
    labels = Setting(["C1", "C2"], schema_only=True)

    graph_name = "plot"

    class Warning(OWWidget.Warning):
        no_input_variables = Msg("Input data has no variables")
        continuous_target = Msg("Continuous target value can not be used.")
        sparse_not_supported = Msg("Sparse data is ignored.")
        renamed_vars = Msg("Some variables have been renamed "
                           "to avoid duplicates.\n{}")

    class Information(OWWidget.Information):
        use_first_two = \
            Msg("Paint Data uses data from the first two attributes.")

    def __init__(self):
        super().__init__()

        self.input_data = None
        self.input_classes = []
        self.input_colors = None
        self.input_has_attr2 = True
        self.current_tool = None
        self._selected_indices = None
        self._scatter_item = None
        #: A private data buffer (can be modified in place). `self.data` is
        #: a copy of this array (as seen when the `invalidate` method is
        #: called).
        self.__buffer = None

        self.undo_stack = QUndoStack(self)

        self.class_model = ColoredListModel(
            self.labels, self,
            flags=Qt.ItemIsSelectable | Qt.ItemIsEnabled | Qt.ItemIsEditable)

        self.class_model.dataChanged.connect(self._class_value_changed)
        self.class_model.rowsInserted.connect(self._class_count_changed)
        self.class_model.rowsRemoved.connect(self._class_count_changed)

        # Restore the stored points. The emptiness test uses len() rather
        # than truthiness because `not array` raises ValueError for a numpy
        # array with more than one element.
        stored = self.data
        if stored is None or len(stored) == 0:
            self.data = []
            self.__buffer = np.zeros((0, 3))
        elif isinstance(stored, np.ndarray):
            self.__buffer = stored.copy()
            self.data = stored.tolist()
        else:
            self.__buffer = np.array(stored)

        self.colors = colorpalettes.DefaultRGBColors
        self.tools_cache = {}

        self._init_ui()
        self.commit()

    def _init_ui(self):
        """Build the control area (names, labels, tools) and the plot."""
        namesBox = gui.vBox(self.controlArea, "Names")

        hbox = gui.hBox(namesBox, margin=0, spacing=0)
        gui.lineEdit(hbox, self, "attr1", "Variable X: ",
                     controlWidth=80, orientation=Qt.Horizontal,
                     callback=self._attr_name_changed)
        gui.separator(hbox, 21)
        hbox = gui.hBox(namesBox, margin=0, spacing=0)
        attr2 = gui.lineEdit(hbox, self, "attr2", "Variable Y: ",
                             controlWidth=80, orientation=Qt.Horizontal,
                             callback=self._attr_name_changed)
        gui.separator(hbox)
        gui.checkBox(hbox, self, "hasAttr2", '', disables=attr2,
                     labelWidth=0,
                     callback=self.set_dimensions)

        gui.separator(namesBox)

        gui.widgetLabel(namesBox, "Labels")
        self.classValuesView = listView = gui.ListViewWithSizeHint(
            preferred_size=(-1, 30))
        listView.setModel(self.class_model)
        itemmodels.select_row(listView, 0)
        namesBox.layout().addWidget(listView)

        self.addClassLabel = QAction(
            "+", self,
            toolTip="Add new class label",
            triggered=self.add_new_class_label)

        self.removeClassLabel = QAction(
            unicodedata.lookup("MINUS SIGN"), self,
            toolTip="Remove selected class label",
            triggered=self.remove_selected_class_label)

        actionsWidget = itemmodels.ModelActionsWidget(
            [self.addClassLabel, self.removeClassLabel], self)
        actionsWidget.layout().addStretch(10)
        actionsWidget.layout().setSpacing(1)
        namesBox.layout().addWidget(actionsWidget)

        tBox = gui.vBox(self.controlArea, "Tools", addSpace=True)
        buttonBox = gui.hBox(tBox)
        toolsBox = gui.widgetBox(buttonBox, orientation=QGridLayout())

        self.toolActions = QActionGroup(self)
        self.toolActions.setExclusive(True)
        self.toolButtons = []

        for i, (name, tooltip, tool, icon) in enumerate(self.TOOLS):
            action = QAction(
                name, self,
                toolTip=tooltip,
                checkable=tool.checkable,
                icon=QIcon(icon),
            )
            action.triggered.connect(partial(self.set_current_tool, tool))

            button = QToolButton(
                iconSize=QSize(24, 24),
                toolButtonStyle=Qt.ToolButtonTextUnderIcon,
                sizePolicy=QSizePolicy(QSizePolicy.MinimumExpanding,
                                       QSizePolicy.Fixed))
            button.setDefaultAction(action)
            self.toolButtons.append((button, tool))

            # Integer division: grid rows/columns must be ints; a float row
            # (i / 3) is rejected with TypeError by current PyQt/Python.
            toolsBox.layout().addWidget(button, i // 3, i % 3)
            self.toolActions.addAction(action)

        for column in range(3):
            toolsBox.layout().setColumnMinimumWidth(column, 10)
            toolsBox.layout().setColumnStretch(column, 1)

        undo = self.undo_stack.createUndoAction(self)
        redo = self.undo_stack.createRedoAction(self)

        undo.setShortcut(QKeySequence.Undo)
        redo.setShortcut(QKeySequence.Redo)

        self.addActions([undo, redo])
        # Every change of the undo stack position publishes the buffer.
        self.undo_stack.indexChanged.connect(self.invalidate)

        gui.separator(tBox)
        indBox = gui.indentedBox(tBox, sep=8)
        form = QFormLayout(
            formAlignment=Qt.AlignLeft,
            labelAlignment=Qt.AlignLeft,
            fieldGrowthPolicy=QFormLayout.AllNonFixedFieldsGrow)
        indBox.layout().addLayout(form)
        slider = gui.hSlider(
            indBox, self, "brushRadius", minValue=1, maxValue=100,
            createLabel=False)
        form.addRow("Radius:", slider)

        slider = gui.hSlider(
            indBox, self, "density", None, minValue=1, maxValue=100,
            createLabel=False)
        form.addRow("Intensity:", slider)

        slider = gui.hSlider(
            indBox, self, "symbol_size", None, minValue=1, maxValue=100,
            createLabel=False, callback=self.set_symbol_size)
        form.addRow("Symbol:", slider)

        self.btResetToInput = gui.button(
            tBox, self, "Reset to Input Data", self.reset_to_input)
        self.btResetToInput.setDisabled(True)

        gui.auto_send(self.controlArea, self, "autocommit")

        # main area GUI
        viewbox = PaintViewBox(enableMouse=False)
        self.plotview = pg.PlotWidget(background="w", viewBox=viewbox)
        self.plotview.sizeHint = lambda: QSize(
            200, 100)  # Minimum size for 1-d painting
        self.plot = self.plotview.getPlotItem()

        axis_color = self.palette().color(QPalette.Text)
        axis_pen = QPen(axis_color)

        tickfont = QFont(self.font())
        tickfont.setPixelSize(max(int(tickfont.pixelSize() * 2 // 3), 11))

        axis = self.plot.getAxis("bottom")
        axis.setLabel(self.attr1)
        axis.setPen(axis_pen)
        axis.setTickFont(tickfont)

        axis = self.plot.getAxis("left")
        axis.setLabel(self.attr2)
        axis.setPen(axis_pen)
        axis.setTickFont(tickfont)
        if not self.hasAttr2:
            self.plot.hideAxis('left')

        self.plot.hideButtons()
        self.plot.setXRange(0, 1, padding=0.01)

        self.mainArea.layout().addWidget(self.plotview)

        self.info.set_input_summary(self.info.NoInput)
        self.info.set_output_summary(self.info.NoOutput)

        # enable brush tool
        self.toolActions.actions()[0].setChecked(True)
        self.set_current_tool(self.TOOLS[0][2])

        self.set_dimensions()

    def set_symbol_size(self):
        """Apply the `symbol_size` setting to the plotted points, if any."""
        if self._scatter_item:
            self._scatter_item.setSize(self.symbol_size)

    def set_dimensions(self):
        """Switch the plot between 2-d and 1-d painting and replot.

        Tools flagged `only2d` are disabled while the second attribute
        is switched off.
        """
        if self.hasAttr2:
            self.plot.setYRange(0, 1, padding=0.01)
            self.plot.showAxis('left')
            self.plotview.setSizePolicy(QSizePolicy.Expanding,
                                        QSizePolicy.Minimum)
        else:
            self.plot.setYRange(-.5, .5, padding=0.01)
            self.plot.hideAxis('left')
            self.plotview.setSizePolicy(QSizePolicy.Expanding,
                                        QSizePolicy.Maximum)
        self._replot()
        for button, tool in self.toolButtons:
            if tool.only2d:
                button.setDisabled(not self.hasAttr2)

    @Inputs.data
    def set_data(self, data):
        """Set the input_data and call reset_to_input"""
        def _check_and_set_data(data):
            self.clear_messages()
            if data and data.is_sparse():
                self.Warning.sparse_not_supported()
                return False
            if data:
                if not data.domain.attributes:
                    self.Warning.no_input_variables()
                    data = None
                elif len(data.domain.attributes) > 2:
                    self.Information.use_first_two()
            if data:
                # Only summarize data that survived the checks above;
                # `data` may have been reset to None at this point.
                self.info.set_input_summary(len(data),
                                            format_summary_details(data))
            self.input_data = data
            self.btResetToInput.setDisabled(data is None)
            return bool(data)

        if not _check_and_set_data(data):
            self.info.set_input_summary(self.info.NoInput)
            return

        # Only the first two attributes are used, each passed through scale().
        X = np.array([scale(vals) for vals in data.X[:, :2].T]).T
        try:
            y = next(cls for cls in data.domain.class_vars if cls.is_discrete)
        except StopIteration:
            # No discrete class variable: put all points in a single class.
            if data.domain.class_vars:
                self.Warning.continuous_target()
            self.input_classes = ["C1"]
            self.input_colors = None
            y = np.zeros(len(data))
        else:
            self.input_classes = y.values
            self.input_colors = y.palette

            y = data[:, y].Y

        self.input_has_attr2 = len(data.domain.attributes) >= 2
        if not self.input_has_attr2:
            # Pad the missing second coordinate with zeros.
            self.input_data = np.column_stack((X, np.zeros(len(data)), y))
        else:
            self.input_data = np.column_stack((X, y))
        self.reset_to_input()
        self.unconditional_commit()

    def reset_to_input(self):
        """Reset the painting to input data if present."""
        if self.input_data is None:
            return
        self.undo_stack.clear()

        index = self.selected_class_label()
        if index is None:
            # Nothing selected: fall back to the first label instead of
            # failing in max(None, 0) below.
            index = 0
        if self.input_colors is not None:
            palette = self.input_colors
        else:
            palette = colorpalettes.DefaultRGBColors
        self.colors = palette
        self.class_model.colors = palette
        self.class_model[:] = self.input_classes

        newindex = min(max(index, 0), len(self.class_model) - 1)
        itemmodels.select_row(self.classValuesView, newindex)

        self.data = self.input_data.tolist()
        self.__buffer = self.input_data.copy()

        prev_attr2 = self.hasAttr2
        self.hasAttr2 = self.input_has_attr2
        if prev_attr2 != self.hasAttr2:
            self.set_dimensions()
        else:
            # set_dimensions already calls _replot, no need to call it again
            self._replot()

        self.commit()

    def add_new_class_label(self, undoable=True):
        """Append a new class label with the first unused generated name.

        When `undoable` is true the change is pushed on the undo stack,
        otherwise it is applied directly.
        """
        newlabel = next(label for label in namegen('C', 1)
                        if label not in self.class_model)

        command = SimpleUndoCommand(
            lambda: self.class_model.append(newlabel),
            lambda: self.class_model.__delitem__(-1))
        if undoable:
            self.undo_stack.push(command)
        else:
            command.redo()

    def remove_selected_class_label(self):
        """Remove the selected label together with its points.

        Points of that class are deleted and the class indices above it
        are shifted down by one, all within a single undo macro.
        """
        index = self.selected_class_label()

        if index is None:
            return

        label = self.class_model[index]
        mask = self.__buffer[:, 2] == index
        # Computed on the rows that remain after the deletion.
        move_mask = self.__buffer[~mask][:, 2] > index

        self.undo_stack.beginMacro("Delete class label")
        self.undo_stack.push(UndoCommand(DeleteIndices(mask), self))
        self.undo_stack.push(UndoCommand(Move((move_mask, 2), -1), self))
        self.undo_stack.push(
            SimpleUndoCommand(lambda: self.class_model.__delitem__(index),
                              lambda: self.class_model.insert(index, label)))
        self.undo_stack.endMacro()

        newindex = min(max(index - 1, 0), len(self.class_model) - 1)
        itemmodels.select_row(self.classValuesView, newindex)

    def _class_count_changed(self):
        """Sync `labels` and the add/remove actions with the label model."""
        self.labels = list(self.class_model)
        self.removeClassLabel.setEnabled(len(self.class_model) > 1)
        self.addClassLabel.setEnabled(
            len(self.class_model) < len(self.colors))
        if self.selected_class_label() is None:
            itemmodels.select_row(self.classValuesView, 0)

    def _class_value_changed(self, index, _):
        """Store an edited label name in the `labels` setting."""
        index = index.row()
        newvalue = self.class_model[index]
        oldvalue = self.labels[index]
        if newvalue != oldvalue:
            self.labels[index] = newvalue
#             command = Command(
#                 lambda: self.class_model.__setitem__(index, newvalue),
#                 lambda: self.class_model.__setitem__(index, oldvalue),
#             )
#             self.undo_stack.push(command)

    def selected_class_label(self):
        """Return the row of the selected class label, or None."""
        rows = self.classValuesView.selectedIndexes()
        if rows:
            return rows[0].row()
        return None

    def set_current_tool(self, tool):
        """Activate the tool of class `tool`, deactivating the current one.

        Tool instances are created once and cached. A tool that is not
        checkable is activated and then the previous tool is restored.
        """
        prev_tool = self.current_tool.__class__

        if self.current_tool is not None:
            self.current_tool.deactivate()
            self.current_tool.editingStarted.disconnect(
                self._on_editing_started)
            self.current_tool.editingFinished.disconnect(
                self._on_editing_finished)
            self.current_tool = None
            self.plot.getViewBox().tool = None

        if tool not in self.tools_cache:
            newtool = tool(self, self.plot)
            self.tools_cache[tool] = newtool
            newtool.issueCommand.connect(self._add_command)

        self.current_tool = tool = self.tools_cache[tool]
        self.plot.getViewBox().tool = tool
        tool.editingStarted.connect(self._on_editing_started)
        tool.editingFinished.connect(self._on_editing_finished)
        tool.activate()

        if not tool.checkable:
            self.set_current_tool(prev_tool)

    def _on_editing_started(self):
        self.undo_stack.beginMacro("macro")

    def _on_editing_finished(self):
        self.undo_stack.endMacro()

    def execute(self, command):
        """Apply a normalized command to the buffer and return its undo."""
        assert isinstance(command, (Append, DeleteIndices, Insert, Move)), \
            "Non normalized command"
        if isinstance(command, (DeleteIndices, Insert)):
            # Row indices change, so a stored selection is no longer valid.
            self._selected_indices = None

            if isinstance(self.current_tool, SelectTool):
                self.current_tool.reset()

        self.__buffer, undo = transform(command, self.__buffer)
        self._replot()
        return undo

    def _add_command(self, cmd):
        """Translate a tool command into undoable buffer commands."""
        # pylint: disable=too-many-branches
        name = "Name"

        if (not self.hasAttr2 and
                isinstance(cmd, (Move, MoveSelection, Jitter, Magnet))):
            # tool only supported if both x and y are enabled
            return

        if isinstance(cmd, Append):
            cls = self.selected_class_label()
            points = np.array([(p.x(), p.y() if self.hasAttr2 else 0, cls)
                               for p in cmd.points])
            self.undo_stack.push(UndoCommand(Append(points), self, text=name))
        elif isinstance(cmd, Move):
            self.undo_stack.push(UndoCommand(cmd, self, text=name))
        elif isinstance(cmd, SelectRegion):
            indices = [
                i for i, (x, y) in enumerate(self.__buffer[:, :2])
                if cmd.region.contains(QPointF(x, y))
            ]
            indices = np.array(indices, dtype=int)
            self._selected_indices = indices
        elif isinstance(cmd, DeleteSelection):
            indices = self._selected_indices
            if indices is not None and indices.size:
                self.undo_stack.push(
                    UndoCommand(DeleteIndices(indices), self, text="Delete"))
        elif isinstance(cmd, MoveSelection):
            indices = self._selected_indices
            if indices is not None and indices.size:
                self.undo_stack.push(
                    UndoCommand(
                        Move((self._selected_indices, slice(0, 2)),
                             np.array([cmd.delta.x(), cmd.delta.y()])),
                        self, text="Move"))
        elif isinstance(cmd, DeleteIndices):
            self.undo_stack.push(UndoCommand(cmd, self, text="Delete"))
        elif isinstance(cmd, Insert):
            self.undo_stack.push(UndoCommand(cmd, self))
        elif isinstance(cmd, AirBrush):
            data = create_data(cmd.pos.x(), cmd.pos.y(),
                               self.brushRadius / 1000,
                               int(1 + self.density / 20), cmd.rstate)
            self._add_command(Append([QPointF(*p) for p in zip(*data.T)]))
        elif isinstance(cmd, Jitter):
            point = np.array([cmd.pos.x(), cmd.pos.y()])
            delta = -apply_jitter(self.__buffer[:, :2], point,
                                  self.density / 100.0, 0, cmd.rstate)
            self._add_command(Move((..., slice(0, 2)), delta))
        elif isinstance(cmd, Magnet):
            point = np.array([cmd.pos.x(), cmd.pos.y()])
            delta = -apply_attractor(self.__buffer[:, :2], point,
                                     self.density / 100.0, 0)
            self._add_command(Move((..., slice(0, 2)), delta))
        else:
            assert False, "unreachable"

    def _replot(self):
        """Recreate the scatter plot item from the current buffer."""
        def pen(color):
            pen = QPen(color, 1)
            pen.setCosmetic(True)
            return pen

        if self._scatter_item is not None:
            self.plot.removeItem(self._scatter_item)
            self._scatter_item = None

        x = self.__buffer[:, 0].copy()
        if self.hasAttr2:
            y = self.__buffer[:, 1].copy()
        else:
            y = np.zeros(self.__buffer.shape[0])

        colors = self.colors[self.__buffer[:, 2]]
        pens = [pen(c) for c in colors]
        brushes = [QBrush(c) for c in colors]

        self._scatter_item = pg.ScatterPlotItem(
            x, y, symbol="+", brush=brushes, pen=pens)
        self.plot.addItem(self._scatter_item)
        self.set_symbol_size()

    def _attr_name_changed(self):
        """Update the axis labels after a variable name edit and resend."""
        self.plot.getAxis("bottom").setLabel(self.attr1)
        self.plot.getAxis("left").setLabel(self.attr2)
        self.invalidate()

    def invalidate(self):
        """Copy the private buffer into `data` and commit."""
        self.data = self.__buffer.tolist()
        self.commit()

    def commit(self):
        """Build a table from `data` and send it; send None when empty.

        A discrete class variable is added only when the points use at
        least two distinct class values. Duplicate variable names are
        made unique and reported with a warning.
        """
        self.Warning.renamed_vars.clear()

        if not self.data:
            self.Outputs.data.send(None)
            self.info.set_output_summary(self.info.NoOutput)
            return
        data = np.array(self.data)
        if self.hasAttr2:
            X, Y = data[:, :2], data[:, 2]
            proposed = [self.attr1.strip(), self.attr2.strip()]
        else:
            X, Y = data[:, np.newaxis, 0], data[:, 2]
            proposed = [self.attr1.strip()]

        if len(np.unique(Y)) >= 2:
            proposed.append("Class")
            unique_names, renamed = get_unique_names_duplicates(proposed, True)
            domain = Domain(
                (map(ContinuousVariable, unique_names[:-1])),
                DiscreteVariable(
                    unique_names[-1], values=tuple(self.class_model))
            )
            data = Table.from_numpy(domain, X, Y)
        else:
            unique_names, renamed = get_unique_names_duplicates(proposed, True)
            domain = Domain(map(ContinuousVariable, unique_names))
            data = Table.from_numpy(domain, X)

        if renamed:
            self.Warning.renamed_vars(", ".join(renamed))
            self.plot.getAxis("bottom").setLabel(unique_names[0])
            if self.hasAttr2:
                # In 1-d mode unique_names[1] is the class name (or does
                # not exist), so it must not become the y-axis label.
                self.plot.getAxis("left").setLabel(unique_names[1])

        data.name = self.table_name
        self.Outputs.data.send(data)
        self.info.set_output_summary(len(data), format_summary_details(data))

    def sizeHint(self):
        sh = super().sizeHint()
        return sh.expandedTo(QSize(570, 690))

    def onDeleteWidget(self):
        self.undo_stack.indexChanged.disconnect(self.invalidate)
        self.plot.clear()

    def send_report(self):
        """Report non-default axis names, the point count and the plot."""
        if self.data is None:
            return
        settings = []
        if self.attr1 != "x" or self.attr2 != "y":
            settings += [("Axis x", self.attr1), ("Axis y", self.attr2)]
        settings += [("Number of points", len(self.data))]
        self.report_items("Painted data", settings)
        self.report_plot()
class OWContinuize(widget.OWWidget):
    """Widget that turns categorical variables into numeric ones.

    The treatment of categorical features, numeric features and the class
    is chosen with radio buttons; each radio-button setting is an index
    into the corresponding ``*_treats`` table of (label, treatment) pairs.
    """
    name = "Continuize"
    description = ("Transform categorical attributes into numeric and, " +
                   "optionally, normalize numeric values.")
    icon = "icons/Continuize.svg"
    category = "Data"
    keywords = []

    class Inputs:
        data = Input("Data", Orange.data.Table)

    class Outputs:
        data = Output("Data", Orange.data.Table)

    want_main_area = False
    buttons_area_orientation = Qt.Vertical
    resizing_enabled = False

    # Indices into the option tables defined below.
    multinomial_treatment = Setting(0)
    zero_based = Setting(1)
    continuous_treatment = Setting(0)
    class_treatment = Setting(0)

    transform_class = Setting(False)

    autosend = Setting(True)

    multinomial_treats = (
        ("Target or first value as base", Continuize.FirstAsBase),
        ("Most frequent value as base", Continuize.FrequentAsBase),
        ("One attribute per value", Continuize.Indicators),
        ("Ignore multinomial attributes", Continuize.RemoveMultinomial),
        ("Remove categorical attributes", Continuize.Remove),
        ("Treat as ordinal", Continuize.AsOrdinal),
        ("Divide by number of values", Continuize.AsNormalizedOrdinal))

    continuous_treats = (
        ("Leave them as they are", Continuize.Leave),
        ("Normalize by span", Normalize.NormalizeBySpan),
        ("Normalize by standard deviation", Normalize.NormalizeBySD))

    class_treats = (
        ("Leave it as it is", Continuize.Leave),
        ("Treat as ordinal", Continuize.AsOrdinal),
        ("Divide by number of values", Continuize.AsNormalizedOrdinal),
        ("One class per value", Continuize.Indicators),
    )

    value_ranges = ["From -1 to 1", "From 0 to 1"]

    def __init__(self):
        super().__init__()

        # (box title, bound setting, button labels) for each radio group,
        # in the order the groups appear in the control area.
        radio_groups = (
            ("Categorical Features", "multinomial_treatment",
             [label for label, _ in self.multinomial_treats]),
            ("Numeric Features", "continuous_treatment",
             [label for label, _ in self.continuous_treats]),
            ("Categorical Outcomes", "class_treatment",
             [label for label, _ in self.class_treats]),
            ("Value Range", "zero_based", self.value_ranges),
        )
        for title, setting_name, labels in radio_groups:
            group_box = gui.vBox(self.controlArea, title)
            gui.radioButtonsInBox(
                group_box, self, setting_name,
                btnLabels=labels,
                callback=self.settings_changed)

        gui.auto_commit(self.buttonsArea, self, "autosend", "Apply", box=False)

        self.data = None

    def settings_changed(self):
        """Callback of every radio group: commit with the new settings."""
        self.commit()

    @Inputs.data
    @check_sql_input
    def setData(self, data):
        """Store the input table; commit it, or send None when removed."""
        self.data = data
        if data is not None:
            self.unconditional_commit()
            return
        self.Outputs.data.send(None)

    def constructContinuizer(self):
        """Return a DomainContinuizer configured from the current settings."""
        multinomial = self.multinomial_treats[self.multinomial_treatment][1]
        continuous = self.continuous_treats[self.continuous_treatment][1]
        class_ = self.class_treats[self.class_treatment][1]
        return DomainContinuizer(
            zero_based=self.zero_based,
            multinomial_treatment=multinomial,
            continuous_treatment=continuous,
            class_treatment=class_)

    # Legacy code, kept for reference only:
    # def sendPreprocessor(self):
    #     continuizer = self.constructContinuizer()
    #     self.send("Preprocessor", PreprocessedLearner(
    #         lambda data, weightId=0, tc=(self.targetValue if self.classTreatment else -1):
    #             Table(continuizer(data, weightId, tc)
    #                 if data.domain.has_discrete_class
    #                 else continuizer(data, weightId), data)))

    def commit(self):
        """Send the input transformed into the continuized domain.

        Falsy input (None or empty data) is passed through unchanged.
        """
        continuizer = self.constructContinuizer()
        if not self.data:
            self.Outputs.data.send(self.data)  # None or empty data
            return
        new_domain = continuizer(self.data)
        transformed = self.data.transform(new_domain)
        self.Outputs.data.send(transformed)

    def send_report(self):
        """Report the label of the selected option in every radio group."""
        categorical = self.multinomial_treats[self.multinomial_treatment][0]
        numeric = self.continuous_treats[self.continuous_treatment][0]
        target = self.class_treats[self.class_treatment][0]
        value_range = self.value_ranges[self.zero_based]
        self.report_items(
            "Settings",
            [("Categorical features", categorical),
             ("Numeric features", numeric),
             ("Class", target),
             ("Value range", value_range)])
class OWMergeData(widget.OWWidget): name = "Merge Data" description = "Merge datasets based on the values of selected features." icon = "icons/MergeData.svg" priority = 1110 keywords = ["join"] class Inputs: data = Input("Data", Orange.data.Table, default=True, replaces=["Data A"]) extra_data = Input("Extra Data", Orange.data.Table, replaces=["Data B"]) class Outputs: data = Output( "Data", Orange.data.Table, replaces=["Merged Data A+B", "Merged Data B+A", "Merged Data"]) LeftJoin, InnerJoin, OuterJoin = range(3) OptionNames = ("Append columns from Extra data", "Find matching pairs of rows", "Concatenate tables") OptionDescriptions = ( "The first table may contain, for instance, city names,\n" "and the second would be a list of cities and their coordinates.\n" "Columns with coordinates would then be appended to the output.", "Input tables contain different features describing the same data " "instances.\n" "Output contains matched instances. Rows without matches are removed.", "Input tables contain different features describing the same data " "instances.\n" "Output contains all instances. 
Data from merged instances is " "merged into single rows.") UserAdviceMessages = [ widget.Message("Confused about merging options?\nSee the tooltips!", "merging_types") ] settingsHandler = MergeDataContextHandler() attr_pairs = ContextSetting(None, schema_only=True) merging = Setting(LeftJoin) auto_apply = Setting(True) settings_version = 2 want_main_area = False resizing_enabled = False class Warning(widget.OWWidget.Warning): renamed_vars = Msg("Some variables have been renamed " "to avoid duplicates.\n{}") class Error(widget.OWWidget.Error): matching_numeric_with_nonnum = Msg( "Numeric and non-numeric columns ({} and {}) cannot be matched.") matching_index_with_sth = Msg("Row index cannot be matched with {}.") matching_id_with_sth = Msg("Instance cannot be matched with {}.") nonunique_left = Msg( "Some combinations of values on the left appear in multiple rows.\n" "For this type of merging, every possible combination of values " "on the left should appear at most once.") nonunique_right = Msg( "Some combinations of values on the right appear in multiple rows." 
"\n" "Every possible combination of values on the right should appear " "at most once.") def __init__(self): super().__init__() self.data = None self.extra_data = None content = [ INDEX, INSTANCEID, DomainModel.ATTRIBUTES, DomainModel.CLASSES, DomainModel.METAS ] self.model = DomainModelWithTooltips(content) self.extra_model = DomainModelWithTooltips(content) self.info.set_input_summary(self.info.NoInput) self.info.set_output_summary(self.info.NoOutput) grp = gui.radioButtons(self.controlArea, self, "merging", box="Merging", btnLabels=self.OptionNames, tooltips=self.OptionDescriptions, callback=self.change_merging) self.attr_boxes = ConditionBox(self, self.model, self.extra_model, "", "matches") self.attr_boxes.add_row() box = gui.vBox(self.controlArea, box="Row matching") box.layout().addWidget(self.attr_boxes) gui.auto_apply(self.buttonsArea, self) # connect after wrapping self.commit with gui.auto_commit! self.attr_boxes.vars_changed.connect(self.commit) self.attr_boxes.vars_changed.connect(self.store_combo_state) self.settingsAboutToBePacked.connect(self.store_combo_state) def change_merging(self): self.commit() @Inputs.data @check_sql_input def set_data(self, data): self.data = data self.model.set_domain(data.domain if data else None) @Inputs.extra_data @check_sql_input def set_extra_data(self, data): self.extra_data = data self.extra_model.set_domain(data.domain if data else None) def store_combo_state(self): self.attr_pairs = self.attr_boxes.current_state() def handleNewSignals(self): self.closeContext() self.attr_pairs = [self._find_best_match()] self.openContext(self.data and self.data.domain, self.extra_data and self.extra_data.domain) self.attr_boxes.set_state(self.attr_pairs) summary, details, kwargs = self.info.NoInput, "", {} if self.data or self.extra_data: n_data = len(self.data) if self.data else 0 n_extra_data = len(self.extra_data) if self.extra_data else 0 summary = f"{self.info.format_number(n_data)}, " \ 
f"{self.info.format_number(n_extra_data)}" kwargs = {"format": Qt.RichText} details = format_multiple_summaries([("Data", self.data), ("Extra data", self.extra_data)]) self.info.set_input_summary(summary, details, **kwargs) self.unconditional_commit() def _find_best_match(self): def get_unique_str_metas_names(model_): return [m for m in model_ if isinstance(m, StringVariable)] attr, extra_attr, n_max_intersect = INDEX, INDEX, 0 str_metas = get_unique_str_metas_names(self.model) extra_str_metas = get_unique_str_metas_names(self.extra_model) for m_a, m_b in product(str_metas, extra_str_metas): col = self.data[:, m_a].metas extra_col = self.extra_data[:, m_b].metas if col.size and extra_col.size \ and isinstance(col[0][0], str) \ and isinstance(extra_col[0][0], str): n_inter = len(np.intersect1d(col, extra_col)) if n_inter > n_max_intersect: n_max_intersect, attr, extra_attr = n_inter, m_a, m_b return attr, extra_attr def commit(self): self.clear_messages() merged = self.merge() if self.data and self.extra_data else None self.Outputs.data.send(merged) details = format_summary_details(merged) if merged else "" summary = len(merged) if merged else self.info.NoOutput self.info.set_output_summary(summary, details) def send_report(self): # pylint: disable=invalid-sequence-index self.report_items( (("Merging", self.OptionNames[self.merging]), ("Match", ", ".join( f"{self._get_col_name(left)} with {self._get_col_name(right)}" for left, right in self.attr_boxes.current_state())))) def merge(self): # pylint: disable=invalid-sequence-index pairs = self.attr_boxes.current_state() if not self._check_pair_types(pairs): return None left_vars, right_vars = zip(*pairs) left_mask = np.full(len(self.data), True) left = np.vstack( tuple( self._values(self.data, var, left_mask) for var in left_vars)).T right_mask = np.full(len(self.extra_data), True) right = np.vstack( tuple( self._values(self.extra_data, var, right_mask) for var in right_vars)).T if not self._check_uniqueness(left, 
left_mask, right, right_mask): return None method = self._merge_methods[self.merging] lefti, righti, rightu = method(self, left, left_mask, right, right_mask) reduced_extra_data = \ self._compute_reduced_extra_data(right_vars, lefti, righti, rightu) return self._join_table_by_indices(reduced_extra_data, lefti, righti, rightu) def _check_pair_types(self, pairs): for left, right in pairs: if isinstance(left, ContinuousVariable) \ != isinstance(right, ContinuousVariable): self.Error.matching_numeric_with_nonnum(left, right) return False if INDEX in (left, right) and left != right: self.Error.matching_index_with_sth( self._get_col_name(({left, right} - {INDEX}).pop())) return False if INSTANCEID in (left, right) and left != right: self.Error.matching_id_with_sth( self._get_col_name(({left, right} - {INSTANCEID}).pop())) return False return True @staticmethod def _get_col_name(obj): return f"'{obj.name}'" if isinstance(obj, Variable) else obj.lower() def _check_uniqueness(self, left, left_mask, right, right_mask): ok = True masked_right = right[right_mask] if len(set(map(tuple, masked_right))) != len(masked_right): self.Error.nonunique_right() ok = False if self.merging != self.LeftJoin: masked_left = left[left_mask] if len(set(map(tuple, masked_left))) != len(masked_left): self.Error.nonunique_left() ok = False return ok def _compute_reduced_extra_data(self, right_match_vars, lefti, righti, rightu): """Prepare a table with extra columns that will appear in the merged table""" domain = self.data.domain extra_domain = self.extra_data.domain def var_needed(var): if rightu is not None and rightu.size: return True if var in right_match_vars and self.merging != self.OuterJoin: return False if var not in domain: return True both_defined = (lefti != -1) * (righti != -1) left_col = \ self.data.get_column_view(var)[0][lefti[both_defined]] right_col = \ self.extra_data.get_column_view(var)[0][righti[both_defined]] if var.is_primitive(): left_col = left_col.astype(float) right_col 
= right_col.astype(float) mask_left = np.isfinite(left_col) mask_right = np.isfinite(right_col) return not (np.all(mask_left == mask_right) and np.all( left_col[mask_left] == right_col[mask_right])) else: return not np.all(left_col == right_col) extra_vars = [ var for var in chain(extra_domain.variables, extra_domain.metas) if var_needed(var) ] return self.extra_data[:, extra_vars] @staticmethod def _values(data, var, mask): """Return an iterotor over keys for rows of the table.""" if var == INDEX: return np.arange(len(data)) if var == INSTANCEID: return np.fromiter((inst.id for inst in data), count=len(data), dtype=np.int) col = data.get_column_view(var)[0] if var.is_primitive(): col = col.astype(float, copy=False) nans = np.isnan(col) mask *= ~nans if var.is_discrete: col = col.astype(int) col[nans] = len(var.values) col = np.array(var.values + (np.nan, ))[col] else: col = col.copy() defined = col.astype(bool) mask *= defined col[~mask] = np.nan return col def _left_join_indices(self, left, left_mask, right, right_mask): """Compute a two-row array of indices: - the first row contains indices for the primary table, - the second row contains the matching rows in the extra table or -1""" data = self.data # Don't match nans. 
This is needed since numpy may change nan to string # nan, so nan's will match each other indices = np.arange(len(right)) indices[~right_mask] = -1 if right.shape[1] == 1: # The more common case can be handled faster right_map = dict(zip(right.flatten(), indices)) righti = (right_map.get(val, -1) for val in left.flatten()) else: right_map = dict(zip(map(tuple, right), indices)) righti = (right_map.get(tuple(val), -1) for val in left) righti = np.fromiter(righti, dtype=np.int64, count=len(data)) lefti = np.arange(len(data), dtype=np.int64) righti[lefti[~left_mask]] = -1 return lefti, righti, None def _inner_join_indices(self, left, left_mask, right, right_mask): """Use _augment_indices to compute the array of indices, then remove those with no match in the second table""" lefti, righti, _ = \ self._left_join_indices(left, left_mask, right, right_mask) mask = righti != [-1] return lefti[mask], righti[mask], None def _outer_join_indices(self, left, left_mask, right, right_mask): """Use _augment_indices to compute the array of indices, then add rows in the second table without a match in the first""" lefti, righti, _ = \ self._left_join_indices(left, left_mask, right, right_mask) unused = np.full(len(right), True) unused[righti] = False if len(right) - 1 not in righti: # righti can include -1, which sets the last element as used unused[-1] = True return lefti, righti, np.nonzero(unused)[0] _merge_methods = [ _left_join_indices, _inner_join_indices, _outer_join_indices ] def _join_table_by_indices(self, reduced_extra, lefti, righti, rightu): """Join (horizontally) self.data and reduced_extra, taking the pairs of rows given in indices""" if not lefti.size: return None lt_dom = self.data.domain xt_dom = reduced_extra.domain domain = self._domain_rename_duplicates( lt_dom.attributes + xt_dom.attributes, lt_dom.class_vars + xt_dom.class_vars, lt_dom.metas + xt_dom.metas) X = self._join_array_by_indices(self.data.X, reduced_extra.X, lefti, righti) Y = 
self._join_array_by_indices(np.c_[self.data.Y], np.c_[reduced_extra.Y], lefti, righti) string_cols = [ i for i, var in enumerate(domain.metas) if var.is_string ] metas = self._join_array_by_indices(self.data.metas, reduced_extra.metas, lefti, righti, string_cols) if rightu is not None: # This domain is used for transforming the extra rows for outer join # It must use the original - not renamed - variables from right, so # values are copied, # but new domain for the left, so renamed values are *not* copied right_domain = Orange.data.Domain( domain.attributes[:len(lt_dom.attributes)] + xt_dom.attributes, domain.class_vars[:len(lt_dom.class_vars)] + xt_dom.class_vars, domain.metas[:len(lt_dom.metas)] + xt_dom.metas) extras = self.extra_data[rightu].transform(right_domain) X = np.vstack((X, extras.X)) extras_Y = extras.Y if extras_Y.ndim == 1: extras_Y = extras_Y.reshape(-1, 1) Y = np.vstack((Y, extras_Y)) metas = np.vstack((metas, extras.metas)) table = Orange.data.Table.from_numpy(domain, X, Y, metas) table.name = getattr(self.data, 'name', '') table.attributes = getattr(self.data, 'attributes', {}) if rightu is not None: table.ids = np.hstack((self.data.ids, self.extra_data.ids[rightu])) else: table.ids = self.data.ids[lefti] return table def _domain_rename_duplicates(self, attributes, class_vars, metas): """Check for duplicate variable names in domain. If any, rename the variables, by replacing them with new ones (names are appended a number). 
""" attrs, cvars, mets = [], [], [] n_attrs, n_cvars, n_metas = len(attributes), len(class_vars), len( metas) lists = [attrs] * n_attrs + [cvars] * n_cvars + [mets] * n_metas all_vars = attributes + class_vars + metas proposed_names = [m.name for m in all_vars] unique_names = get_unique_names_duplicates(proposed_names) duplicates = set() for p_name, u_name, var, c in zip(proposed_names, unique_names, all_vars, lists): if p_name != u_name: duplicates.add(p_name) var = var.copy(name=u_name) c.append(var) if duplicates: self.Warning.renamed_vars(", ".join(duplicates)) return Orange.data.Domain(attrs, cvars, mets) @staticmethod def _join_array_by_indices(left, right, lefti, righti, string_cols=None): """Join (horizontally) two arrays, taking pairs of rows given in indices """ def prepare(arr, inds, str_cols): try: newarr = arr[inds] except IndexError: newarr = np.full_like(arr, np.nan) else: empty = np.full(arr.shape[1], np.nan) if str_cols: assert arr.dtype == object empty = empty.astype(object) empty[str_cols] = '' newarr[inds == -1] = empty return newarr left_width = left.shape[1] str_left = [i for i in string_cols or () if i < left_width] str_right = [ i - left_width for i in string_cols or () if i >= left_width ] res = hstack((prepare(left, lefti, str_left), prepare(right, righti, str_right))) return res @staticmethod def migrate_settings(settings, version=None): def mig_value(x): if x == "Position (index)": return INDEX if x == "Source position (index)": return INSTANCEID return x if not version: operations = ("augment", "merge", "combine") oper = operations[settings["merging"]] settings["attr_pairs"] = (True, True, [ (mig_value(settings[f"attr_{oper}_data"]), mig_value(settings[f"attr_{oper}_extra"])) ]) for oper in operations: del settings[f"attr_{oper}_data"] del settings[f"attr_{oper}_extra"] if not version or version < 2 and "attr_pairs" in settings: data_exists, extra_exists, attr_pairs = settings.pop("attr_pairs") if not (data_exists and extra_exists): 
settings["context_settings"] = [] return mapper = {0: (INDEX, 100), 1: (INSTANCEID, 100)} context = ContextHandler().new_context() context.values["attr_pairs"] = [ tuple(mapper.get(var, (var, 100)) for var in pair) for pair in attr_pairs ] context.variables1 = {} context.variables2 = {} settings["context_settings"] = [context]
class OWCorrelations(OWWidget):
    """Widget that ranks pairs of numeric features by their correlation and
    outputs the data, the selected pair and the table of correlations."""
    name = "Correlations"
    description = "Compute all pairwise attribute correlations."
    icon = "icons/Correlations.svg"
    priority = 1106

    class Inputs:
        data = Input("Data", Table)

    class Outputs:
        data = Output("Data", Table)
        features = Output("Features", AttributeList)
        correlations = Output("Correlations", Table)

    want_main_area = False
    want_control_area = True

    correlation_type: int

    settings_version = 3
    settingsHandler = DomainContextHandler()
    selection = ContextSetting([])
    feature = ContextSetting(None)
    correlation_type = Setting(0)

    class Information(OWWidget.Information):
        removed_cons_feat = Msg("Constant features have been removed.")

    class Warning(OWWidget.Warning):
        not_enough_vars = Msg("At least two numeric features are needed.")
        not_enough_inst = Msg("At least two instances are needed.")

    def __init__(self):
        """Build the controls: correlation type and feature combos, and the
        filter, rank table and button taken from the vizrank dialog."""
        super().__init__()
        self.data = None  # type: Table
        # Numeric, non-constant, imputed columns of `data` (see set_data)
        self.cont_data = None  # type: Table

        # GUI
        box = gui.vBox(self.controlArea)
        self.correlation_combo = gui.comboBox(
            box, self, "correlation_type", items=CorrelationType.items(),
            orientation=Qt.Horizontal, callback=self._correlation_combo_changed
        )

        self.feature_model = DomainModel(
            order=DomainModel.ATTRIBUTES, separators=False,
            placeholder="(All combinations)", valid_types=ContinuousVariable)
        gui.comboBox(
            box, self, "feature", callback=self._feature_combo_changed,
            model=self.feature_model
        )

        self.vizrank, _ = CorrelationRank.add_vizrank(
            None, self, None, self._vizrank_selection_changed)
        self.vizrank.button.setEnabled(False)
        self.vizrank.threadStopped.connect(self._vizrank_stopped)

        gui.separator(box)
        box.layout().addWidget(self.vizrank.filter)
        box.layout().addWidget(self.vizrank.rank_table)

        button_box = gui.hBox(self.buttonsArea)
        button_box.layout().addWidget(self.vizrank.button)

        self.info.set_input_summary(self.info.NoInput)
        self.info.set_output_summary(self.info.NoOutput)

    @staticmethod
    def sizeHint():
        """Return the preferred initial size of the widget."""
        return QSize(350, 400)

    def _correlation_combo_changed(self):
        """Recompute the ranking after the correlation type was changed."""
        self.apply()

    def _feature_combo_changed(self):
        """Recompute the ranking after the feature was changed."""
        self.apply()

    def _vizrank_selection_changed(self, *args):
        """Store the variables passed by vizrank as the selection and send
        the outputs."""
        self.selection = list(args)
        self.commit()

    def _vizrank_stopped(self):
        """Restore the selection in the rank table when ranking stops."""
        self._vizrank_select()

    def _vizrank_select(self):
        """Select the row of the rank table that corresponds to
        `self.selection`; if there is none, select the first row."""
        model = self.vizrank.rank_table.model()
        if not model.rowCount():
            return
        selection = QItemSelection()

        # This flag is needed because data in the model could be
        # filtered by a feature and therefore selection could not be found
        selection_in_model = False
        if self.selection:
            # Compare sorted names, so the order within the pair is ignored.
            sel_names = sorted(var.name for var in self.selection)
            for i in range(model.rowCount()):
                # pylint: disable=protected-access
                names = sorted(x.name for x in model.data(
                    model.index(i, 0), CorrelationRank._AttrRole))
                if names == sel_names:
                    selection.select(model.index(i, 0),
                                     model.index(i, model.columnCount() - 1))
                    selection_in_model = True
                    break
        if not selection_in_model:
            selection.select(model.index(0, 0),
                             model.index(0, model.columnCount() - 1))
        self.vizrank.rank_table.selectionModel().select(
            selection, QItemSelectionModel.ClearAndSelect)

    @Inputs.data
    def set_data(self, data):
        """Handle new input data.

        `cont_data` is set to a table with the continuous variables of
        `data` (class variables, metas and attributes, all as attributes),
        with constant columns removed and missing values imputed. It stays
        `None`, with a warning shown, if there are fewer than two
        instances or fewer than two such variables.
        """
        self.closeContext()
        self.clear_messages()
        self.data = data
        self.cont_data = None
        self.selection = []
        if data is not None:
            if len(data) < 2:
                self.Warning.not_enough_inst()
            else:
                domain = data.domain
                cont_vars = [a for a in domain.class_vars +
                             domain.metas + domain.attributes
                             if a.is_continuous]
                cont_data = Table.from_table(Domain(cont_vars), data)
                remover = Remove(Remove.RemoveConstant)
                cont_data = remover(cont_data)
                if remover.attr_results["removed"]:
                    self.Information.removed_cons_feat()
                if len(cont_data.domain.attributes) < 2:
                    self.Warning.not_enough_vars()
                else:
                    self.cont_data = SklImpute()(cont_data)
            self.info.set_input_summary(len(data),
                                        format_summary_details(data))
        else:
            self.info.set_input_summary(self.info.NoInput)
        self.set_feature_model()
        self.openContext(self.cont_data)
        self.apply()
        self.vizrank.button.setEnabled(self.cont_data is not None)

    def set_feature_model(self):
        """Update the feature combo and preselect the continuous class
        variable, if the data has one and it is among the ranked columns."""
        self.feature_model.set_domain(self.cont_data and self.cont_data.domain)
        data = self.data
        self.feature = None
        if self.cont_data and data.domain.has_continuous_class:
            class_name = data.domain.class_var.name
            # Constant columns are removed in set_data, so a constant
            # target is not in cont_data; looking it up would raise.
            if class_name in self.cont_data.domain:
                self.feature = self.cont_data.domain[class_name]

    def apply(self):
        """Reset vizrank and, if there is suitable data, start ranking;
        otherwise just send the outputs."""
        self.vizrank.initialize()
        if self.cont_data is not None:
            # this triggers self.commit() by changing vizrank selection
            self.vizrank.toggle()
        else:
            self.commit()

    def commit(self):
        """Send the input data, the selected pair of features and a table
        with correlations read from the vizrank model."""
        self.Outputs.data.send(self.data)
        summary = len(self.data) if self.data else self.info.NoOutput
        details = format_summary_details(self.data) if self.data else ""
        self.info.set_output_summary(summary, details)

        if self.data is None or self.cont_data is None:
            self.Outputs.features.send(None)
            self.Outputs.correlations.send(None)
            return

        attrs = [ContinuousVariable("Correlation"), ContinuousVariable("FDR")]
        metas = [StringVariable("Feature 1"), StringVariable("Feature 2")]
        domain = Domain(attrs, metas=metas)
        model = self.vizrank.rank_model
        # One row per pair: the displayed value and the value stored under
        # PValRole; the latter column is then replaced by the FDR.
        x = np.array([[float(model.data(model.index(row, 0), role))
                       for role in (Qt.DisplayRole, CorrelationRank.PValRole)]
                      for row in range(model.rowCount())])
        x[:, 1] = FDR(list(x[:, 1]))
        # pylint: disable=protected-access
        m = np.array([[a.name for a in model.data(model.index(row, 0),
                                                  CorrelationRank._AttrRole)]
                      for row in range(model.rowCount())], dtype=object)
        corr_table = Table(domain, x, metas=m)
        corr_table.name = "Correlations"

        # data has been imputed; send original attributes
        self.Outputs.features.send(AttributeList(
            [self.data.domain[var.name] for var in self.selection]))
        self.Outputs.correlations.send(corr_table)

    def send_report(self):
        """Report the rank table, titled with the correlation type."""
        self.report_table(CorrelationType.items()[self.correlation_type],
                          self.vizrank.rank_table)

    @classmethod
    def migrate_context(cls, context, version):
        """Convert the stored selection from the formats used by context
        versions 1 and 2 to the current one."""
        if version < 2:
            sel = context.values["selection"]
            context.values["selection"] = [(var.name, vartype(var))
                                           for var in sel[0]]
        if version < 3:
            sel = context.values["selection"]
            context.values["selection"] = ([(name, vtype + 100)
                                            for name, vtype in sel], -3)
class OWSave(widget.OWWidget):
    """Widget that writes the input data to a file in a chosen format."""
    name = "Save"
    description = "Save data to an output file."
    icon = "icons/Save.svg"
    author = "Martin Frlin"
    category = "Data"
    keywords = ["data", "save"]

    inputs = [("Data", Table, "dataset")]

    want_main_area = False
    resizing_enabled = False

    # Directory of the most recently chosen file
    last_dir = Setting("")

    def __init__(self, data=None, file_formats=None):
        """Build the format combo and the two save buttons.

        `data`, if given, is used as if it was received on the input.
        `file_formats` is a mapping that is indexed by extension and whose
        values provide the writers; it defaults to `FileFormat.writers`.
        """
        super().__init__()
        self.data = None
        self.filename = ""
        self.format_index = 0
        self.file_formats = file_formats or FileFormat.writers
        # (description, extensions) of the distinct writers, ordered by
        # their priority
        self.formats = [(f.DESCRIPTION, f.EXTENSIONS)
                        for f in sorted(set(self.file_formats.values()),
                                        key=lambda f: f.OWSAVE_PRIORITY)]
        self.comboBoxFormat = gui.comboBox(
            self.controlArea, self, value='format_index',
            items=['{} (*{})'.format(x[0], ' *'.join(x[1]))
                   for x in self.formats],
            box='File Format')
        box = gui.widgetBox(self.controlArea)
        self.save = gui.button(box, self, "Save", callback=self.save_file,
                               default=True, disabled=True)
        gui.separator(box)
        self.saveAs = gui.button(box, self, "Save as ...",
                                 callback=self.save_file_as, disabled=True)
        self.setMinimumWidth(320)
        self.adjustSize()
        # Test for None, like `dataset` does, so that an empty table is
        # not silently ignored.
        if data is not None:
            self.dataset(data)

    def dataset(self, data):
        """Store the input data; the buttons are enabled iff there is any."""
        self.data = data
        self.save.setDisabled(data is None)
        self.saveAs.setDisabled(data is None)

    def save_file_as(self):
        """Ask for a file name, remember it and save the data.

        The first extension of the chosen format is appended if the name
        does not end with any of the format's extensions.
        """
        format_name, format_extensions = self.formats[self.format_index]
        home_dir = os.path.expanduser("~")
        filename = QtGui.QFileDialog.getSaveFileName(
            self, 'Save as ...',
            self.filename or os.path.join((self.last_dir or home_dir),
                                          getattr(self.data, 'name', '')),
            '{} (*{})'.format(format_name, ' *'.join(format_extensions)))
        if not filename:
            return
        if not filename.endswith(tuple(format_extensions)):
            filename += format_extensions[0]
        self.filename = filename
        self.last_dir, file_name = os.path.split(self.filename)
        self.save.setText("Save as '%s'" % file_name)
        self.save.setDisabled(False)
        self.save_file()

    def save_file(self):
        """Write the data to `self.filename` with the writer of the chosen
        format, asking for a file name first if none has been set.

        A failure is shown as the widget's error rather than raised.
        """
        if not self.filename:
            self.save_file_as()
        elif self.data is not None:
            try:
                ext = self.formats[self.format_index][1]
                if not isinstance(ext, str):
                    ext = ext[0]  # is e.g. a tuple of extensions
                self.file_formats[ext].write(self.filename, self.data)
                self.error()
            except Exception as errValue:
                # Any failure to write is reported in the widget.
                self.error(str(errValue))