def __init__(self, model):
    """Configure a plain, non-selectable table view showing ``model``.

    Both headers and the grid are hidden, selection is disabled and cell
    painting is handed to a ``HorizontalGridDelegate`` instance.

    Args:
        model: the item model to display; passed unchanged to ``setModel``.
    """
    QTableView.__init__(self)

    # Hide the column header and the row header alike.
    for header in (self.horizontalHeader(), self.verticalHeader()):
        header.hide()

    self.setShowGrid(False)
    self.setSelectionMode(QTableView.NoSelection)

    delegate = HorizontalGridDelegate()
    self.setItemDelegate(delegate)

    self.setModel(model)
def test_model():
    """Show the "lenses" data set in a sortable table view (manual check).

    Starts a ``QApplication``, displays the view and blocks in the Qt event
    loop until it exits.

    Returns:
        Whatever ``app.exec()`` returns.
    """
    app = QApplication([])
    table_view = QTableView(sortingEnabled=True)

    lenses = Orange.data.Table("lenses")
    table_view.setModel(TableModel(lenses))

    # Show the window and bring it to the front.
    table_view.show()
    table_view.raise_()

    exit_code = app.exec()
    return exit_code
def __init__(self):
    """Initialise widget state and build the control and main areas.

    The control area gets an info box, the context-width spinner and the
    auto-commit box; the main area gets the query line edit and the
    concordance table.
    """
    super().__init__()

    self.corpus = None              # Corpus
    self.n_matching = ''            # Info on docs matching the word
    self.n_tokens = ''              # Info on tokens
    self.n_types = ''               # Info on types (unique tokens)
    self.is_word_on_input = False

    # --- Info box: labels are bound to the n_* attributes above ---------
    stats_box = gui.widgetBox(self.controlArea, 'Info')
    for template in ('Tokens: %(n_tokens)s',
                     'Types: %(n_types)s',
                     'Matching: %(n_matching)s'):
        gui.label(stats_box, self, template)

    # --- Width parameter ------------------------------------------------
    gui.spin(
        self.controlArea, self, 'context_width', 3, 10,
        box=True,
        label="Number of words:",
        callback=self.set_width,
    )
    gui.rubber(self.controlArea)

    # --- Search ---------------------------------------------------------
    search_box = gui.widgetBox(self.mainArea, orientation="vertical")
    query_policy = QSizePolicy(QSizePolicy.MinimumExpanding,
                               QSizePolicy.Fixed)
    self.input = gui.lineEdit(
        search_box, self, 'word',
        orientation=Qt.Horizontal,
        sizePolicy=query_policy,
        label='Query:',
        callback=self.set_word,
        callbackOnType=True,
    )
    self.input.setFocus()

    # --- Concordances view ----------------------------------------------
    self.conc_view = view = QTableView()
    self.model = concordances = ConcordanceModel()
    view.setModel(concordances)
    view.setWordWrap(False)
    view.setSelectionBehavior(QTableView.SelectRows)
    # The custom selection model must be installed before its
    # selectionChanged signal is connected below.
    view.setSelectionModel(DocumentSelectionModel(concordances))
    view.setItemDelegate(HorizontalGridDelegate())
    view.selectionModel().selectionChanged.connect(self.selection_changed)
    view.horizontalHeader().hide()
    view.setShowGrid(False)
    self.mainArea.layout().addWidget(view)
    self.set_width()

    # --- Auto-commit box ------------------------------------------------
    gui.auto_commit(self.controlArea, self, 'autocommit', 'Commit',
                    'Auto commit is on')
def populate_main_area(self):
    """Lay out the main area as a 2x2 grid.

    The column-type label goes above the table (row 0, column 1), the
    row-type label to its left (row 1, column 0) and the table view itself
    in cell (1, 1). ``self.model`` is reset to ``None``.
    """
    container = QWidget()
    container.setLayout(QGridLayout(container))
    self.mainArea.layout().addWidget(container)
    layout = container.layout()

    # (label template, grid row, grid column, alignment)
    label_specs = (
        ('%(col_type)s', 0, 1, Qt.AlignHCenter),
        ('%(row_type)s', 1, 0, Qt.AlignVCenter),
    )
    for template, row, column, alignment in label_specs:
        label = gui.label(None, self, template)
        layout.addWidget(label, row, column)
        layout.setAlignment(label, alignment)

    self.view = QTableView()
    self.model = None
    layout.addWidget(self.view, 1, 1)
class OWCreateInstance(OWWidget):
    """Widget that builds a single data instance from per-variable values.

    The values are edited in a two-column table (variable name, value) and
    can be initialised in bulk (median / mean / random / from the reference
    input) through the buttons below the table or through the context menu
    of a single row. The created instance is sent on the "Data" output,
    optionally appended to the input data.
    """

    name = "Create Instance"
    description = "Interactively create a data instance from sample dataset."
    icon = "icons/CreateInstance.svg"
    category = "Data"
    keywords = ["simulator"]
    priority = 4000

    class Inputs:
        data = Input("Data", Table)
        reference = Input("Reference", Table)

    class Outputs:
        data = Output("Data", Table)

    class Information(OWWidget.Information):
        nans_removed = Msg("Variables with only missing values were "
                           "removed from the list.")

    want_main_area = False
    # Names of the bulk-initialisation actions; also used (capitalised) as
    # button and context-menu labels.
    ACTIONS = ["median", "mean", "random", "input"]
    # (tag, column title) pairs; ``Header.<tag>`` gives the column index.
    HEADER = [["name", "Variable"],
              ["variable", "Value"]]
    Header = namedtuple(
        "header", [tag for tag, _ in HEADER]
    )(*range(len(HEADER)))

    values: Dict[str, Union[float, str]] = Setting({}, schema_only=True)
    append_to_data = Setting(True)
    auto_commit = Setting(True)

    def __init__(self):
        """Build the filter box, the value table and the action buttons."""
        super().__init__()
        self.data: Optional[Table] = None
        self.reference: Optional[Table] = None

        self.filter_edit = QLineEdit(textChanged=self.__filter_edit_changed,
                                     placeholderText="Filter...")
        self.view = QTableView(sortingEnabled=True,
                               contextMenuPolicy=Qt.CustomContextMenu,
                               selectionMode=QTableView.NoSelection)
        self.view.customContextMenuRequested.connect(self.__menu_requested)
        self.view.setItemDelegateForColumn(
            self.Header.variable, VariableDelegate(self)
        )
        self.view.verticalHeader().hide()
        self.view.horizontalHeader().setStretchLastSection(True)
        self.view.horizontalHeader().setMaximumSectionSize(350)

        self.model = VariableItemModel(self)
        self.model.setHorizontalHeaderLabels([x for _, x in self.HEADER])
        self.model.dataChanged.connect(self.__table_data_changed)
        self.model.dataHasNanColumn.connect(self.Information.nans_removed)
        self.proxy_model = QSortFilterProxyModel()
        # -1: the filter is matched against all columns.
        self.proxy_model.setFilterKeyColumn(-1)
        # Use the enum rather than a bare ``False``; strict Qt bindings
        # reject a bool here.
        self.proxy_model.setFilterCaseSensitivity(Qt.CaseInsensitive)
        self.proxy_model.setSourceModel(self.model)
        self.view.setModel(self.proxy_model)

        vbox = gui.vBox(self.controlArea, box=True)
        vbox.layout().addWidget(self.filter_edit)
        vbox.layout().addWidget(self.view)

        box = gui.hBox(vbox, objectName="buttonBox")
        gui.rubber(box)
        for name in self.ACTIONS:
            gui.button(
                box, self, name.capitalize(),
                # ``fun=name`` binds the current action name (avoids the
                # late-binding closure pitfall).
                lambda *args, fun=name: self._initialize_values(fun),
                autoDefault=False
            )
        gui.rubber(box)

        # The lambda looks up ``self.commit`` at call time instead of
        # binding it now.
        # pylint: disable=unnecessary-lambda
        gui.checkBox(self.buttonsArea, self, "append_to_data",
                     "Append this instance to input data",
                     callback=lambda: self.commit())
        gui.rubber(self.buttonsArea)

        gui.auto_apply(self.buttonsArea, self, "auto_commit")

        self._set_input_summary()
        self._set_output_summary()
        self.settingsAboutToBePacked.connect(self.pack_settings)

    def __filter_edit_changed(self):
        """Apply the (stripped) filter text to the proxy model."""
        self.proxy_model.setFilterFixedString(self.filter_edit.text().strip())

    def __table_data_changed(self):
        """Commit whenever a value in the table is edited."""
        self.commit()

    def __menu_requested(self, point: QPoint):
        """Show the initialisation menu for the row under ``point``.

        Args:
            point: position of the request in viewport coordinates.
        """
        index = self.view.indexAt(point)
        if not index.isValid():
            # Right-click on empty space: there is no row to act on, and an
            # invalid index has no model to map through.
            return
        source_index = self.proxy_model.mapToSource(index)
        menu = QMenu(self)
        for action in self._create_actions(source_index):
            menu.addAction(action)
        menu.popup(self.view.viewport().mapToGlobal(point))

    def _create_actions(self, index: QModelIndex) -> List[QAction]:
        """Create one action per entry of ``ACTIONS`` acting on ``index``.

        Args:
            index: index in the *source* model identifying the row.

        Returns:
            The list of actions, in ``ACTIONS`` order.
        """
        actions = []
        for name in self.ACTIONS:
            action = QAction(name.capitalize(), self)
            action.triggered.connect(
                lambda *args, fun=name: self._initialize_values(fun, [index])
            )
            actions.append(action)
        return actions

    def _initialize_values(self, fun: str, indices: List[QModelIndex] = None):
        """Set the value of variables according to the action ``fun``.

        Args:
            fun: one of ``ACTIONS``.
            indices: source-model indices of the rows to change. When
                ``None``, all rows currently shown in the view (i.e. rows
                passing the filter) are changed.

        Raises:
            NotImplementedError: for a variable that is neither continuous,
                discrete nor string.
        """
        cont_fun = {"median": np.nanmedian,
                    "mean": np.nanmean,
                    "random": cont_random,
                    "input": np.nanmean}.get(fun, NotImplemented)
        disc_fun = {"median": majority,
                    "mean": majority,
                    "random": disc_random,
                    "input": majority}.get(fun, NotImplemented)

        if not self.data or (fun == "input" and not self.reference):
            return

        if indices is None:
            # Proxy rows differ from source rows once the view is filtered
            # or sorted, so translate each visible row to its source row.
            proxy = self.proxy_model
            rows = [proxy.mapToSource(proxy.index(row, 0)).row()
                    for row in range(proxy.rowCount())]
        else:
            rows = [index.row() for index in indices]

        # Suspend per-cell commits while values are set in bulk; reconnect
        # even if something below raises so that later edits still commit.
        self.model.dataChanged.disconnect(self.__table_data_changed)
        try:
            for row in rows:
                index = self.model.index(row, self.Header.variable)
                variable = self.model.data(index, VariableRole)

                if fun == "input":
                    if variable not in self.reference.domain:
                        continue
                    values = self.reference.get_column_view(variable)[0]
                    if variable.is_primitive():
                        values = values.astype(float)
                        if all(np.isnan(values)):
                            continue
                else:
                    values = self.model.data(index, ValuesRole)

                if variable.is_continuous:
                    value = cont_fun(values)
                    value = round(value, variable.number_of_decimals)
                elif variable.is_discrete:
                    value = disc_fun(values)
                elif variable.is_string:
                    value = ""
                else:
                    raise NotImplementedError

                self.model.setData(index, value, ValueRole)
        finally:
            self.model.dataChanged.connect(self.__table_data_changed)
        self.commit()

    @Inputs.data
    def set_data(self, data: Table):
        """Handle the "Data" input: refill the table and commit."""
        self.data = data
        self._set_input_summary()
        self._set_model_data()
        self.unconditional_commit()

    def _set_model_data(self):
        """Refill the item model from ``self.data`` and stored ``values``."""
        self.Information.nans_removed.clear()
        self.model.removeRows(0, self.model.rowCount())
        if not self.data:
            return

        self.model.set_data(self.data, self.values)
        # The stored values were handed to the model above; clear them.
        self.values = {}
        # Temporarily disable stretching so the columns are sized to their
        # contents, then stretch the last one again.
        self.view.horizontalHeader().setStretchLastSection(False)
        self.view.resizeColumnsToContents()
        self.view.resizeRowsToContents()
        self.view.horizontalHeader().setStretchLastSection(True)

    @Inputs.reference
    def set_reference(self, data: Table):
        """Handle the "Reference" input."""
        self.reference = data
        self._set_input_summary()

    def _set_input_summary(self):
        """Show the sizes of both inputs in the input summary."""
        n_data = len(self.data) if self.data else 0
        n_refs = len(self.reference) if self.reference else 0
        summary, details, kwargs = self.info.NoInput, "", {}

        if self.data or self.reference:
            summary = f"{self.info.format_number(n_data)}, " \
                      f"{self.info.format_number(n_refs)}"
            data_list = [("Data", self.data), ("Reference", self.reference)]
            details = format_multiple_summaries(data_list)
            kwargs = {"format": Qt.RichText}
        self.info.set_input_summary(summary, details, **kwargs)

    def _set_output_summary(self, data: Optional[Table] = None):
        """Show the size of ``data`` (or "no output") in the output summary."""
        if data:
            summary, details = len(data), format_summary_details(data)
        else:
            summary, details = self.info.NoOutput, ""
        self.info.set_output_summary(summary, details)

    def commit(self):
        """Create the instance (optionally appended to the input) and send it."""
        output_data = None
        if self.data:
            output_data = self._create_data_from_values()
            if self.append_to_data:
                output_data = self._append_to_data(output_data)
        self._set_output_summary(output_data)
        self.Outputs.data.send(output_data)

    def _create_data_from_values(self) -> Table:
        """Return a one-row table in the input domain holding the table values.

        Cells without a value in the model stay missing (NaN, or an empty
        string for string metas).
        """
        data = Table.from_domain(self.data.domain, 1)
        data.name = "created"
        data.X[:] = np.nan
        data.Y[:] = np.nan
        for i, m in enumerate(self.data.domain.metas):
            data.metas[:, i] = "" if m.is_string else np.nan

        values = self._get_values()
        for var_name, value in values.items():
            data[:, var_name] = value
        return data

    def _append_to_data(self, data: Table) -> Table:
        """Append the one-row ``data`` to the input data.

        A discrete meta "Source ID" is added; it is 0 for rows of the input
        data and 1 for the created row.
        """
        assert self.data
        assert len(data) == 1

        var = DiscreteVariable("Source ID", values=(self.data.name, data.name))
        data = Table.concatenate([self.data, data], axis=0)
        domain = Domain(data.domain.attributes, data.domain.class_vars,
                        data.domain.metas + (var,))
        data = data.transform(domain)
        data.metas[: len(self.data), -1] = 0
        data.metas[len(self.data):, -1] = 1
        return data

    def _get_values(self) -> Dict[str, Union[str, float]]:
        """Return a ``{variable name: value}`` dict for all rows of the model."""
        values = {}
        for row in range(self.model.rowCount()):
            index = self.model.index(row, self.Header.variable)
            values[self.model.data(index, VariableRole).name] = \
                self.model.data(index, ValueRole)
        return values

    def send_report(self):
        """Report the domain and the value chosen for every variable."""
        if not self.data:
            return
        self.report_domain("Input", self.data.domain)
        self.report_domain("Output", self.data.domain)
        items = []
        values: Dict = self._get_values()
        for var in self.data.domain.variables + self.data.domain.metas:
            val = values.get(var.name, np.nan)
            if var.is_primitive():
                val = var.repr_val(val)
            items.append([f"{var.name}:", val])
        self.report_table("Values", items)

    @staticmethod
    def sizeHint():
        """Return the preferred initial size of the widget."""
        return QSize(600, 500)

    def pack_settings(self):
        """Store the current table values in the ``values`` setting."""
        self.values: Dict[str, Union[str, float]] = self._get_values()
class OWRank(OWWidget):
    # Widget that scores the attributes of the input data with a set of
    # built-in measures (``SCORES``) plus any scorers received on the
    # "Scorer" input, shows the scores in a sortable table and outputs the
    # data reduced to the selected attributes together with a table of scores.
    name = "Rank"
    description = "Rank and filter data features by their relevance."
    icon = "icons/Rank.svg"
    priority = 1102

    buttons_area_orientation = Qt.Vertical

    inputs = [("Data", Table, "setData"),
              ("Scorer", score.Scorer, "set_learner", widget.Multiple)]
    outputs = [("Reduced Data", Table, widget.Default), ("Scores", Table)]

    # Identifiers of the attribute selection methods; they double as button
    # ids in ``selectButtons``.
    SelectNone, SelectAll, SelectManual, SelectNBest = range(4)

    # Names of the measures ticked by default in classification and
    # regression mode respectively.
    cls_default_selected = Setting({"Gain Ratio", "Gini Decrease"})
    reg_default_selected = Setting({"Univariate Linear Regression", "RReliefF"})
    selectMethod = Setting(SelectNBest)
    nSelected = Setting(5)
    auto_apply = Setting(True)

    # Header state for discrete/continuous/no_class scores
    headerState = Setting([None, None, None])

    settings_version = 1
    settingsHandler = DomainContextHandler()
    selected_rows = ContextSetting([])

    # One boolean attribute per score check box; the names are listed in
    # ``_score_vars`` in the order in which they are zipped with ``SCORES``
    # in ``__init__``.
    gain = inf_gain = gini = anova = chi2 = ulr = relief = rrelief = fcbc = True
    _score_vars = ["gain", "inf_gain", "gini", "anova", "chi2", "relief",
                   "fcbc", "ulr", "rrelief"]

    class Warning(OWWidget.Warning):
        no_target_var = Msg("Data does not have a target variable")

    class Error(OWWidget.Error):
        invalid_type = Msg("Cannot handle target variable type {}")
        inadequate_learner = Msg("{}")

    def __init__(self):
        """Build the score check boxes, selection controls and rank views."""
        super().__init__()
        # One row of scores per built-in measure, followed by the rows
        # produced by scorers from the "Scorer" input.
        self.measure_scores = None
        # When False, ``measuresSelectionChanged`` does not recompute scores.
        self.update_scores = True
        self.usefulAttributes = []
        # Scorers from the "Scorer" input, keyed by the id passed to
        # ``set_learner``.
        self.learners = {}
        # Column labels for the scores produced by ``self.learners``.
        self.labels = []
        self.out_domain_desc = None

        self.all_measures = SCORES

        # measure name -> whether its column is shown
        self.selectedMeasures = dict([(m.name, True) for m
                                      in self.all_measures])
        # Discrete (0), continuous (1) or no class (2) mode; see
        # ``switchRanksMode``.
        self.rankMode = 0

        self.data = None

        self.discMeasures = [m for m in self.all_measures if
                             issubclass(DiscreteVariable, m.score.class_type)]
        self.contMeasures = [m for m in self.all_measures if
                             issubclass(ContinuousVariable, m.score.class_type)]

        self.score_checks = []
        self.cls_scoring_box = gui.vBox(None, "Scoring for Classification")
        self.reg_scoring_box = gui.vBox(None, "Scoring for Regression")
        # The first seven entries of ``_score_vars`` get a check box in the
        # classification box, the last two in the regression box.
        boxes = [self.cls_scoring_box] * 7 + [self.reg_scoring_box] * 2
        for _score, var, box in zip(SCORES, self._score_vars, boxes):
            check = gui.checkBox(
                box, self, var, label=_score.name,
                # ``val=_score`` binds the measure of this iteration.
                callback=lambda val=_score: self.measuresSelectionChanged(val))
            self.score_checks.append(check)

        # Stack index matches the rank mode: 0 classification box,
        # 1 regression box, 2 an empty widget.
        self.score_stack = QStackedWidget(self)
        self.score_stack.addWidget(self.cls_scoring_box)
        self.score_stack.addWidget(self.reg_scoring_box)
        self.score_stack.addWidget(QWidget())
        self.controlArea.layout().addWidget(self.score_stack)

        gui.rubber(self.controlArea)

        selMethBox = gui.vBox(
            self.controlArea, "Select Attributes", addSpace=True)

        grid = QGridLayout()
        grid.setContentsMargins(6, 0, 6, 0)
        self.selectButtons = QButtonGroup()
        self.selectButtons.buttonClicked[int].connect(self.setSelectMethod)

        # Helper: create a radio button and register it in ``selectButtons``
        # under ``buttonid``.
        def button(text, buttonid, toolTip=None):
            b = QRadioButton(text)
            self.selectButtons.addButton(b, buttonid)
            if toolTip is not None:
                b.setToolTip(toolTip)
            return b

        b1 = button(self.tr("None"), OWRank.SelectNone)
        b2 = button(self.tr("All"), OWRank.SelectAll)
        b3 = button(self.tr("Manual"), OWRank.SelectManual)
        b4 = button(self.tr("Best ranked:"), OWRank.SelectNBest)

        s = gui.spin(selMethBox, self, "nSelected", 1, 100,
                     callback=self.nSelectedChanged)

        grid.addWidget(b1, 0, 0)
        grid.addWidget(b2, 1, 0)
        grid.addWidget(b3, 2, 0)
        grid.addWidget(b4, 3, 0)
        grid.addWidget(s, 3, 1)

        self.selectButtons.button(self.selectMethod).setChecked(True)

        selMethBox.layout().addLayout(grid)

        gui.auto_commit(selMethBox, self, "auto_apply", "Send", box=False)

        # Discrete, continuous and no_class table views are stacked
        self.ranksViewStack = QStackedLayout()
        self.mainArea.layout().addLayout(self.ranksViewStack)

        # --- view for a discrete class (stack index 0) ---
        self.discRanksView = QTableView()
        self.ranksViewStack.addWidget(self.discRanksView)
        self.discRanksView.setSelectionBehavior(QTableView.SelectRows)
        self.discRanksView.setSelectionMode(QTableView.MultiSelection)
        self.discRanksView.setSortingEnabled(True)

        self.discRanksLabels = ["#"] + [m.shortname for m in self.discMeasures]
        self.discRanksModel = QStandardItemModel(self)
        self.discRanksModel.setHorizontalHeaderLabels(self.discRanksLabels)

        self.discRanksProxyModel = MySortProxyModel(self)
        self.discRanksProxyModel.setSourceModel(self.discRanksModel)
        self.discRanksView.setModel(self.discRanksProxyModel)

        self.discRanksView.setColumnWidth(0, 20)
        self.discRanksView.selectionModel().selectionChanged.connect(
            self.commit
        )
        self.discRanksView.pressed.connect(self.onSelectItem)
        self.discRanksView.horizontalHeader().sectionClicked.connect(
            self.headerClick
        )
        self.discRanksView.verticalHeader().sectionClicked.connect(
            self.onSelectItem
        )

        if self.headerState[0] is not None:
            self.discRanksView.horizontalHeader().restoreState(
                self.headerState[0])

        # --- view for a continuous class (stack index 1) ---
        self.contRanksView = QTableView()
        self.ranksViewStack.addWidget(self.contRanksView)
        self.contRanksView.setSelectionBehavior(QTableView.SelectRows)
        self.contRanksView.setSelectionMode(QTableView.MultiSelection)
        self.contRanksView.setSortingEnabled(True)

        self.contRanksLabels = ["#"] + [m.shortname for m in self.contMeasures]
        self.contRanksModel = QStandardItemModel(self)
        self.contRanksModel.setHorizontalHeaderLabels(self.contRanksLabels)

        self.contRanksProxyModel = MySortProxyModel(self)
        self.contRanksProxyModel.setSourceModel(self.contRanksModel)
        self.contRanksView.setModel(self.contRanksProxyModel)

        self.contRanksView.setColumnWidth(0, 20)
        self.contRanksView.selectionModel().selectionChanged.connect(
            self.commit
        )
        self.contRanksView.pressed.connect(self.onSelectItem)
        self.contRanksView.horizontalHeader().sectionClicked.connect(
            self.headerClick
        )
        self.contRanksView.verticalHeader().sectionClicked.connect(
            self.onSelectItem
        )

        if self.headerState[1] is not None:
            self.contRanksView.horizontalHeader().restoreState(
                self.headerState[1])

        # --- view for data without a class (stack index 2) ---
        self.noClassRanksView = QTableView()
        self.ranksViewStack.addWidget(self.noClassRanksView)
        self.noClassRanksView.setSelectionBehavior(QTableView.SelectRows)
        self.noClassRanksView.setSelectionMode(QTableView.MultiSelection)
        self.noClassRanksView.setSortingEnabled(True)

        self.noClassRanksLabels = ["#"]
        self.noClassRanksModel = QStandardItemModel(self)
        self.noClassRanksModel.setHorizontalHeaderLabels(self.noClassRanksLabels)

        self.noClassRanksProxyModel = MySortProxyModel(self)
        self.noClassRanksProxyModel.setSourceModel(self.noClassRanksModel)
        self.noClassRanksView.setModel(self.noClassRanksProxyModel)

        self.noClassRanksView.setColumnWidth(0, 20)
        self.noClassRanksView.selectionModel().selectionChanged.connect(
            self.commit
        )
        self.noClassRanksView.pressed.connect(self.onSelectItem)
        self.noClassRanksView.horizontalHeader().sectionClicked.connect(
            self.headerClick
        )
        self.noClassRanksView.verticalHeader().sectionClicked.connect(
            self.onSelectItem
        )

        if self.headerState[2] is not None:
            self.noClassRanksView.horizontalHeader().restoreState(
                self.headerState[2])

        # Switch the current view to Discrete
        self.switchRanksMode(0)
        self.resetInternals()
        self.updateDelegates()
        self.updateVisibleScoreColumns()

        self.resize(690, 500)

        self.measure_scores = table((len(self.measures), 0), None)

    def switchRanksMode(self, index):
        """
        Switch between discrete/continuous/no_class mode

        `index` is 0 for a discrete class, 1 for a continuous class and any
        other value for no class. Sets the ``ranksView``, ``ranksModel``,
        ``ranksProxyModel``, ``measures`` and ``selected_checks`` attributes
        for the chosen mode, resets ``measure_scores`` and updates the score
        check boxes and visible columns.
        """
        self.rankMode = index
        self.ranksViewStack.setCurrentIndex(index)

        if index == 0:
            self.ranksView = self.discRanksView
            self.ranksModel = self.discRanksModel
            self.ranksProxyModel = self.discRanksProxyModel
            self.measures = self.discMeasures
            self.selected_checks = self.cls_default_selected
            self.reg_scoring_box.setSizePolicy(QSizePolicy.Ignored,
                                               QSizePolicy.Ignored)
            self.cls_scoring_box.setSizePolicy(QSizePolicy.Expanding,
                                               QSizePolicy.Expanding)
        elif index == 1:
            self.ranksView = self.contRanksView
            self.ranksModel = self.contRanksModel
            self.ranksProxyModel = self.contRanksProxyModel
            self.measures = self.contMeasures
            self.selected_checks = self.reg_default_selected
            self.cls_scoring_box.setSizePolicy(QSizePolicy.Ignored,
                                               QSizePolicy.Ignored)
            self.reg_scoring_box.setSizePolicy(QSizePolicy.Expanding,
                                               QSizePolicy.Expanding)
        else:
            self.ranksView = self.noClassRanksView
            self.ranksModel = self.noClassRanksModel
            self.ranksProxyModel = self.noClassRanksProxyModel
            self.measures = []
            self.selected_checks = set()
            self.reg_scoring_box.setSizePolicy(QSizePolicy.Ignored,
                                               QSizePolicy.Ignored)
            self.cls_scoring_box.setSizePolicy(QSizePolicy.Ignored,
                                               QSizePolicy.Ignored)

        shape = (len(self.measures) + len(self.learners), 0)
        self.measure_scores = table(shape, None)
        # ``measuresSelectionChanged`` skips ``updateScores`` while this flag
        # is False; presumably this keeps the programmatic ``setChecked``
        # calls below from triggering a recomputation -- TODO confirm.
        self.update_scores = False
        # NOTE: the loop variable ``score`` shadows, within this method, the
        # ``score`` name used in the class-level ``inputs`` declaration.
        for check, score in zip(self.score_checks, SCORES):
            check.setChecked(score.name in self.selected_checks)
        self.update_scores = True
        self.score_stack.setCurrentIndex(index)
        self.updateVisibleScoreColumns()

    @check_sql_input
    def setData(self, data):
        """
        Handle the "Data" input.

        Chooses the rank mode from the class variable of `data`, fills the
        first column and the vertical header of the rank model, computes the
        scores and commits. With `data` of None, None is sent on "Scores".
        """
        self.closeContext()
        self.clear_messages()
        self.resetInternals()

        self.data = data
        self.switchRanksMode(0)
        if self.data is not None:
            domain = self.data.domain
            attrs = domain.attributes
            self.usefulAttributes = [attr for attr in attrs
                                     if attr.is_discrete or attr.is_continuous]

            if domain.has_continuous_class:
                self.switchRanksMode(1)
            elif not domain.class_var:
                self.Warning.no_target_var()
                self.switchRanksMode(2)
            elif not domain.has_discrete_class:
                self.Error.invalid_type(type(domain.class_var).__name__)

            if issparse(self.data.X):   # keep only measures supporting sparse data
                self.measures = [m for m in self.measures
                                 if m.score.supports_sparse_data]

            self.ranksModel.setRowCount(len(attrs))
            for i, a in enumerate(attrs):
                # Column 0 ("#") shows the number of values of a discrete
                # attribute and "C" for any other attribute.
                if a.is_discrete:
                    v = len(a.values)
                else:
                    v = "C"
                item = ScoreValueItem()
                item.setData(v, Qt.DisplayRole)
                self.ranksModel.setItem(i, 0, item)
                item = QStandardItem(a.name)
                item.setData(gui.attributeIconDict[a], Qt.DecorationRole)
                self.ranksModel.setVerticalHeaderItem(i, item)

            shape = (len(self.measures) + len(self.learners), len(attrs))
            self.measure_scores = table(shape, None)
            self.updateScores()
        else:
            self.send("Scores", None)

        self.selected_rows = []
        self.openContext(data)
        self.selectMethodChanged()
        self.commit()

    def get_selection(self):
        """Return the distinct row numbers selected in the current view."""
        selection = self.ranksView.selectionModel().selection()
        return list(set(ind.row() for ind in selection.indexes()))

    def set_learner(self, learner, lid=None):
        """
        Handle the "Scorer" input.

        A `learner` of None with a non-None `lid` removes the scorer stored
        under `lid`; a non-None `learner` is stored under `lid`. The score
        rows of all input scorers are then reset and recomputed.
        """
        if learner is None and lid is not None:
            del self.learners[lid]
        elif learner is not None:
            self.learners[lid] = score_meta(
                learner.name,
                learner.name,
                learner
            )
        attrs_len = 0 if not self.data else len(self.data.domain.attributes)
        shape = (len(self.learners), attrs_len)
        # Keep the rows of the built-in measures and replace everything after
        # them with fresh rows for the current scorers.
        self.measure_scores = self.measure_scores[:len(self.measures)]
        self.measure_scores += table(shape, None)
        self.contRanksModel.setHorizontalHeaderLabels(self.contRanksLabels)
        self.discRanksModel.setHorizontalHeaderLabels(self.discRanksLabels)
        self.noClassRanksModel.setHorizontalHeaderLabels(
            self.noClassRanksLabels)
        # Recompute only the input scorers, not the built-in measures.
        measures_mask = [False] * len(self.measures)
        measures_mask += [True for _ in self.learners]
        self.updateScores(measures_mask)
        self.commit()

    def updateScores(self, measuresMask=None):
        """
        Update the current computed scores.

        If `measuresMask` is given it must be a list of bool values
        indicating which measures should be recomputed; its entries
        correspond to ``self.measures`` followed by the input scorers.
        Does nothing when there is no data.
        """
        if not self.data:
            return
        if self.data.has_missing():
            self.information("Missing values have been imputed.")

        measures = self.measures + [v for k, v in self.learners.items()]
        if measuresMask is None:
            # Update all selected measures
            measuresMask = [self.selectedMeasures.get(m.name)
                            for m in self.measures]
            # Scorer names are appended as the mask entries of the input
            # scorers; the loop below only tests entries for truthiness.
            measuresMask = measuresMask + [v.name for k, v in
                                           self.learners.items()]

        data = self.data
        # Index of the next row in ``measure_scores`` to receive scores from
        # an input scorer.
        learner_col = len(self.measures)
        # Reset the scorer labels when the mask has no scorer entries or
        # when the first scorer entry is truthy.
        if len(measuresMask) <= len(self.measures) or \
                measuresMask[len(self.measures)]:
            self.labels = []
            self.Error.inadequate_learner.clear()

        self.setStatusMessage("Running")
        with self.progressBar():
            n_measure_update = len([x for x in measuresMask if x is not False])
            count = 0
            for index, (meas, mask) in enumerate(zip(measures, measuresMask)):
                if not mask:
                    continue
                self.progressBarSet(90 * count / n_measure_update)
                count += 1
                if index < len(self.measures):
                    # Built-in measure.
                    estimator = meas.score()
                    try:
                        self.measure_scores[index] = estimator(data)
                    except ValueError:
                        # Scoring all attributes at once failed: score them
                        # one by one and store None for those that fail.
                        self.measure_scores[index] = []
                        for attr in data.domain.attributes:
                            try:
                                self.measure_scores[index].append(
                                    estimator(data, attr))
                            except ValueError:
                                self.measure_scores[index].append(None)
                else:
                    # Scorer from the "Scorer" input.
                    learner = meas.score
                    if isinstance(learner, Learner) and \
                            not learner.check_learner_adequacy(self.data.domain):
                        self.Error.inadequate_learner(
                            learner.learner_adequacy_err_msg)
                        scores = table((1, len(data.domain.attributes)))
                    else:
                        scores = meas.score.score_data(data)
                    # Every row of ``scores`` gets its own label and its own
                    # row in ``measure_scores``.
                    for i, row in enumerate(scores):
                        self.labels.append(meas.shortname + str(i + 1))
                        if len(self.measure_scores) > learner_col:
                            self.measure_scores[learner_col] = row
                        else:
                            self.measure_scores.append(row)
                        learner_col += 1
            self.progressBarSet(90)
        self.contRanksModel.setHorizontalHeaderLabels(
            self.contRanksLabels + self.labels
        )
        self.discRanksModel.setHorizontalHeaderLabels(
            self.discRanksLabels + self.labels
        )
        self.noClassRanksModel.setHorizontalHeaderLabels(
            self.noClassRanksLabels + self.labels
        )
        self.updateRankModel(measuresMask)
        self.ranksProxyModel.invalidate()
        self.selectMethodChanged()

        self.send("Scores", self.create_scores_table(self.labels))
        self.setStatusMessage("")

    def updateRankModel(self, measuresMask):
        """
        Update the rankModel.

        Writes the scores of the measures marked in `measuresMask` into the
        current rank model and sets their bar ratios. `measuresMask` is
        extended in place when it is shorter than ``self.measure_scores``.
        """
        values = []
        diff = len(self.measure_scores) - len(measuresMask)
        if len(measuresMask):
            # Pad the mask with copies of its last entry.
            measuresMask += [measuresMask[-1]] * diff
        # Remove model columns beyond those needed for the current scores.
        for i in range(self.ranksModel.columnCount() - 1,
                       len(self.measure_scores), -1):
            self.ranksModel.removeColumn(i)

        for i, (scores, m) in enumerate(zip(self.measure_scores, measuresMask)):
            # Skip an unmasked measure only if its column already has items.
            if not m and self.ranksModel.item(0, i + 1):
                values.append([])
                continue
            values_one = []
            for j, _score in enumerate(scores):
                values_one.append(_score)
                item = self.ranksModel.item(j, i + 1)
                if not item:
                    item = ScoreValueItem()
                    self.ranksModel.setItem(j, i + 1, item)
                item.setData(_score, Qt.DisplayRole)
            values.append(values_one)
        for i, (vals, m) in enumerate(zip(values, measuresMask)):
            if not m:
                continue
            valid_vals = [v for v in vals if v is not None]
            if valid_vals:
                vmin, vmax = min(valid_vals), max(valid_vals)
                for j, v in enumerate(vals):
                    if v is not None:
                        # Set the bar ratio role for i-th measure.
                        # ``or 1`` avoids a division by zero when all valid
                        # values are equal.
                        ratio = float((v - vmin) / ((vmax - vmin) or 1))
                        item = self.ranksModel.item(j, i + 1)
                        item.setData(ratio, gui.BarRatioRole)

        self.ranksView.setColumnWidth(0, 20)
        self.ranksView.resizeRowsToContents()

    def resetInternals(self):
        """Forget the data and remove all rows from the current rank model."""
        self.data = None
        self.usefulAttributes = []
        self.ranksModel.setRowCount(0)

    def onSelectItem(self, index):
        """
        Called when the user selects/unselects an item in the table view.

        Switches the selection method to manual and commits. `index` is
        not used.
        """
        self.selectMethod = OWRank.SelectManual  # Manual
        self.selectButtons.button(self.selectMethod).setChecked(True)
        self.commit()

    def setSelectMethod(self, method):
        """Set the selection method to `method` if it differs from the current one."""
        if self.selectMethod != method:
            self.selectMethod = method
            self.selectButtons.button(method).setChecked(True)
            self.selectMethodChanged()

    def selectMethodChanged(self):
        """Re-apply the automatic selection and focus the current view."""
        self.autoSelection()
        self.ranksView.setFocus()

    def nSelectedChanged(self):
        """Switch to "Best ranked" selection when the spin box changes."""
        self.selectMethod = OWRank.SelectNBest
        self.selectButtons.button(self.selectMethod).setChecked(True)
        self.selectMethodChanged()

    def autoSelection(self):
        """
        Select rows in the current view according to ``selectMethod``.

        For the manual method the rows stored in ``selected_rows`` are
        selected. Row numbers refer to the proxy model.
        """
        selModel = self.ranksView.selectionModel()
        rowCount = self.ranksModel.rowCount()
        columnCount = self.ranksModel.columnCount()
        model = self.ranksProxyModel

        if self.selectMethod == OWRank.SelectNone:
            selection = QItemSelection()
        elif self.selectMethod == OWRank.SelectAll:
            selection = QItemSelection(
                model.index(0, 0),
                model.index(rowCount - 1, columnCount - 1)
            )
        elif self.selectMethod == OWRank.SelectNBest:
            nSelected = min(self.nSelected, rowCount)
            selection = QItemSelection(
                model.index(0, 0),
                model.index(nSelected - 1, columnCount - 1)
            )
        else:
            selection = QItemSelection()
            if len(self.selected_rows):
                selection = QItemSelection()
                for row in self.selected_rows:
                    selection.append(QItemSelectionRange(
                        model.index(row, 0), model.index(row, columnCount - 1)))

        selModel.select(selection, QItemSelectionModel.ClearAndSelect)

    def headerClick(self, index):
        """
        Handle a click on horizontal header section `index`.

        Re-applies the "Best ranked" selection when a score column was
        clicked and stores the header state of all three views.
        """
        if index >= 1 and self.selectMethod == OWRank.SelectNBest:
            # Reselect the top ranked attributes
            self.autoSelection()

        # Store the header states
        disc = bytes(self.discRanksView.horizontalHeader().saveState())
        cont = bytes(self.contRanksView.horizontalHeader().saveState())
        no_class = bytes(self.noClassRanksView.horizontalHeader().saveState())
        self.headerState = [disc, cont, no_class]

    def measuresSelectionChanged(self, measure):
        """Measure selection has changed. Update column visibility.

        Toggles the shown state of `measure` and, unless ``update_scores``
        is False, computes its scores if they are all still None.
        """
        checked = self.selectedMeasures[measure.name]
        self.selectedMeasures[measure.name] = not checked
        if not checked:
            self.selected_checks.add(measure.name)
        elif measure.name in self.selected_checks:
            self.selected_checks.remove(measure.name)
        measures_mask = [False] * len(self.measures)
        measures_mask += [False for _ in self.learners]
        # Update scores for shown column if they are not yet computed.
        if measure in self.measures and self.measure_scores:
            index = self.measures.index(measure)
            if all(s is None for s in self.measure_scores[index]):
                measures_mask[index] = True
        if self.update_scores:
            self.updateScores(measures_mask)
        self.updateVisibleScoreColumns()

    def updateVisibleScoreColumns(self):
        """
        Update the visible columns of the scores view.

        If the sort column is hidden, the stored header state of the
        current mode is dropped. When no header state is stored, the view is
        sorted by the first selected measure and the selection re-applied.
        """
        for i, measure in enumerate(self.measures):
            shown = self.selectedMeasures.get(measure.name)
            self.ranksView.setColumnHidden(i + 1, not shown)
            self.ranksView.setColumnWidth(i + 1, 100)

        index = self.ranksView.horizontalHeader().sortIndicatorSection()
        if self.ranksView.isColumnHidden(index):
            self.headerState[self.rankMode] = None

        if self.headerState[self.rankMode] is None:
            # Column of the first measure whose name is in
            # ``selected_measures``; if there is none, the column right
            # after the built-in measures.
            def get_sort_by_col(measures, selected_measures):
                cols = [i + 1 for i, m in enumerate(measures) if
                        m.name in selected_measures]
                return cols[0] if cols else len(measures) + 1

            col = get_sort_by_col(self.measures, self.selected_checks)
            self.ranksView.sortByColumn(col, Qt.DescendingOrder)
            self.autoSelection()

    def updateDelegates(self):
        """Install a ``ColoredBarItemDelegate`` on each of the three views."""
        self.contRanksView.setItemDelegate(gui.ColoredBarItemDelegate(self))
        self.discRanksView.setItemDelegate(gui.ColoredBarItemDelegate(self))
        self.noClassRanksView.setItemDelegate(gui.ColoredBarItemDelegate(self))

    def send_report(self):
        """Report the input domain, the rank table and the output domain."""
        if not self.data:
            return
        self.report_domain("Input", self.data.domain)
        self.report_table("Ranks", self.ranksView, num_format="{:.3f}")
        if self.out_domain_desc is not None:
            self.report_items("Output", self.out_domain_desc)

    def commit(self):
        """
        Send the data reduced to the selected attributes on "Reduced Data".

        Stores the current selection in ``selected_rows``; switches the
        selection method to "All" when every attribute is selected. Sends
        None when there is no data or nothing is selected.
        """
        self.selected_rows = self.get_selection()
        if self.data and len(self.data.domain.attributes) == len(
                self.selected_rows):
            self.selectMethod = OWRank.SelectAll
            self.selectButtons.button(self.selectMethod).setChecked(True)
        selected = self.selectedAttrs()
        if not self.data or not selected:
            self.send("Reduced Data", None)
            self.out_domain_desc = None
        else:
            data = Table(Domain(selected, self.data.domain.class_var,
                                self.data.domain.metas), self.data)
            self.send("Reduced Data", data)
            self.out_domain_desc = report.describe_domain(data.domain)

    def selectedAttrs(self):
        """Return the attributes of the selected rows ([] without data)."""
        if self.data:
            inds = self.ranksView.selectionModel().selectedRows(0)
            # Selected indices belong to the proxy model; map them to the
            # source model, whose rows follow the attribute order.
            source = self.ranksProxyModel.mapToSource
            inds = map(source, inds)
            inds = [ind.row() for ind in inds]
            return [self.data.domain.attributes[i] for i in inds]
        else:
            return []

    def create_scores_table(self, labels):
        """
        Return a table with the scores of the shown measures and of the
        input scorers (named by `labels`), one row per attribute, with the
        attribute name as a meta. Returns None if there are no columns.
        """
        indices = [i for i, m in enumerate(self.measures)
                   if self.selectedMeasures.get(m.name, False)]
        measures = [s.name for s in self.measures if
                    self.selectedMeasures.get(s.name, False)]
        measures += [label for label in labels]
        if not measures:
            return None
        features = [ContinuousVariable(s) for s in measures]
        metas = [StringVariable("Feature name")]
        domain = Domain(features, metas=metas)

        # Keep the rows of the shown measures and all scorer rows, then
        # transpose so that attributes become rows.
        scores = np.array([row for i, row in enumerate(self.measure_scores)
                           if i in indices or i >= len(self.measures)]).T
        feature_names = np.array([a.name for a in self.data.domain.attributes])
        # Reshape to 2d array as Table does not like 1d arrays
        feature_names = feature_names[:, None]

        new_table = Table(domain, scores, metas=feature_names)
        new_table.name = "Feature Scores"
        return new_table

    @classmethod
    def migrate_settings(cls, settings, version):
        """Pad a stored ``headerState`` tuple shorter than 3 to three entries."""
        if not version:
            # Before fc5caa1e1d716607f1f5c4e0b0be265c23280fa0
            # headerState had length 2
            headerState = settings.get("headerState", None)
            if headerState is not None and \
                    isinstance(headerState, tuple) and \
                    len(headerState) < 3:
                headerState = (list(headerState) + [None] * 3)[:3]
                settings["headerState"] = headerState
def __init__(self):
    """Initialise the widget state and build the control and main areas."""
    super().__init__()

    self.corpus = None              # Corpus
    self.corpus_docs = None         # Documents generated from Corpus
    self.output_mask = []           # Output corpus indices
    self.doc_webview = None         # WebView for showing content
    # two copies are needed since Display allows drag & drop
    self.search_features = []
    self.display_features = []

    # Info attributes
    self.update_info()
    info = gui.widgetBox(self.controlArea, 'Info')
    info_templates = (
        'Documents: %(n_documents)s',
        'Preprocessed: %(is_preprocessed)s',
        ' ◦ Tokens: %(n_tokens)s',
        ' ◦ Types: %(n_types)s',
        'POS tagged: %(is_pos_tagged)s',
        'N-grams range: %(ngram_range)s',
        'Matching: %(n_matching)s',
    )
    for template in info_templates:
        gui.label(info, self, template)

    # Search features
    self.search_listbox = gui.listBox(
        self.controlArea, self, 'search_indices', 'search_features',
        selectionMode=QListView.ExtendedSelection,
        box='Search features', callback=self.regenerate_docs)

    # Display features
    display = gui.widgetBox(self.controlArea, 'Display features')
    self.display_listbox = gui.listBox(
        display, self, 'display_indices', 'display_features',
        selectionMode=QListView.ExtendedSelection,
        callback=self.show_docs, enableDragDrop=True)
    self.show_tokens_checkbox = gui.checkBox(
        display, self, 'show_tokens', 'Show Tokens && Tags',
        callback=self.show_docs)

    # Auto-commit box
    gui.auto_commit(self.controlArea, self, 'autocommit',
                    'Send data', 'Auto send is on')

    # Search
    filter_policy = QSizePolicy(QSizePolicy.MinimumExpanding,
                                QSizePolicy.Fixed)
    self.filter_input = gui.lineEdit(
        self.mainArea, self, '',
        orientation=Qt.Horizontal,
        sizePolicy=filter_policy,
        label='RegExp Filter:')
    self.filter_input.textChanged.connect(self.refresh_search)

    # Main area
    self.splitter = QSplitter(
        orientation=Qt.Horizontal,
        childrenCollapsible=False,
    )

    # Document list
    self.doc_list = doc_view = QTableView()
    doc_view.setSelectionBehavior(QTableView.SelectRows)
    doc_view.setSelectionMode(QTableView.ExtendedSelection)
    doc_view.setEditTriggers(QAbstractItemView.NoEditTriggers)
    header = doc_view.horizontalHeader()
    header.setSectionResizeMode(QHeaderView.Stretch)
    header.setVisible(False)
    self.splitter.addWidget(doc_view)

    self.doc_list_model = QStandardItemModel(self)
    doc_view.setModel(self.doc_list_model)
    doc_view.selectionModel().selectionChanged.connect(self.show_docs)

    # Document contents
    # For PyQt5 WebEngine's setHtml grabs the focus and makes typing hard
    # More info: http://stackoverflow.com/questions/36609489
    # To bypass the annoying behaviour disable the widget for WebEngine
    self.doc_webview = gui.WebviewWidget(self.splitter, self, debug=True,
                                         enabled=HAVE_WEBKIT)

    self.mainArea.layout().addWidget(self.splitter)
class OWCorpusViewer(OWWidget):
    """Widget that lists corpus documents, filters them by a regular
    expression and shows the selected documents in a web view.

    Documents matching the filter are sent on "Matching Docs", the
    remaining ones on "Other Docs".
    """
    name = "Corpus Viewer"
    description = "Display corpus contents."
    icon = "icons/CorpusViewer.svg"
    priority = 500

    class Inputs:
        corpus = Input("Corpus", Corpus, replaces=["Data"])

    class Outputs:
        matching_docs = Output("Matching Docs", Corpus, default=True)
        other_docs = Output("Other Docs", Corpus)

    settingsHandler = PerfectDomainContextHandler(
        match_values=PerfectDomainContextHandler.MATCH_VALUES_ALL
    )

    search_indices = ContextSetting([], exclude_metas=False)   # features included in search
    display_indices = ContextSetting([], exclude_metas=False)  # features for display
    display_features = ContextSetting([], exclude_metas=False)
    regexp_filter = ContextSetting("")

    selection = [0]  # TODO: DataHashContextHandler

    show_tokens = Setting(False)
    autocommit = Setting(True)

    class Warning(OWWidget.Warning):
        no_feats_search = Msg('No features included in search.')
        no_feats_display = Msg('No features selected for display.')

    def __init__(self):
        """Initialise the widget state and build the control and main areas."""
        super().__init__()

        self.corpus = None              # Corpus
        self.corpus_docs = None         # Documents generated from Corpus
        self.output_mask = []           # Output corpus indices
        self.doc_webview = None         # WebView for showing content
        self.search_features = []       # two copies are needed since Display allows drag & drop
        self.display_list_indices = [0]

        # Info attributes
        self.update_info()
        info_box = gui.widgetBox(self.controlArea, 'Info')
        gui.label(info_box, self, 'Documents: %(n_documents)s')
        gui.label(info_box, self, 'Preprocessed: %(is_preprocessed)s')
        gui.label(info_box, self, ' ◦ Tokens: %(n_tokens)s')
        gui.label(info_box, self, ' ◦ Types: %(n_types)s')
        gui.label(info_box, self, 'POS tagged: %(is_pos_tagged)s')
        gui.label(info_box, self, 'N-grams range: %(ngram_range)s')
        gui.label(info_box, self, 'Matching: %(n_matching)s')

        # Search features
        self.search_listbox = gui.listBox(
            self.controlArea, self, 'search_indices', 'search_features',
            selectionMode=QListView.ExtendedSelection,
            box='Search features', callback=self.search_features_changed)

        # Display features
        display_box = gui.widgetBox(self.controlArea, 'Display features')
        self.display_listbox = gui.listBox(
            display_box, self, 'display_list_indices', 'display_features',
            selectionMode=QListView.ExtendedSelection,
            callback=self.show_docs, enableDragDrop=True)
        self.show_tokens_checkbox = gui.checkBox(
            display_box, self, 'show_tokens', 'Show Tokens && Tags',
            callback=self.show_docs)

        # Auto-commit box
        gui.auto_commit(self.controlArea, self, 'autocommit',
                        'Send data', 'Auto send is on')

        # Search
        self.filter_input = gui.lineEdit(
            self.mainArea, self, 'regexp_filter',
            orientation=Qt.Horizontal,
            sizePolicy=QSizePolicy(QSizePolicy.MinimumExpanding,
                                   QSizePolicy.Fixed),
            label='RegExp Filter:')
        self.filter_input.textChanged.connect(self.refresh_search)

        # Main area
        self.splitter = QSplitter(
            orientation=Qt.Horizontal,
            childrenCollapsible=False,
        )

        # Document list
        self.doc_list = QTableView()
        self.doc_list.setSelectionBehavior(QTableView.SelectRows)
        self.doc_list.setSelectionMode(QTableView.ExtendedSelection)
        self.doc_list.setEditTriggers(QAbstractItemView.NoEditTriggers)
        self.doc_list.horizontalHeader().setSectionResizeMode(QHeaderView.Stretch)
        self.doc_list.horizontalHeader().setVisible(False)
        self.splitter.addWidget(self.doc_list)

        self.doc_list_model = QStandardItemModel(self)
        self.doc_list.setModel(self.doc_list_model)
        self.doc_list.selectionModel().selectionChanged.connect(self.show_docs)

        # Document contents
        self.doc_webview = gui.WebviewWidget(self.splitter, debug=False)

        self.mainArea.layout().addWidget(self.splitter)

    def copy_to_clipboard(self):
        """Copy the text selected in the document web view to the clipboard."""
        text = self.doc_webview.selectedText()
        QApplication.clipboard().setText(text)

    @Inputs.corpus
    def set_data(self, corpus=None):
        """Handle a new input corpus (or ``None``) and refresh the widget."""
        self.closeContext()
        self.reset_widget()
        self.corpus = corpus
        self.search_features = []
        if corpus is not None:
            domain = self.corpus.domain
            # Enable/disable tokens checkbox
            has_tokens = self.corpus.has_tokens()
            if not has_tokens:
                # setCheckState takes a Qt.CheckState, not a bool.
                self.show_tokens_checkbox.setCheckState(Qt.Unchecked)
            self.show_tokens_checkbox.setEnabled(has_tokens)

            self.search_features = list(
                filter_visible(chain(domain.variables, domain.metas)))
            self.display_features = list(
                filter_visible(chain(domain.variables, domain.metas)))
            # Defaults: everything searched and displayed, first document
            # selected. openContext may overwrite the context settings.
            self.search_indices = list(range(len(self.search_features)))
            self.display_indices = list(range(len(self.display_features)))
            self.selection = [0]
            self.openContext(self.corpus)
            self.display_list_indices = self.display_indices
            self.regenerate_docs()
            self.list_docs()
            self.update_info()
            self.set_selection()
            self.show_docs()
        self.commit()

    def reset_widget(self):
        """Clear the corpus, list boxes, filter, models, warnings and web view."""
        # Corpus
        self.corpus = None
        self.corpus_docs = None
        self.output_mask = []
        self.display_features = []
        # Widgets
        self.search_listbox.clear()
        self.display_listbox.clear()
        self.filter_input.clear()
        self.update_info()
        # Models/vars
        self.search_features.clear()
        self.search_indices.clear()
        self.display_indices.clear()
        self.doc_list_model.clear()
        # Warnings
        self.Warning.clear()
        # WebView
        self.doc_webview.setHtml('')

    def list_docs(self):
        """ List documents into the left scrolling area.

        Only documents whose generated text matches the filter are listed;
        their corpus indices are collected in ``self.output_mask``. An
        invalid regular expression leaves the current list untouched.
        """
        if self.corpus_docs is None:
            return
        search_keyword = self.regexp_filter.strip('|')
        try:
            reg = re.compile(search_keyword, re.IGNORECASE)
        except re.error:  # same class as the deprecated sre_constants.error
            return

        def is_match(x):
            # An empty filter matches every document.
            return not bool(search_keyword) or reg.search(x)

        self.output_mask.clear()
        self.doc_list_model.clear()

        for i, (doc, title, content) in enumerate(zip(self.corpus,
                                                      self.corpus.titles,
                                                      self.corpus_docs)):
            if is_match(content):
                item = QStandardItem()
                item.setData(title, Qt.DisplayRole)
                item.setData(doc, Qt.UserRole)
                self.doc_list_model.appendRow(item)
                self.output_mask.append(i)

    def reset_selection(self):
        """Select the first listed document, or blank the web view if none."""
        if self.doc_list_model.rowCount() > 0:
            self.doc_list.selectRow(0)  # Select the first document
        else:
            self.doc_webview.setHtml('')

    def set_selection(self):
        """Select the rows stored in ``self.selection`` in the document list."""
        view = self.doc_list
        if len(self.selection):
            selection = QItemSelection()
            for row in self.selection:
                selection.append(
                    QItemSelectionRange(
                        view.model().index(row, 0),
                        view.model().index(row, 0)
                    )
                )
            view.selectionModel().select(
                selection, QItemSelectionModel.ClearAndSelect)

    def show_docs(self):
        """ Show the selected documents in the right area """
        HTML = '''
        <!doctype html>
        <html>
        <head>
        <script type="text/javascript" src="resources/jquery-3.1.1.min.js">
        </script>
        <script type="text/javascript" src="resources/jquery.mark.min.js">
        </script>
        <script type="text/javascript" src="resources/highlighter.js">
        </script>
        <meta charset='utf-8'>
        <style>

        table {{ border-collapse: collapse; }}
        mark {{ background: #FFCD28; }}

        tr > td {{
            padding-bottom: 3px;
            padding-top: 3px;
        }}

        body {{
            font-family: Helvetica;
            font-size: 10pt;
        }}

        .line {{ border-bottom: 1px solid #000; }}
        .separator {{ height: 5px; }}

        .variables {{
            vertical-align: top;
            padding-right: 10px;
        }}

        .content {{
            /* Adopted from https://css-tricks.com/snippets/css/prevent-long-urls-from-breaking-out-of-container/ */

            /* These are technically the same, but use both */
            overflow-wrap: break-word;
            word-wrap: break-word;

            -ms-word-break: break-all;
            /* This is the dangerous one in WebKit, as it breaks things wherever */
            word-break: break-all;
            /* Instead use this non-standard one: */
            word-break: break-word;

            /* Adds a hyphen where the word breaks, if supported (No Blink) */
            -ms-hyphens: auto;
            -moz-hyphens: auto;
            -webkit-hyphens: auto;
            hyphens: auto;
        }}

        .token {{
            padding: 3px;
            border: 1px #B0B0B0 solid;
            margin-right: 5px;
            margin-bottom: 5px;
            display: inline-block;
        }}

        img {{
            max-width: 100%;
        }}

        </style>
        </head>
        <body>
        {}
        </body>
        </html>
        '''
        self.display_indices = self.display_list_indices
        if self.corpus is None:
            return

        self.Warning.no_feats_display.clear()
        if len(self.display_indices) == 0:
            self.Warning.no_feats_display()

        if self.show_tokens:
            tokens = list(self.corpus.ngrams_iterator(include_postags=True))

        marked_search_features = [f for i, f in enumerate(self.search_features)
                                  if i in self.search_indices]

        # Query the selection once and reuse it for both the stored row
        # numbers and the rendering loop.
        selected_rows = self.doc_list.selectionModel().selectedRows()
        selection = [i.row() for i in selected_rows]
        if selection:
            # Remember only non-empty selections.
            self.selection = selection

        parts = ['<table>']
        for doc_count, index in enumerate(selected_rows):
            if doc_count > 0:   # add split
                parts.append('<tr class="line separator"><td/><td/></tr>'
                             '<tr class="separator"><td/><td/></tr>')

            document = index.data(Qt.UserRole)
            row_ind = document.row_index
            for ind in self.display_indices:
                feature = self.display_features[ind]
                value = str(document[feature.name])
                if feature in marked_search_features:
                    value = self.__mark_text(value)
                value = value.replace('\n', '<br/>')
                is_image = feature.attributes.get('type', '') == 'image'
                if is_image and value != '?':
                    value = '<img src="{}"></img>'.format(value)
                parts.append(
                    ('<tr><td class="variables"><strong>{}:</strong></td>'
                     '<td class="content">{}</td></tr>').format(
                         feature.name, value))

            if self.show_tokens:
                token_spans = ''.join(
                    '<span class="token">{}</span>'.format(token)
                    for token in tokens[row_ind])
                parts.append(
                    ('<tr><td class="variables"><strong>Tokens & Tags:</strong></td>'
                     '<td>{}</td></tr>').format(token_spans))

        parts.append('</table>')
        html = ''.join(parts)
        base = QUrl.fromLocalFile(__file__)
        self.doc_webview.setHtml(HTML.format(html), base)

    def __mark_text(self, text):
        """Wrap every match of the filter in `text` in a ``<mark>`` element.

        Returns `text` unchanged when the filter is empty, invalid, or has
        no match.
        """
        search_keyword = self.regexp_filter.strip('|')
        if not search_keyword:
            return text

        try:
            reg = re.compile(search_keyword, re.IGNORECASE | re.MULTILINE)
        except re.error:  # same class as the deprecated sre_constants.error
            return text

        matches = list(reg.finditer(text))
        if not matches:
            return text

        text = list(text)
        # Replace from the last match backwards so earlier offsets stay valid.
        for m in matches[::-1]:
            text[m.start():m.end()] = list(
                '<mark data-markjs="true">{}</mark>'.format(
                    "".join(text[m.start():m.end()])))

        return "".join(text)

    def search_features_changed(self):
        """Rebuild the searchable documents and re-run the search."""
        self.regenerate_docs()
        self.refresh_search()

    def regenerate_docs(self):
        """Regenerate ``corpus_docs`` from the currently selected search features."""
        self.corpus_docs = None
        self.Warning.no_feats_search.clear()
        if self.corpus is not None:
            feats = [self.search_features[i] for i in self.search_indices]
            if len(feats) == 0:
                self.Warning.no_feats_search()
            self.corpus_docs = self.corpus.documents_from_features(feats)

    def refresh_search(self):
        """Re-list the documents, reset the selection and commit the outputs."""
        if self.corpus is not None:
            self.list_docs()
            self.reset_selection()
            self.update_info()
            self.commit()

    def update_info(self):
        """Refresh the attributes shown in the Info box (blank without a corpus)."""
        if self.corpus is not None:
            self.n_documents = len(self.corpus)
            self.n_matching = '{}/{}'.format(self.doc_list_model.rowCount(),
                                             self.n_documents)
            self.n_tokens = sum(map(len, self.corpus.tokens)) \
                if self.corpus.has_tokens() else 'n/a'
            self.n_types = len(self.corpus.dictionary) \
                if self.corpus.has_tokens() else 'n/a'
            self.is_preprocessed = self.corpus.has_tokens()
            self.is_pos_tagged = self.corpus.pos_tags is not None
            self.ngram_range = '{}-{}'.format(*self.corpus.ngram_range)
        else:
            self.n_documents = ''
            self.n_matching = ''
            self.n_tokens = ''
            self.n_types = ''
            self.is_preprocessed = ''
            self.is_pos_tagged = ''
            self.ngram_range = ''

    def commit(self):
        """Send matching and non-matching documents (``None`` without a corpus)."""
        if self.corpus is not None:
            matched = self.corpus[self.output_mask]
            output_mask = set(self.output_mask)
            unmatched_mask = [i for i in range(len(self.corpus))
                              if i not in output_mask]
            unmatched = self.corpus[unmatched_mask]
            self.Outputs.matching_docs.send(matched)
            self.Outputs.other_docs.send(unmatched)
        else:
            self.Outputs.matching_docs.send(None)
            self.Outputs.other_docs.send(None)

    def send_report(self):
        """Report the current query and the matching-documents count."""
        self.report_items((
            ("Query", self.regexp_filter),
            ("Matching documents", self.n_matching),
        ))
class OWConcordance(OWWidget):
    """Widget that shows the context of a query word in a corpus and
    outputs the documents of the selected concordance rows."""
    name = "Concordance"
    description = "Display the context of the word."
    icon = "icons/Concordance.svg"
    priority = 30000

    inputs = [
        ('Corpus', Table, 'set_corpus'),
        ('Query Word', Topic, 'set_word_from_input'),
    ]
    outputs = [('Selected Documents', Table, )]

    autocommit = Setting(True)
    context_width = Setting(5)
    word = Setting("")
    # TODO Set selection settings.

    class Warning(OWWidget.Warning):
        multiple_words_on_input = Msg("Multiple query words on input. "
                                      "Only the first one is considered!")

    def __init__(self):
        """Initialise the widget state and build the GUI."""
        super().__init__()

        self.corpus = None      # Corpus
        self.n_documents = ''   # Info on docs
        self.n_matching = ''    # Info on docs matching the word
        self.n_tokens = ''      # Info on tokens
        self.n_types = ''       # Info on types (unique tokens)

        # Info attributes
        info_box = gui.widgetBox(self.controlArea, 'Info')
        gui.label(info_box, self, 'Documents: %(n_documents)s')
        gui.label(info_box, self, 'Tokens: %(n_tokens)s')
        gui.label(info_box, self, 'Types: %(n_types)s')
        gui.label(info_box, self, 'Matching: %(n_matching)s')

        # Width parameter
        gui.spin(self.controlArea, self, 'context_width', 3, 10, box=True,
                 label="Number of words:", callback=self.set_width)

        gui.rubber(self.controlArea)

        # Search
        c_box = gui.widgetBox(self.mainArea, orientation="vertical")
        self.input = gui.lineEdit(
            c_box, self, 'word', orientation=Qt.Horizontal,
            sizePolicy=QSizePolicy(QSizePolicy.MinimumExpanding,
                                   QSizePolicy.Fixed),
            label='Query:', callback=self.set_word, callbackOnType=True)
        self.input.setFocus()

        # Concordances view
        self.conc_view = QTableView()
        self.model = ConcordanceModel()
        self.conc_view.setModel(self.model)
        self.conc_view.setWordWrap(False)
        self.conc_view.setSelectionBehavior(QTableView.SelectRows)
        self.conc_view.setSelectionModel(DocumentSelectionModel(self.model))
        self.conc_view.setItemDelegate(HorizontalGridDelegate())
        # connect selectionChanged to self.commit(), which will be
        # updated by gui.auto_commit()
        self.conc_view.selectionModel().selectionChanged.connect(
            lambda: self.commit())
        self.conc_view.horizontalHeader().hide()
        self.conc_view.setShowGrid(False)
        self.mainArea.layout().addWidget(self.conc_view)
        self.set_width()

        # Auto-commit box
        gui.auto_commit(self.controlArea, self, 'autocommit', 'Commit',
                        'Auto commit is on')

    def sizeHint(self):  # pragma: no cover
        """Return the preferred initial widget size."""
        return QSize(600, 400)

    def set_width(self):
        """Apply ``context_width`` to the model, keeping the current selection."""
        sel = self.conc_view.selectionModel().selection()
        self.model.set_width(self.context_width)
        if sel:
            self.conc_view.selectionModel().select(
                sel,
                QItemSelectionModel.SelectCurrent | QItemSelectionModel.Rows)

    def set_corpus(self, data=None):
        """Set the input data, converting a non-Corpus table with ``Corpus.from_table``."""
        self.corpus = data
        if data is not None and not isinstance(data, Corpus):
            self.corpus = Corpus.from_table(data.domain, data)
        self.model.set_corpus(self.corpus)
        self.update_widget()
        self.commit()

    def set_word_from_input(self, topic):
        """Take the query word from the "Query Word" input.

        The line edit is disabled while a non-empty topic is on input; when
        the topic has more than one row a warning is shown and the first
        meta value is used.
        """
        self.Warning.multiple_words_on_input.clear()
        have_word = topic is not None and len(topic) > 0
        self.input.setEnabled(not have_word)
        if have_word:
            if len(topic) > 1:
                self.Warning.multiple_words_on_input()
            self.word = topic.metas[0, 0]
            self.set_word()

    def set_word(self):
        """Pass the current query word to the model and refresh the view."""
        self.model.set_word(self.word)
        self.update_widget()

    def resize_columns(self):
        """Give columns 0 and 2 equal widths around column 1."""
        # setColumnWidth takes ints; true division yields a float, which
        # is rejected on Python >= 3.10, so truncate explicitly.
        col_width = int(
            (self.conc_view.width() - self.conc_view.columnWidth(1)) / 2 - 12)
        self.conc_view.setColumnWidth(0, col_width)
        self.conc_view.setColumnWidth(2, col_width)

    def resizeEvent(self, event):  # pragma: no cover
        """Re-balance the column widths whenever the widget is resized."""
        super().resizeEvent(event)
        self.resize_columns()

    def update_widget(self):
        """Resize the view to its contents and refresh the Info box attributes."""
        self.conc_view.resizeColumnToContents(1)
        self.resize_columns()
        self.conc_view.resizeRowsToContents()

        if self.corpus is not None:
            self.n_documents = len(self.corpus)
            self.n_matching = '{}/{}'.format(
                self.model.matching_docs() if self.word else 0,
                self.n_documents)
            self.n_tokens = sum(map(len, self.corpus.tokens)) \
                if self.corpus.has_tokens() else 'n/a'
            self.n_types = len(self.corpus.dictionary) \
                if self.corpus.has_tokens() else 'n/a'
        else:
            self.n_documents = ''
            self.n_matching = ''
            self.n_tokens = ''
            self.n_types = ''

    def commit(self):
        """Send the documents of the selected rows, or ``None`` if none is selected."""
        rows = [sel_range.top() for sel_range
                in self.conc_view.selectionModel().selection()]
        # The first item of a word_index entry is used as the document
        # index; several rows may point to the same document.
        selected_docs = sorted({self.model.word_index[row][0] for row in rows})
        if selected_docs:
            selected = self.corpus[selected_docs]
            self.send("Selected Documents", selected)
        else:
            self.send("Selected Documents", None)
def __init__(self):
    """Initialise the widget state and build the scoring and ranking GUI."""
    super().__init__()
    self.measure_scores = None
    self.update_scores = True
    self.usefulAttributes = []
    self.learners = {}
    self.labels = []
    self.out_domain_desc = None

    self.all_measures = SCORES

    self.selectedMeasures = {m.name: True for m in self.all_measures}
    # Discrete (0) or continuous (1) class mode
    self.rankMode = 0

    self.data = None

    self.discMeasures = [
        m for m in self.all_measures
        if issubclass(DiscreteVariable, m.score.class_type)
    ]
    self.contMeasures = [
        m for m in self.all_measures
        if issubclass(ContinuousVariable, m.score.class_type)
    ]

    # One checkbox per score: the first seven go into the classification
    # box, the last two into the regression box.
    self.score_checks = []
    self.cls_scoring_box = gui.vBox(None, "Scoring for Classification")
    self.reg_scoring_box = gui.vBox(None, "Scoring for Regression")
    boxes = [self.cls_scoring_box] * 7 + [self.reg_scoring_box] * 2
    for _score, var, box in zip(SCORES, self._score_vars, boxes):
        check = gui.checkBox(
            box, self, var, label=_score.name,
            # Bind the score as a default so each callback keeps its own.
            callback=lambda val=_score: self.measuresSelectionChanged(val))
        self.score_checks.append(check)

    self.score_stack = QStackedWidget(self)
    self.score_stack.addWidget(self.cls_scoring_box)
    self.score_stack.addWidget(self.reg_scoring_box)
    self.score_stack.addWidget(QWidget())
    self.controlArea.layout().addWidget(self.score_stack)

    gui.rubber(self.controlArea)

    selMethBox = gui.vBox(
        self.controlArea, "Select Attributes", addSpace=True)

    grid = QGridLayout()
    grid.setContentsMargins(6, 0, 6, 0)
    self.selectButtons = QButtonGroup()
    self.selectButtons.buttonClicked[int].connect(self.setSelectMethod)

    def button(text, buttonid, toolTip=None):
        b = QRadioButton(text)
        self.selectButtons.addButton(b, buttonid)
        if toolTip is not None:
            b.setToolTip(toolTip)
        return b

    b1 = button(self.tr("None"), OWRank.SelectNone)
    b2 = button(self.tr("All"), OWRank.SelectAll)
    b3 = button(self.tr("Manual"), OWRank.SelectManual)
    b4 = button(self.tr("Best ranked:"), OWRank.SelectNBest)

    s = gui.spin(selMethBox, self, "nSelected", 1, 100,
                 callback=self.nSelectedChanged)

    grid.addWidget(b1, 0, 0)
    grid.addWidget(b2, 1, 0)
    grid.addWidget(b3, 2, 0)
    grid.addWidget(b4, 3, 0)
    grid.addWidget(s, 3, 1)

    self.selectButtons.button(self.selectMethod).setChecked(True)

    selMethBox.layout().addLayout(grid)

    gui.auto_commit(selMethBox, self, "auto_apply", "Send", box=False)

    # Discrete, continuous and no_class table views are stacked
    self.ranksViewStack = QStackedLayout()
    self.mainArea.layout().addLayout(self.ranksViewStack)

    def make_ranks_view(labels, saved_header_state):
        # Build one sortable, row-selectable table view on top of a
        # proxy-wrapped item model and add it to the view stack.
        view = QTableView()
        self.ranksViewStack.addWidget(view)
        view.setSelectionBehavior(QTableView.SelectRows)
        view.setSelectionMode(QTableView.MultiSelection)
        view.setSortingEnabled(True)

        model = QStandardItemModel(self)
        model.setHorizontalHeaderLabels(labels)
        proxy = MySortProxyModel(self)
        proxy.setSourceModel(model)
        view.setModel(proxy)

        view.setColumnWidth(0, 20)
        view.selectionModel().selectionChanged.connect(self.commit)
        view.pressed.connect(self.onSelectItem)
        view.horizontalHeader().sectionClicked.connect(self.headerClick)
        view.verticalHeader().sectionClicked.connect(self.onSelectItem)

        if saved_header_state is not None:
            view.horizontalHeader().restoreState(saved_header_state)
        return view, model, proxy

    self.discRanksLabels = ["#"] + [m.shortname for m in self.discMeasures]
    (self.discRanksView,
     self.discRanksModel,
     self.discRanksProxyModel) = make_ranks_view(
         self.discRanksLabels, self.headerState[0])

    self.contRanksLabels = ["#"] + [m.shortname for m in self.contMeasures]
    (self.contRanksView,
     self.contRanksModel,
     self.contRanksProxyModel) = make_ranks_view(
         self.contRanksLabels, self.headerState[1])

    self.noClassRanksLabels = ["#"]
    (self.noClassRanksView,
     self.noClassRanksModel,
     self.noClassRanksProxyModel) = make_ranks_view(
         self.noClassRanksLabels, self.headerState[2])

    # Switch the current view to Discrete
    self.switchRanksMode(0)
    self.resetInternals()
    self.updateDelegates()
    self.updateVisibleScoreColumns()

    self.resize(690, 500)

    self.measure_scores = table((len(self.measures), 0), None)
class OWRank(OWWidget):
    """Widget that scores the features of the input data with built-in
    measures and external scorers, and outputs the selected features."""
    name = "Rank"
    description = "Rank and filter data features by their relevance."
    icon = "icons/Rank.svg"
    priority = 1102

    buttons_area_orientation = Qt.Vertical

    inputs = [("Data", Table, "setData"),
              ("Scorer", score.Scorer, "set_learner", widget.Multiple)]
    outputs = [("Reduced Data", Table, widget.Default), ("Scores", Table)]

    SelectNone, SelectAll, SelectManual, SelectNBest = range(4)

    cls_default_selected = Setting({"Gain Ratio", "Gini Decrease"})
    reg_default_selected = Setting(
        {"Univariate Linear Regression", "RReliefF"})
    selectMethod = Setting(SelectNBest)
    nSelected = Setting(5)
    auto_apply = Setting(True)

    # Header state for discrete/continuous/no_class scores
    headerState = Setting([None, None, None])

    settings_version = 1

    settingsHandler = DomainContextHandler()
    selected_rows = ContextSetting([])

    gain = inf_gain = gini = anova = chi2 = ulr = relief = rrelief = fcbc = True
    _score_vars = [
        "gain", "inf_gain", "gini", "anova", "chi2", "relief", "fcbc", "ulr",
        "rrelief"
    ]

    class Warning(OWWidget.Warning):
        no_target_var = Msg("Data does not have a target variable")

    class Error(OWWidget.Error):
        invalid_type = Msg("Cannot handle target variable type {}")
        inadequate_learner = Msg("{}")

    def __init__(self):
        """Initialise the widget state and build the scoring and ranking GUI."""
        super().__init__()
        self.measure_scores = None
        self.update_scores = True
        self.usefulAttributes = []
        self.learners = {}
        self.labels = []
        self.out_domain_desc = None

        self.all_measures = SCORES

        self.selectedMeasures = {m.name: True for m in self.all_measures}
        # Discrete (0) or continuous (1) class mode
        self.rankMode = 0

        self.data = None

        self.discMeasures = [
            m for m in self.all_measures
            if issubclass(DiscreteVariable, m.score.class_type)
        ]
        self.contMeasures = [
            m for m in self.all_measures
            if issubclass(ContinuousVariable, m.score.class_type)
        ]

        # One checkbox per score: the first seven go into the
        # classification box, the last two into the regression box.
        self.score_checks = []
        self.cls_scoring_box = gui.vBox(None, "Scoring for Classification")
        self.reg_scoring_box = gui.vBox(None, "Scoring for Regression")
        boxes = [self.cls_scoring_box] * 7 + [self.reg_scoring_box] * 2
        for _score, var, box in zip(SCORES, self._score_vars, boxes):
            check = gui.checkBox(
                box, self, var, label=_score.name,
                # Bind the score as a default so each callback keeps its own.
                callback=lambda val=_score: self.measuresSelectionChanged(val))
            self.score_checks.append(check)

        self.score_stack = QStackedWidget(self)
        self.score_stack.addWidget(self.cls_scoring_box)
        self.score_stack.addWidget(self.reg_scoring_box)
        self.score_stack.addWidget(QWidget())
        self.controlArea.layout().addWidget(self.score_stack)

        gui.rubber(self.controlArea)

        selMethBox = gui.vBox(
            self.controlArea, "Select Attributes", addSpace=True)

        grid = QGridLayout()
        grid.setContentsMargins(6, 0, 6, 0)
        self.selectButtons = QButtonGroup()
        self.selectButtons.buttonClicked[int].connect(self.setSelectMethod)

        def button(text, buttonid, toolTip=None):
            b = QRadioButton(text)
            self.selectButtons.addButton(b, buttonid)
            if toolTip is not None:
                b.setToolTip(toolTip)
            return b

        b1 = button(self.tr("None"), OWRank.SelectNone)
        b2 = button(self.tr("All"), OWRank.SelectAll)
        b3 = button(self.tr("Manual"), OWRank.SelectManual)
        b4 = button(self.tr("Best ranked:"), OWRank.SelectNBest)

        s = gui.spin(selMethBox, self, "nSelected", 1, 100,
                     callback=self.nSelectedChanged)

        grid.addWidget(b1, 0, 0)
        grid.addWidget(b2, 1, 0)
        grid.addWidget(b3, 2, 0)
        grid.addWidget(b4, 3, 0)
        grid.addWidget(s, 3, 1)

        self.selectButtons.button(self.selectMethod).setChecked(True)

        selMethBox.layout().addLayout(grid)

        gui.auto_commit(selMethBox, self, "auto_apply", "Send", box=False)

        # Discrete, continuous and no_class table views are stacked
        self.ranksViewStack = QStackedLayout()
        self.mainArea.layout().addLayout(self.ranksViewStack)

        self.discRanksLabels = ["#"] + [m.shortname for m in self.discMeasures]
        (self.discRanksView,
         self.discRanksModel,
         self.discRanksProxyModel) = self._make_ranks_view(
             self.discRanksLabels, self.headerState[0])

        self.contRanksLabels = ["#"] + [m.shortname for m in self.contMeasures]
        (self.contRanksView,
         self.contRanksModel,
         self.contRanksProxyModel) = self._make_ranks_view(
             self.contRanksLabels, self.headerState[1])

        self.noClassRanksLabels = ["#"]
        (self.noClassRanksView,
         self.noClassRanksModel,
         self.noClassRanksProxyModel) = self._make_ranks_view(
             self.noClassRanksLabels, self.headerState[2])

        # Switch the current view to Discrete
        self.switchRanksMode(0)
        self.resetInternals()
        self.updateDelegates()
        self.updateVisibleScoreColumns()

        self.resize(690, 500)

        self.measure_scores = table((len(self.measures), 0), None)

    def _make_ranks_view(self, labels, saved_header_state):
        """Create one ranks table view, add it to the view stack and wire it.

        Returns the ``(view, model, proxy_model)`` triple.
        """
        view = QTableView()
        self.ranksViewStack.addWidget(view)
        view.setSelectionBehavior(QTableView.SelectRows)
        view.setSelectionMode(QTableView.MultiSelection)
        view.setSortingEnabled(True)

        model = QStandardItemModel(self)
        model.setHorizontalHeaderLabels(labels)
        proxy = MySortProxyModel(self)
        proxy.setSourceModel(model)
        view.setModel(proxy)

        view.setColumnWidth(0, 20)
        view.selectionModel().selectionChanged.connect(self.commit)
        view.pressed.connect(self.onSelectItem)
        view.horizontalHeader().sectionClicked.connect(self.headerClick)
        view.verticalHeader().sectionClicked.connect(self.onSelectItem)

        if saved_header_state is not None:
            view.horizontalHeader().restoreState(saved_header_state)
        return view, model, proxy

    def switchRanksMode(self, index):
        """
        Switch between discrete/continuous/no_class mode
        """
        self.rankMode = index
        self.ranksViewStack.setCurrentIndex(index)

        if index == 0:
            self.ranksView = self.discRanksView
            self.ranksModel = self.discRanksModel
            self.ranksProxyModel = self.discRanksProxyModel
            self.measures = self.discMeasures
            self.selected_checks = self.cls_default_selected
            self.reg_scoring_box.setSizePolicy(QSizePolicy.Ignored,
                                               QSizePolicy.Ignored)
            self.cls_scoring_box.setSizePolicy(QSizePolicy.Expanding,
                                               QSizePolicy.Expanding)
        elif index == 1:
            self.ranksView = self.contRanksView
            self.ranksModel = self.contRanksModel
            self.ranksProxyModel = self.contRanksProxyModel
            self.measures = self.contMeasures
            self.selected_checks = self.reg_default_selected
            self.cls_scoring_box.setSizePolicy(QSizePolicy.Ignored,
                                               QSizePolicy.Ignored)
            self.reg_scoring_box.setSizePolicy(QSizePolicy.Expanding,
                                               QSizePolicy.Expanding)
        else:
            self.ranksView = self.noClassRanksView
            self.ranksModel = self.noClassRanksModel
            self.ranksProxyModel = self.noClassRanksProxyModel
            self.measures = []
            self.selected_checks = set()
            self.reg_scoring_box.setSizePolicy(QSizePolicy.Ignored,
                                               QSizePolicy.Ignored)
            self.cls_scoring_box.setSizePolicy(QSizePolicy.Ignored,
                                               QSizePolicy.Ignored)

        shape = (len(self.measures) + len(self.learners), 0)
        self.measure_scores = table(shape, None)
        # Suppress score recomputation while the checkboxes are synced
        # programmatically with the selected measures.
        self.update_scores = False
        for check, score in zip(self.score_checks, SCORES):
            check.setChecked(score.name in self.selected_checks)
        self.update_scores = True
        self.score_stack.setCurrentIndex(index)
        self.updateVisibleScoreColumns()

    @check_sql_input
    def setData(self, data):
        """Handle new input data (or ``None``): pick the rank mode, fill the
        ranks model and recompute the scores."""
        self.closeContext()
        self.clear_messages()
        self.resetInternals()

        self.data = data
        self.switchRanksMode(0)
        if self.data is not None:
            domain = self.data.domain
            attrs = domain.attributes
            self.usefulAttributes = [
                attr for attr in attrs
                if attr.is_discrete or attr.is_continuous
            ]

            if domain.has_continuous_class:
                self.switchRanksMode(1)
            elif not domain.class_var:
                self.Warning.no_target_var()
                self.switchRanksMode(2)
            elif not domain.has_discrete_class:
                self.Error.invalid_type(type(domain.class_var).__name__)

            if issparse(self.data.X):
                # keep only measures supporting sparse data
                self.measures = [
                    m for m in self.measures
                    if m.score.supports_sparse_data
                ]

            self.ranksModel.setRowCount(len(attrs))
            for i, a in enumerate(attrs):
                # First column: number of values for discrete attributes,
                # "C" otherwise.
                if a.is_discrete:
                    v = len(a.values)
                else:
                    v = "C"
                item = ScoreValueItem()
                item.setData(v, Qt.DisplayRole)
                self.ranksModel.setItem(i, 0, item)
                item = QStandardItem(a.name)
                item.setData(gui.attributeIconDict[a], Qt.DecorationRole)
                self.ranksModel.setVerticalHeaderItem(i, item)

            shape = (len(self.measures) + len(self.learners), len(attrs))
            self.measure_scores = table(shape, None)
            self.updateScores()
        else:
            self.send("Scores", None)

        self.selected_rows = []
        self.openContext(data)
        self.selectMethodChanged()
        self.commit()

    def get_selection(self):
        """Return the distinct row numbers of the current view's selection."""
        selection = self.ranksView.selectionModel().selection()
        return list({ind.row() for ind in selection.indexes()})

    def set_learner(self, learner, lid=None):
        """Add, replace or remove the external scorer with id `lid` and
        recompute the scorer columns."""
        if learner is None and lid is not None:
            # A None scorer may arrive for an id that was never stored;
            # removing it must not raise KeyError.
            self.learners.pop(lid, None)
        elif learner is not None:
            self.learners[lid] = ScoreMeta(learner.name, learner.name, learner)
        attrs_len = 0 if not self.data else len(self.data.domain.attributes)
        shape = (len(self.learners), attrs_len)
        # Keep the built-in measure rows and reset the scorer rows.
        self.measure_scores = self.measure_scores[:len(self.measures)]
        self.measure_scores += table(shape, None)
        self.contRanksModel.setHorizontalHeaderLabels(self.contRanksLabels)
        self.discRanksModel.setHorizontalHeaderLabels(self.discRanksLabels)
        self.noClassRanksModel.setHorizontalHeaderLabels(
            self.noClassRanksLabels)
        measures_mask = [False] * len(self.measures)
        measures_mask += [True for _ in self.learners]
        self.updateScores(measures_mask)
        self.commit()

    def updateScores(self, measuresMask=None):
        """
        Update the current computed scores.

        If `measuresMask` is given it must be an list of bool values
        indicating what measures should be recomputed.
        """
        if not self.data:
            return
        if self.data.has_missing():
            self.information("Missing values have been imputed.")

        learners = list(self.learners.values())
        measures = self.measures + learners
        if measuresMask is None:
            # Update all selected measures
            measuresMask = [
                self.selectedMeasures.get(m.name) for m in self.measures
            ]
            # Scorer names serve as (truthy) mask entries.
            measuresMask = measuresMask + [v.name for v in learners]

        data = self.data
        learner_col = len(self.measures)
        # Reset scorer labels/errors unless the mask explicitly excludes
        # the first scorer entry.
        if len(measuresMask) <= len(self.measures) or \
                measuresMask[len(self.measures)]:
            self.labels = []
            self.Error.inadequate_learner.clear()

        self.setStatusMessage("Running")
        with self.progressBar():
            # Count exactly the entries the loop below processes, so the
            # progress fraction is not diluted by skipped (None) entries.
            n_measure_update = sum(1 for x in measuresMask if x)
            count = 0
            for index, (meas, mask) in enumerate(zip(measures, measuresMask)):
                if not mask:
                    continue
                self.progressBarSet(90 * count / n_measure_update)
                count += 1
                if index < len(self.measures):
                    estimator = meas.score()
                    try:
                        self.measure_scores[index] = estimator(data)
                    except ValueError:
                        # Scoring all attributes at once failed; score
                        # them one by one, recording None on failure.
                        self.measure_scores[index] = []
                        for attr in data.domain.attributes:
                            try:
                                self.measure_scores[index].append(
                                    estimator(data, attr))
                            except ValueError:
                                self.measure_scores[index].append(None)
                else:
                    learner = meas.score
                    if isinstance(learner, Learner) and \
                            not learner.check_learner_adequacy(
                                self.data.domain):
                        self.Error.inadequate_learner(
                            learner.learner_adequacy_err_msg)
                        scores = table((1, len(data.domain.attributes)))
                    else:
                        scores = meas.score.score_data(data)
                    # Each row returned by a scorer gets its own labelled
                    # score row.
                    for i, row in enumerate(scores):
                        self.labels.append(meas.shortname + str(i + 1))
                        if len(self.measure_scores) > learner_col:
                            self.measure_scores[learner_col] = row
                        else:
                            self.measure_scores.append(row)
                        learner_col += 1
            self.progressBarSet(90)
        self.contRanksModel.setHorizontalHeaderLabels(
            self.contRanksLabels + self.labels)
        self.discRanksModel.setHorizontalHeaderLabels(
            self.discRanksLabels + self.labels)
        self.noClassRanksModel.setHorizontalHeaderLabels(
            self.noClassRanksLabels + self.labels)
        self.updateRankModel(measuresMask)
self.ranksProxyModel.invalidate() self.selectMethodChanged() self.send("Scores", self.create_scores_table(self.labels)) self.setStatusMessage("") def updateRankModel(self, measuresMask): """ Update the rankModel. """ values = [] diff = len(self.measure_scores) - len(measuresMask) if len(measuresMask): measuresMask += [measuresMask[-1]] * diff for i in range(self.ranksModel.columnCount() - 1, len(self.measure_scores), -1): self.ranksModel.removeColumn(i) for i, (scores, m) in enumerate(zip(self.measure_scores, measuresMask)): if not m and self.ranksModel.item(0, i + 1): values.append([]) continue values_one = [] for j, _score in enumerate(scores): values_one.append(_score) item = self.ranksModel.item(j, i + 1) if not item: item = ScoreValueItem() self.ranksModel.setItem(j, i + 1, item) item.setData(_score, Qt.DisplayRole) values.append(values_one) for i, (vals, m) in enumerate(zip(values, measuresMask)): if not m: continue valid_vals = [v for v in vals if v is not None] if valid_vals: vmin, vmax = min(valid_vals), max(valid_vals) for j, v in enumerate(vals): if v is not None: # Set the bar ratio role for i-th measure. ratio = float((v - vmin) / ((vmax - vmin) or 1)) item = self.ranksModel.item(j, i + 1) item.setData(ratio, gui.BarRatioRole) self.ranksView.setColumnWidth(0, 20) self.ranksView.resizeRowsToContents() def resetInternals(self): self.data = None self.usefulAttributes = [] self.ranksModel.setRowCount(0) def onSelectItem(self, index): """ Called when the user selects/unselects an item in the table view. 
""" self.selectMethod = OWRank.SelectManual # Manual self.selectButtons.button(self.selectMethod).setChecked(True) self.commit() def setSelectMethod(self, method): if self.selectMethod != method: self.selectMethod = method self.selectButtons.button(method).setChecked(True) self.selectMethodChanged() def selectMethodChanged(self): self.autoSelection() self.ranksView.setFocus() def nSelectedChanged(self): self.selectMethod = OWRank.SelectNBest self.selectButtons.button(self.selectMethod).setChecked(True) self.selectMethodChanged() def autoSelection(self): selModel = self.ranksView.selectionModel() rowCount = self.ranksModel.rowCount() columnCount = self.ranksModel.columnCount() model = self.ranksProxyModel if self.selectMethod == OWRank.SelectNone: selection = QItemSelection() elif self.selectMethod == OWRank.SelectAll: selection = QItemSelection( model.index(0, 0), model.index(rowCount - 1, columnCount - 1)) elif self.selectMethod == OWRank.SelectNBest: nSelected = min(self.nSelected, rowCount) selection = QItemSelection( model.index(0, 0), model.index(nSelected - 1, columnCount - 1)) else: selection = QItemSelection() if len(self.selected_rows): selection = QItemSelection() for row in self.selected_rows: selection.append( QItemSelectionRange(model.index(row, 0), model.index(row, columnCount - 1))) selModel.select(selection, QItemSelectionModel.ClearAndSelect) def headerClick(self, index): if index >= 1 and self.selectMethod == OWRank.SelectNBest: # Reselect the top ranked attributes self.autoSelection() # Store the header states disc = bytes(self.discRanksView.horizontalHeader().saveState()) cont = bytes(self.contRanksView.horizontalHeader().saveState()) no_class = bytes(self.noClassRanksView.horizontalHeader().saveState()) self.headerState = [disc, cont, no_class] def measuresSelectionChanged(self, measure): """Measure selection has changed. Update column visibility. 
""" checked = self.selectedMeasures[measure.name] self.selectedMeasures[measure.name] = not checked if not checked: self.selected_checks.add(measure.name) elif measure.name in self.selected_checks: self.selected_checks.remove(measure.name) measures_mask = [False] * len(self.measures) measures_mask += [False for _ in self.learners] # Update scores for shown column if they are not yet computed. if measure in self.measures and self.measure_scores: index = self.measures.index(measure) if all(s is None for s in self.measure_scores[index]): measures_mask[index] = True if self.update_scores: self.updateScores(measures_mask) self.updateVisibleScoreColumns() def updateVisibleScoreColumns(self): """ Update the visible columns of the scores view. """ for i, measure in enumerate(self.measures): shown = self.selectedMeasures.get(measure.name) self.ranksView.setColumnHidden(i + 1, not shown) self.ranksView.setColumnWidth(i + 1, 100) index = self.ranksView.horizontalHeader().sortIndicatorSection() if self.ranksView.isColumnHidden(index): self.headerState[self.rankMode] = None if self.headerState[self.rankMode] is None: def get_sort_by_col(measures, selected_measures): cols = [ i + 1 for i, m in enumerate(measures) if m.name in selected_measures ] return cols[0] if cols else len(measures) + 1 col = get_sort_by_col(self.measures, self.selected_checks) self.ranksView.sortByColumn(col, Qt.DescendingOrder) self.autoSelection() def updateDelegates(self): self.contRanksView.setItemDelegate(gui.ColoredBarItemDelegate(self)) self.discRanksView.setItemDelegate(gui.ColoredBarItemDelegate(self)) self.noClassRanksView.setItemDelegate(gui.ColoredBarItemDelegate(self)) def send_report(self): if not self.data: return self.report_domain("Input", self.data.domain) self.report_table("Ranks", self.ranksView, num_format="{:.3f}") if self.out_domain_desc is not None: self.report_items("Output", self.out_domain_desc) def commit(self): self.selected_rows = self.get_selection() if self.data and 
len(self.data.domain.attributes) == len( self.selected_rows): self.selectMethod = OWRank.SelectAll self.selectButtons.button(self.selectMethod).setChecked(True) selected = self.selectedAttrs() if not self.data or not selected: self.send("Reduced Data", None) self.out_domain_desc = None else: data = Table( Domain(selected, self.data.domain.class_var, self.data.domain.metas), self.data) self.send("Reduced Data", data) self.out_domain_desc = report.describe_domain(data.domain) def selectedAttrs(self): if self.data: inds = self.ranksView.selectionModel().selectedRows(0) source = self.ranksProxyModel.mapToSource inds = map(source, inds) inds = [ind.row() for ind in inds] return [self.data.domain.attributes[i] for i in inds] else: return [] def create_scores_table(self, labels): indices = [ i for i, m in enumerate(self.measures) if self.selectedMeasures.get(m.name, False) ] measures = [ s.name for s in self.measures if self.selectedMeasures.get(s.name, False) ] measures += [label for label in labels] if not measures: return None features = [ContinuousVariable(s) for s in measures] metas = [StringVariable("Feature name")] domain = Domain(features, metas=metas) scores = np.nan_to_num( np.array([ row for i, row in enumerate(self.measure_scores) if i in indices or i >= len(self.measures) ], dtype=np.float64).T) feature_names = np.array([a.name for a in self.data.domain.attributes]) # Reshape to 2d array as Table does not like 1d arrays feature_names = feature_names[:, None] new_table = Table(domain, scores, metas=feature_names) new_table.name = "Feature Scores" return new_table @classmethod def migrate_settings(cls, settings, version): if not version: # Before fc5caa1e1d716607f1f5c4e0b0be265c23280fa0 # headerState had length 2 headerState = settings.get("headerState", None) if headerState is not None and \ isinstance(headerState, tuple) and \ len(headerState) < 3: headerState = (list(headerState) + [None] * 3)[:3] settings["headerState"] = headerState
def __init__(self):
    """Initialise widget state and build the control-area and main-area GUI.

    State attributes are reset to empty values, a single-shot timer is wired
    to `_apply_filter`, and the widgets are created in the order in which
    they are added to their parent layouts.
    """
    OWWidget.__init__(self)
    ConcurrentWidgetMixin.__init__(self)

    # ATTRIBUTES #
    self.target_database = ENTREZ_ID

    # input data
    self.input_data = None
    self.input_genes = None
    self.tax_id = None
    self.column_candidates = []

    # input options
    self.organisms = []

    # gene matcher
    self.gene_matcher = None

    # progress bar
    self.progress_bar = None

    # Single-shot timer; its timeout triggers `_apply_filter`.
    self._timer = QTimer()
    self._timer.timeout.connect(self._apply_filter)
    self._timer.setSingleShot(True)

    # GUI SECTION #

    # Control area
    self.info_box = widgetLabel(
        widgetBox(self.controlArea, "Info", addSpace=True),
        'No data on input.\n')

    # Organism selection; the combo box is filled by
    # get_available_organisms() before the stored index is applied.
    organism_box = vBox(self.controlArea, 'Organism')
    self.organism_select_combobox = comboBox(
        organism_box, self, 'selected_organism',
        callback=self.on_input_option_change)

    self.get_available_organisms()
    self.organism_select_combobox.setCurrentIndex(self.selected_organism)

    # Where the gene identifiers live: a data column or the feature names.
    box = widgetBox(self.controlArea, 'Gene IDs in the input data')
    self.gene_columns_model = itemmodels.DomainModel(
        valid_types=(StringVariable, DiscreteVariable))
    self.gene_column_combobox = comboBox(
        box,
        self,
        'selected_gene_col',
        label='Stored in data column',
        model=self.gene_columns_model,
        sendSelectedValue=True,
        callback=self.on_input_option_change,
    )

    # NOTE(review): `disables=[(-1, ...)]` presumably disables the column
    # combo box while the check box is ticked - confirm against the Orange
    # gui.checkBox documentation. The explicit setDisabled() call below
    # applies the same rule to the initial state.
    self.attr_names_checkbox = checkBox(
        box,
        self,
        'use_attr_names',
        'Stored as feature (column) names',
        disables=[(-1, self.gene_column_combobox)],
        callback=self.on_input_option_change,
    )

    self.gene_column_combobox.setDisabled(bool(self.use_attr_names))

    output_box = vBox(self.controlArea, 'Output')

    # separator(output_box)
    # output_box.layout().addWidget(horizontal_line())
    # separator(output_box)
    # Both output options re-run commit() when toggled.
    self.exclude_radio = checkBox(output_box, self, 'exclude_unmatched',
                                  'Exclude unmatched genes',
                                  callback=self.commit)
    self.replace_radio = checkBox(output_box, self, 'replace_id_with_symbol',
                                  'Replace feature IDs with gene names',
                                  callback=self.commit)

    auto_commit(self.controlArea, self, "auto_commit", "&Commit", box=False)

    rubber(self.controlArea)

    # Main area
    # Filter line edit; typing calls handle_filter_callback.
    self.filter = lineEdit(self.mainArea, self, 'search_pattern', 'Filter:',
                           callbackOnType=True,
                           callback=self.handle_filter_callback)
    # rubber(self.radio_group)
    self.mainArea.layout().addWidget(self.filter)

    # set splitter
    self.splitter = QSplitter()
    self.splitter.setOrientation(Qt.Vertical)

    # First table; its model is created here but not attached to the view
    # in this method.
    self.table_model = GeneInfoModel()
    self.table_view = QTableView()
    self.table_view.setAlternatingRowColors(True)
    self.table_view.viewport().setMouseTracking(True)
    self.table_view.setSortingEnabled(True)
    self.table_view.setShowGrid(False)
    self.table_view.verticalHeader().hide()
    # self.table_view.horizontalHeader().setSectionResizeMode(QHeaderView.Stretch)

    # Second table (UnknownGeneInfoModel); selection is disabled.
    self.unknown_model = UnknownGeneInfoModel()
    self.unknown_view = QTableView()
    self.unknown_view.setModel(self.unknown_model)
    self.unknown_view.verticalHeader().hide()
    self.unknown_view.setShowGrid(False)
    self.unknown_view.setSelectionMode(QAbstractItemView.NoSelection)
    self.unknown_view.horizontalHeader().setSectionResizeMode(
        QHeaderView.Stretch)

    # Stretch factors 90 vs 10 for the first and second table.
    self.splitter.addWidget(self.table_view)
    self.splitter.addWidget(self.unknown_view)

    self.splitter.setStretchFactor(0, 90)
    self.splitter.setStretchFactor(1, 10)

    self.mainArea.layout().addWidget(self.splitter)
class OWGenes(OWWidget, ConcurrentWidgetMixin):
    """Widget that matches gene identifiers from the input table.

    Gene names are read either from the feature names or from a data column,
    matched with a `GeneMatcher` for the selected organism, shown in two
    tables, and sent on as an annotated data table ("Data") together with
    the matcher results ("Genes").
    """

    name = "Genes"
    description = "Tool for working with genes"
    icon = "../widgets/icons/OWGeneInfo.svg"
    priority = 5
    want_main_area = True

    selected_organism: int = Setting(11)
    search_pattern: str = Setting('')
    exclude_unmatched = Setting(True)
    replace_id_with_symbol = Setting(True)
    auto_commit = Setting(True)

    settingsHandler = DomainContextHandler()
    selected_gene_col = ContextSetting(None)
    use_attr_names = ContextSetting(True)

    replaces = [
        'orangecontrib.bioinformatics.widgets.OWGeneNameMatcher.OWGeneNameMatcher'
    ]

    class Inputs:
        data_table = Input("Data", Table)

    class Outputs:
        data_table = Output("Data", Table)
        gene_matcher_results = Output("Genes", Table)

    class Information(OWWidget.Information):
        pass

    def sizeHint(self):
        """Return the preferred initial size of the widget window."""
        return QSize(1280, 960)

    def __init__(self):
        """Initialise widget state and build the control/main area GUI."""
        OWWidget.__init__(self)
        ConcurrentWidgetMixin.__init__(self)

        # ATTRIBUTES #
        self.target_database = ENTREZ_ID

        # input data
        self.input_data = None
        self.input_genes = None
        self.tax_id = None
        self.column_candidates = []

        # input options
        self.organisms = []

        # gene matcher
        self.gene_matcher = None

        # progress bar
        self.progress_bar = None

        # Single-shot timer restarted by handle_filter_callback(); the
        # filter is applied only once typing has paused.
        self._timer = QTimer()
        self._timer.timeout.connect(self._apply_filter)
        self._timer.setSingleShot(True)

        # GUI SECTION #

        # Control area
        self.info_box = widgetLabel(
            widgetBox(self.controlArea, "Info", addSpace=True),
            'No data on input.\n')

        organism_box = vBox(self.controlArea, 'Organism')
        self.organism_select_combobox = comboBox(
            organism_box, self, 'selected_organism',
            callback=self.on_input_option_change)

        self.get_available_organisms()
        self.organism_select_combobox.setCurrentIndex(self.selected_organism)

        box = widgetBox(self.controlArea, 'Gene IDs in the input data')
        self.gene_columns_model = itemmodels.DomainModel(
            valid_types=(StringVariable, DiscreteVariable))
        self.gene_column_combobox = comboBox(
            box,
            self,
            'selected_gene_col',
            label='Stored in data column',
            model=self.gene_columns_model,
            sendSelectedValue=True,
            callback=self.on_input_option_change,
        )

        self.attr_names_checkbox = checkBox(
            box,
            self,
            'use_attr_names',
            'Stored as feature (column) names',
            disables=[(-1, self.gene_column_combobox)],
            callback=self.on_input_option_change,
        )

        self.gene_column_combobox.setDisabled(bool(self.use_attr_names))

        output_box = vBox(self.controlArea, 'Output')

        # separator(output_box)
        # output_box.layout().addWidget(horizontal_line())
        # separator(output_box)
        self.exclude_radio = checkBox(output_box, self, 'exclude_unmatched',
                                      'Exclude unmatched genes',
                                      callback=self.commit)
        self.replace_radio = checkBox(output_box, self,
                                      'replace_id_with_symbol',
                                      'Replace feature IDs with gene names',
                                      callback=self.commit)

        auto_commit(self.controlArea, self, "auto_commit", "&Commit",
                    box=False)

        rubber(self.controlArea)

        # Main area
        self.filter = lineEdit(self.mainArea, self, 'search_pattern',
                               'Filter:', callbackOnType=True,
                               callback=self.handle_filter_callback)
        # rubber(self.radio_group)
        self.mainArea.layout().addWidget(self.filter)

        # set splitter
        self.splitter = QSplitter()
        self.splitter.setOrientation(Qt.Vertical)

        self.table_model = GeneInfoModel()
        self.table_view = QTableView()
        self.table_view.setAlternatingRowColors(True)
        self.table_view.viewport().setMouseTracking(True)
        self.table_view.setSortingEnabled(True)
        self.table_view.setShowGrid(False)
        self.table_view.verticalHeader().hide()
        # self.table_view.horizontalHeader().setSectionResizeMode(QHeaderView.Stretch)

        self.unknown_model = UnknownGeneInfoModel()
        self.unknown_view = QTableView()
        self.unknown_view.setModel(self.unknown_model)
        self.unknown_view.verticalHeader().hide()
        self.unknown_view.setShowGrid(False)
        self.unknown_view.setSelectionMode(QAbstractItemView.NoSelection)
        self.unknown_view.horizontalHeader().setSectionResizeMode(
            QHeaderView.Stretch)

        self.splitter.addWidget(self.table_view)
        self.splitter.addWidget(self.unknown_view)

        self.splitter.setStretchFactor(0, 90)
        self.splitter.setStretchFactor(1, 10)

        self.mainArea.layout().addWidget(self.splitter)

    def handle_filter_callback(self):
        """Restart the 500 ms filter timer on every edit of the filter."""
        self._timer.stop()
        self._timer.start(500)

    def _apply_filter(self):
        """Apply the search pattern to the gene table and commit."""
        # filter only if input data is present and model is populated
        if self.table_model.table is not None:
            self.table_model.update_model(
                filter_pattern=str(self.search_pattern))
            self.commit()

    def __reset_widget_state(self):
        """Detach and clear both table models and refresh the info box."""
        self.table_view.clearSpans()
        self.table_view.setModel(None)
        self.table_model.clear()
        self.unknown_model.clear()
        self._update_info_box()

    def _update_info_box(self):
        """Show gene/match counts, or a placeholder when there are none."""
        if self.input_genes and self.gene_matcher:
            num_genes = len(self.gene_matcher.genes)
            known_genes = len(self.gene_matcher.get_known_genes())

            info_text = ('{} genes in input data\n'
                         '{} genes match Entrez database\n'
                         '{} genes with match conflicts\n'.format(
                             num_genes, known_genes, num_genes - known_genes))
        else:
            info_text = 'No data on input.'

        self.info_box.setText(info_text)

    def on_done(self, _):
        """Populate both tables once the gene matcher task has finished."""
        # update info box
        self._update_info_box()

        # set output options
        self.toggle_radio_options()

        # set known genes
        self.table_model.initialize(self.gene_matcher.genes)
        self.table_view.setModel(self.table_model)
        self.table_view.selectionModel().selectionChanged.connect(self.commit)
        self.table_view.setSelectionBehavior(QAbstractItemView.SelectRows)

        self.table_view.setItemDelegateForColumn(
            self.table_model.entrez_column_index,
            LinkStyledItemDelegate(self.table_view))
        v_header = self.table_view.verticalHeader()
        option = self.table_view.viewOptions()
        size = self.table_view.style().sizeFromContents(
            QStyle.CT_ItemViewItem, option, QSize(20, 20), self.table_view)

        v_header.setDefaultSectionSize(size.height() + 2)
        v_header.setMinimumSectionSize(5)
        self.table_view.horizontalHeader().setStretchLastSection(True)

        # set unknown genes
        self.unknown_model.initialize(self.gene_matcher.genes)
        self.unknown_view.verticalHeader().setStretchLastSection(True)

        self._apply_filter()

    def get_available_organisms(self):
        """Fill the organism combo box with common taxa, sorted by name."""
        available_organism = sorted(
            ((tax_id, taxonomy.name(tax_id))
             for tax_id in taxonomy.common_taxids()),
            key=lambda x: x[1])

        self.organisms = [tax_id[0] for tax_id in available_organism]
        self.organism_select_combobox.addItems(
            [tax_id[1] for tax_id in available_organism])

    def gene_names_from_table(self):
        """ Extract and return gene names from `Orange.data.Table`.

        The names are stored in `self.input_genes`; the list stays empty
        when there is no input data or no usable gene column.
        """
        self.input_genes = []
        if self.input_data:
            if self.use_attr_names:
                self.input_genes = [
                    str(attr.name).strip()
                    for attr in self.input_data.domain.attributes
                ]
            else:
                if self.selected_gene_col is None:
                    self.selected_gene_col = self.gene_column_identifier()

                if self.selected_gene_col is None:
                    # No string/discrete column to read genes from.
                    return

                self.input_genes = [
                    str(e[self.selected_gene_col]) for e in self.input_data
                    if not np.isnan(e[self.selected_gene_col])
                ]

    def _update_gene_matcher(self):
        """Create a fresh `GeneMatcher` for the current genes and organism."""
        self.gene_names_from_table()

        self.gene_matcher = GeneMatcher(self.get_selected_organism(),
                                        auto_start=False)
        self.gene_matcher.genes = self.input_genes
        # self.gene_matcher.organism = self.get_selected_organism()

    def get_selected_organism(self):
        """Return the taxonomy id at the selected organism index."""
        return self.organisms[self.selected_organism]

    def _run(self):
        """Start the gene matcher task if a matcher exists."""
        if self.gene_matcher is not None:
            self.start(run_gene_matcher, self.gene_matcher)

    def on_input_option_change(self):
        """Reset the views and re-run matching after an option change."""
        self.__reset_widget_state()
        self._update_gene_matcher()
        self._run()

    def gene_column_identifier(self):
        """
        Get most suitable column that stores genes. If there are
        several suitable columns, select the one with most unique
        values. Take the best one.

        Returns None when the input has no string or discrete column.
        """
        best_candidate = None
        best_count = -1
        for col in self.gene_columns_model:
            if not isinstance(col, (DiscreteVariable, StringVariable)):
                continue
            count = np.unique(self.input_data.get_column_view(col)[0]).size
            # ">=" keeps the last of equally good columns, which is what
            # taking the last element of the sorted candidates did.
            if count >= best_count:
                best_candidate, best_count = col, count
        return best_candidate

    def find_genes_location(self):
        """ Try locate the genes in the input data when we first load the data.

            Proposed rules:
                - when no suitable feature names are present, check the columns.
                - find the most suitable column, that is, the one with most unique values.

        """
        domain = self.input_data.domain
        if not domain.attributes:
            if self.selected_gene_col is None:
                self.selected_gene_col = self.gene_column_identifier()
                self.use_attr_names = False

    @Inputs.data_table
    def handle_input(self, data):
        """Handle a new (or removed) input table and restart matching."""
        self.closeContext()
        self.input_data = None
        self.input_genes = None
        self.__reset_widget_state()
        self.gene_columns_model.set_domain(None)
        self.selected_gene_col = None

        if data:
            self.input_data = data
            self.gene_columns_model.set_domain(self.input_data.domain)

            # check if input table has tax_id, human is used if tax_id is not found
            self.tax_id = str(self.input_data.attributes.get(TAX_ID, '9606'))
            # check for gene location. Default is that genes are attributes in the input table.
            self.use_attr_names = self.input_data.attributes.get(
                GENE_AS_ATTRIBUTE_NAME, self.use_attr_names)

            # NOTE(review): `not self.selected_organism` is true only for
            # index 0, so the organism from the data is ignored for any
            # other stored selection - confirm this is intended.
            if self.tax_id in self.organisms and not self.selected_organism:
                self.selected_organism = self.organisms.index(self.tax_id)

            self.openContext(self.input_data.domain)
            self.find_genes_location()
            self.on_input_option_change()

    def commit(self):
        """Send the annotated data table and the gene matcher results.

        Genes selected in the table view are used; without a selection all
        genes passing the current filter are. Both outputs are None when
        there is no input, no matcher yet, or no gene was matched.
        """
        if self.input_data is None or self.gene_matcher is None:
            # commit() is also a check-box callback and can therefore run
            # before any data has been received.
            self.Outputs.data_table.send(None)
            self.Outputs.gene_matcher_results.send(None)
            return

        selection = self.table_view.selectionModel().selectedRows(
            self.table_model.entrez_column_index)

        selected_genes = [row.data() for row in selection]
        if not selected_genes:
            selected_genes = self.table_model.get_filtered_genes()

        gene_ids = self.get_target_ids()
        known_genes = [gid for gid in gene_ids if gid != '?']

        table = None
        gm_table = None
        if known_genes:
            if not self.use_attr_names:
                # Genes are in rows (we have a column with genes).
                table = self._table_with_genes_in_rows(
                    gene_ids, known_genes, selected_genes)
            else:
                # genes are in columns (genes are features).
                table = self._table_with_genes_in_columns(
                    known_genes, selected_genes)

            gm_table = self.gene_matcher.to_data_table(
                selected_genes=selected_genes if selected_genes else None)

        # Each output is sent exactly once per commit.
        self.Outputs.data_table.send(table)
        self.Outputs.gene_matcher_results.send(gm_table)

    def _table_with_genes_in_rows(self, gene_ids, known_genes,
                                  selected_genes):
        """Return the output table when every row holds one gene."""
        if self.target_database in self.input_data.domain:
            gene_var = self.input_data.domain[self.target_database]
            metas = self.input_data.domain.metas
        else:
            # Add a meta column that will hold the matched gene ids.
            gene_var = StringVariable(self.target_database)
            metas = self.input_data.domain.metas + (gene_var, )

        domain = Domain(self.input_data.domain.attributes,
                        self.input_data.domain.class_vars, metas)

        table = self.input_data.transform(domain)
        col, _ = table.get_column_view(gene_var)
        col[:] = gene_ids

        # filter selected rows
        selected_genes_set = set(selected_genes)
        selected_rows = [
            row_index for row_index, row in enumerate(table)
            if str(row[gene_var]) in selected_genes_set
        ]

        # handle table attributes
        table.attributes[TAX_ID] = self.get_selected_organism()
        table.attributes[GENE_AS_ATTRIBUTE_NAME] = False
        table.attributes[GENE_ID_COLUMN] = self.target_database
        table = table[selected_rows] if selected_rows else table

        if self.exclude_unmatched:
            # create filter from selected column for genes
            only_known = table_filter.FilterStringList(gene_var, known_genes)
            # apply filter to the data
            table = table_filter.Values([only_known])(table)

        return table

    def _table_with_genes_in_columns(self, known_genes, selected_genes):
        """Return the output table when genes are the feature columns."""
        domain = self.input_data.domain.copy()
        table = self.input_data.transform(domain)

        for gene in self.gene_matcher.genes:
            if gene.input_identifier in table.domain:
                table.domain[gene.input_identifier].attributes[
                    self.target_database] = (str(gene.gene_id)
                                             if gene.gene_id else '?')

                if self.replace_id_with_symbol:
                    try:
                        table.domain[gene.input_identifier].name = str(
                            gene.symbol)
                    except AttributeError:
                        # TODO: missing gene symbol, need to handle this?
                        pass

        # filter selected columns
        selected_genes_set = set(selected_genes)
        selected = [
            column for column in table.domain.attributes
            if self.target_database in column.attributes and str(
                column.attributes[self.target_database]) in selected_genes_set
        ]

        output_attrs = table.domain.attributes

        if selected:
            output_attrs = selected

        if self.exclude_unmatched:
            known_genes_set = set(known_genes)
            output_attrs = [
                col for col in output_attrs
                if col.attributes[self.target_database] in known_genes_set
            ]

        domain = Domain(output_attrs, table.domain.class_vars,
                        table.domain.metas)

        table = table.from_table(domain, table)

        # handle table attributes
        table.attributes[TAX_ID] = self.get_selected_organism()
        table.attributes[GENE_AS_ATTRIBUTE_NAME] = True
        table.attributes[GENE_ID_ATTRIBUTE] = self.target_database

        return table

    def toggle_radio_options(self):
        """Enable the output check boxes that apply to the current input."""
        self.replace_radio.setEnabled(bool(self.use_attr_names))

        if self.gene_matcher.genes:
            # enable checkbox if unknown genes are detected
            has_unknown_genes = len(self.gene_matcher.genes) != len(
                self.gene_matcher.get_known_genes())
            self.exclude_radio.setEnabled(has_unknown_genes)
            self.exclude_unmatched = has_unknown_genes

    def get_target_ids(self):
        """Return each gene's id as a string, or '?' when it has none."""
        return [
            str(gene.gene_id) if gene.gene_id else '?'
            for gene in self.gene_matcher.genes
        ]
class OWClusterAnalysis(OWWidget):
    """Widget that scores genes per cluster and runs gene set enrichment.

    Inputs are an expression table (genes as columns) and an optional table
    of custom gene sets.  Outputs are the selected rows/columns of the input,
    a table of gene scores and a table of gene set scores.
    """

    name = "Cluster Analysis"
    description = (
        "The widget displays differentially expressed genes that characterize the cluster, "
        "and corresponding gene terms that describe differentially expressed genes"
    )
    icon = "../widgets/icons/OWClusterAnalysis.svg"
    priority = 100

    class Inputs:
        data_table = Input('Data', Table)
        custom_sets = Input('Custom Gene Sets', Table)

    class Outputs:
        selected_data = Output('Selected Data', Table)
        gene_scores = Output('Gene Scores', Table)
        gene_set_scores = Output('Gene Set Scores', Table)

    class Information(OWWidget.Information):
        pass

    class Warning(OWWidget.Warning):
        gene_enrichment = Msg('{}, {}.')
        no_selected_gene_sets = Msg(
            'No gene set selected, select them from Gene Sets box.')

    class Error(OWWidget.Error):
        no_cluster_indicator = Msg('No cluster indicator in the input data')
        gene_as_attributes = Msg(
            'Genes, in the input data, are expected as column names')
        organism_mismatch = Msg(
            'Organism in input data and custom gene sets does not match')
        cluster_batch_conflict = Msg(
            'Cluster and batch must not be the same variable')

    settingsHandler = ClusterAnalysisContextHandler()
    cluster_indicators = ContextSetting([])
    batch_indicator = ContextSetting(None)
    stored_gene_sets_selection = ContextSetting(())

    scoring_method_selection = ContextSetting(0)
    scoring_method_design = ContextSetting(0)
    scoring_test_type = ContextSetting(0)

    # genes filter
    max_gene_count = Setting(20)
    use_gene_count_filter = Setting(True)

    max_gene_p_value = Setting(0.1)
    use_gene_pval_filter = Setting(False)

    max_gene_fdr = Setting(0.1)
    use_gene_fdr_filter = Setting(True)

    # gene sets filter
    min_gs_count = Setting(5)
    use_gs_count_filter = Setting(True)

    max_gs_p_value = Setting(0.1)
    use_gs_pval_filter = Setting(False)

    max_gs_fdr = Setting(0.1)
    use_gs_max_fdr = Setting(True)

    # auto commit results
    auto_commit = settings.Setting(False)

    custom_gene_set_indicator = settings.Setting(None)

    def __init__(self):
        """Initialise widget state and build the control and main areas."""
        super().__init__()

        # widget attributes
        self.input_data = None
        self.store_input_domain = None
        self.input_genes_names = []
        self.input_genes_ids = []
        self.tax_id = None
        self.use_attr_names = None
        self.gene_id_attribute = None

        # custom gene set input
        self.feature_model = itemmodels.DomainModel(
            valid_types=(DiscreteVariable, StringVariable))
        self.custom_data = None
        self.custom_tax_id = None
        self.custom_use_attr_names = None
        self.custom_gene_id_attribute = None
        self.custom_gene_id_column = None
        self.num_of_custom_sets = None

        self.rows_by_cluster = None
        self.rows_by_batch = None
        self.clusters = []
        self.new_cluster_profile = []
        # Read by __set_batch; defined here so it exists before any input.
        self.cluster_var = None

        # data model
        self.cluster_info_model = None

        # Info
        info_box = vBox(self.controlArea, 'Info')
        self.input_info = widgetLabel(info_box)

        # Cluster selection
        self.cluster_indicator_model = itemmodels.DomainModel(
            valid_types=(DiscreteVariable, ), separators=False)
        self.cluster_indicator_box = widgetBox(self.controlArea,
                                               'Cluster Indicator')

        self.cluster_indicator_view = listView(
            self.cluster_indicator_box,
            self,
            'cluster_indicators',
            model=self.cluster_indicator_model,
            selectionMode=QListWidget.MultiSelection,
            callback=self.invalidate,
            sizeHint=QSize(256, 70),
        )

        # Batch selection
        self.batch_indicator_model = itemmodels.DomainModel(
            valid_types=(DiscreteVariable, ), separators=False, placeholder="")
        box = widgetBox(self.controlArea, 'Batch Indicator')
        self.batch_indicator_combobox = comboBox(
            box,
            self,
            'batch_indicator',
            model=self.batch_indicator_model,
            sendSelectedValue=True,
            callback=self.batch_indicator_changed,
        )

        # Gene scoring
        box = widgetBox(self.controlArea, 'Gene Scoring')
        self.gene_scoring = GeneScoringWidget(box, self)
        self.gene_scoring.set_method_selection_area('scoring_method_selection')
        self.gene_scoring.set_method_design_area('scoring_method_design')
        self.gene_scoring.set_test_type('scoring_test_type')

        # Gene Sets widget
        gene_sets_box = widgetBox(self.controlArea, "Gene Sets")
        self.gs_widget = GeneSetsSelection(gene_sets_box, self,
                                           'stored_gene_sets_selection')
        self.gs_widget.hierarchy_tree_widget.itemClicked.connect(
            self.__gene_sets_enrichment)

        # custom gene sets area
        box = vBox(self.controlArea, "Custom Gene Sets")

        if self.custom_gene_set_indicator not in self.feature_model:
            self.custom_gene_set_indicator = None

        self.gs_label_combobox = comboBox(
            box,
            self,
            "custom_gene_set_indicator",
            sendSelectedValue=True,
            model=self.feature_model,
            callback=self.handle_custom_gene_sets,
        )
        self.gs_label_combobox.setDisabled(True)

        # main area
        splitter = QSplitter(Qt.Horizontal, self.mainArea)
        self.mainArea.layout().addWidget(splitter)

        genes_filter = widgetBox(splitter, 'Filter Genes',
                                 orientation=QHBoxLayout())
        spin(
            genes_filter,
            self,
            'max_gene_count',
            0,
            10000,
            label='Count',
            # The bound setting is ``max_gene_count``: this is an upper bound.
            tooltip='Maximum genes count',
            checked='use_gene_count_filter',
            callback=self.filter_genes,
            callbackOnReturn=True,
            checkCallback=self.filter_genes,
        )

        doubleSpin(
            genes_filter,
            self,
            'max_gene_p_value',
            0.0,
            1.0,
            0.0001,
            label='p-value',
            tooltip='Maximum p-value of the enrichment score',
            checked='use_gene_pval_filter',
            callback=self.filter_genes,
            callbackOnReturn=True,
            checkCallback=self.filter_genes,
        )

        doubleSpin(
            genes_filter,
            self,
            'max_gene_fdr',
            0.0,
            1.0,
            0.0001,
            label='FDR',
            tooltip='Maximum false discovery rate',
            checked='use_gene_fdr_filter',
            callback=self.filter_genes,
            callbackOnReturn=True,
            checkCallback=self.filter_genes,
        )

        gene_sets_filter = widgetBox(splitter, 'Filter Gene Sets',
                                     orientation=QHBoxLayout())
        spin(
            gene_sets_filter,
            self,
            'min_gs_count',
            0,
            DISPLAY_GENE_SETS_COUNT,
            label='Count',
            tooltip='Minimum genes count',
            checked='use_gs_count_filter',
            callback=self.filter_gene_sets,
            callbackOnReturn=True,
            checkCallback=self.filter_gene_sets,
        )

        doubleSpin(
            gene_sets_filter,
            self,
            'max_gs_p_value',
            0.0,
            1.0,
            0.0001,
            label='p-value',
            tooltip='Maximum p-value of the enrichment score',
            checked='use_gs_pval_filter',
            callback=self.filter_gene_sets,
            callbackOnReturn=True,
            checkCallback=self.filter_gene_sets,
        )

        doubleSpin(
            gene_sets_filter,
            self,
            'max_gs_fdr',
            0.0,
            1.0,
            0.0001,
            label='FDR',
            tooltip='Maximum false discovery rate',
            checked='use_gs_max_fdr',
            callback=self.filter_gene_sets,
            callbackOnReturn=True,
            checkCallback=self.filter_gene_sets,
        )

        self.cluster_info_view = QTableView()
        self.cluster_info_view.verticalHeader().setVisible(False)
        self.cluster_info_view.setItemDelegate(HTMLDelegate())
        self.cluster_info_view.horizontalHeader().hide()
        self.cluster_info_view.horizontalHeader().setSectionResizeMode(
            QHeaderView.Stretch)

        auto_commit(self.controlArea, self, "auto_commit", "&Commit",
                    box=False)

        self.mainArea.layout().addWidget(self.cluster_info_view)

    def sizeHint(self):
        """Return the preferred initial widget size."""
        return QSize(800, 600)

    def __update_info_box(self):
        """Refresh the 'Info' label from the current input and custom data."""
        info_string = ''
        if self.input_genes_ids:
            info_string += '{} samples, {} clusters\n'.format(
                self.input_data.X.shape[0],
                len(self.clusters) if self.clusters else '?')
            info_string += '{:,d} unique genes\n'.format(
                len(self.input_genes_ids))
        else:
            info_string += 'No genes on input.\n'

        if self.custom_data:
            info_string += '{} marker genes in {} sets\n'.format(
                self.custom_data.X.shape[0], self.num_of_custom_sets)

        self.input_info.setText(info_string)

    def __set_cluster_info_model(self):
        """Create a fresh ClusterModel for the current clusters and show it."""
        self.cluster_info_view.setModel(None)

        self.cluster_info_model = ClusterModel(self)
        self.cluster_info_model.add_rows(self.clusters)

        # add model to the view
        self.cluster_info_view.setModel(self.cluster_info_model)
        # call sizeHint function
        self.cluster_info_view.resizeRowsToContents()
        self.cluster_info_view.selectionModel().selectionChanged.connect(
            self.commit)

    def __create_temp_class_var(self):
        """Combine the selected cluster indicators into one discrete variable.

        Every combination of values of the selected indicators becomes one
        value of a new 'Cluster indicators' variable.  The variable is added
        to the metas of ``self.input_data`` (which is replaced by the
        transformed table) and ``self.new_cluster_profile`` receives, per
        combination, the list of value indices of the original indicators.

        :return: the newly created combined DiscreteVariable
        """
        cluster_indicator_name = 'Cluster indicators'
        new_cluster_values = []

        # Enumerate each indicator's values separately: a single shared
        # name -> index mapping is wrong as soon as two indicators use the
        # same value name at different positions.
        cart_prod = itertools.product(
            *[list(enumerate(var.values)) for var in self.cluster_indicators])

        for comb in cart_prod:
            new_cluster_values.append(', '.join(val for _, val in comb))
            self.new_cluster_profile.append([idx for idx, _ in comb])

        row_profile_lookup = {
            tuple(profile): indx
            for indx, profile in enumerate(self.new_cluster_profile)
        }

        # One row per indicator, one column per data instance.
        row_profile = np.vstack([
            np.asarray(self.input_data.get_column_view(var)[0], dtype=int)
            for var in self.cluster_indicators
        ])

        ca_ind = DiscreteVariable.make(
            cluster_indicator_name,
            values=list(new_cluster_values),
            ordered=True)

        domain = Domain(
            self.input_data.domain.attributes,
            self.input_data.domain.class_vars,
            self.input_data.domain.metas + (ca_ind, ),
        )

        table = self.input_data.transform(domain)
        table[:, ca_ind] = np.array(
            [[row_profile_lookup[tuple(row_profile[:, i])]]
             for i in range(row_profile.shape[1])])
        self.input_data = table
        return ca_ind

    def __set_clusters(self):
        """Rebuild ``self.clusters`` from the selected cluster indicators."""
        self.clusters = []
        self.new_cluster_profile = []
        self.cluster_var = None

        if self.cluster_indicators and self.input_data:
            if isinstance(self.cluster_indicators,
                          list) and len(self.cluster_indicators) > 1:
                self.cluster_var = self.__create_temp_class_var()
            else:
                self.cluster_var = self.cluster_indicators[0]

            self.rows_by_cluster = np.asarray(self.input_data.get_column_view(
                self.cluster_var)[0], dtype=int)
            for index, name in enumerate(self.cluster_var.values):
                cluster = Cluster(name, index)
                self.clusters.append(cluster)
                cluster.set_genes(self.input_genes_names,
                                  self.input_genes_ids)

    def __set_batch(self):
        """Update ``self.rows_by_batch`` from the selected batch indicator."""
        self.Error.cluster_batch_conflict.clear()
        self.rows_by_batch = None

        # "No batch" and "no cluster variable" are both None; that is not a
        # conflict, so only compare when a batch indicator is selected.
        if (self.batch_indicator is not None
                and self.batch_indicator == self.cluster_var):
            self.Error.cluster_batch_conflict()
            return
        if self.batch_indicator and self.input_data:
            self.rows_by_batch = np.asarray(self.input_data.get_column_view(
                self.batch_indicator)[0], dtype=int)

    def __set_genes(self):
        """Collect gene names and ids from the input table's attributes."""
        self.input_genes_names = []
        self.input_genes_ids = []

        if self.use_attr_names:
            for variable in self.input_data.domain.attributes:
                self.input_genes_names.append(str(variable.name))
                self.input_genes_ids.append(
                    str(variable.attributes.get(self.gene_id_attribute,
                                                np.nan)))

    def filter_genes(self):
        """Apply the gene filters, then recompute enrichment and commit."""
        if self.cluster_info_model:
            # filter genes
            # note: after gene filter is applied, we need to recalculate gene set enrichment
            self.cluster_info_model.apply_gene_filters(
                self.max_gene_p_value if self.use_gene_pval_filter else None,
                self.max_gene_fdr if self.use_gene_fdr_filter else None,
                self.max_gene_count if self.use_gene_count_filter else None,
            )

            # recalculate gene set enrichment
            self.__gene_sets_enrichment()
            # call sizeHint function
            self.cluster_info_view.resizeRowsToContents()

            # commit changes after filter
            self.commit()

    def filter_gene_sets(self):
        """Apply the gene set filters to the cluster model."""
        if self.cluster_info_model:
            # filter gene sets
            self.cluster_info_model.apply_gene_sets_filters(
                self.max_gs_p_value if self.use_gs_pval_filter else None,
                self.max_gs_fdr if self.use_gs_max_fdr else None,
                self.min_gs_count if self.use_gs_count_filter else None,
            )

            # call sizeHint function
            self.cluster_info_view.resizeRowsToContents()

    def __gene_enrichment(self):
        """Score genes for every cluster with the selected method.

        A ValueError raised while validating or scoring is shown as a
        widget warning instead of being propagated.
        """
        # if true cluster vs. cluster else cluster vs rest
        design = bool(self.gene_scoring.get_selected_desig())
        test_type = self.gene_scoring.get_selected_test_type()
        method = self.gene_scoring.get_selected_method()

        try:
            if method.score_function == score_hypergeometric_test:
                values = set(np.unique(self.input_data.X))
                if (0 not in values) or (len(values) != 2):
                    raise ValueError('Binary data expected (use Preprocess)')

            self.cluster_info_model.score_genes(
                design=design,
                table_x=self.input_data.X,
                rows_by_cluster=self.rows_by_cluster,
                rows_by_batch=self.rows_by_batch,
                method=method,
                alternative=test_type,
            )
        except ValueError as e:
            self.Warning.gene_enrichment(str(e), 'p-values are set to 1')

    def __gene_sets_enrichment(self):
        """Run gene set enrichment for the currently selected gene sets."""
        if self.input_data:
            self.Warning.no_selected_gene_sets.clear()
            all_sets = self.gs_widget.get_hierarchies()
            selected_sets = self.gs_widget.get_hierarchies(only_selected=True)

            if len(selected_sets) == 0 and len(all_sets) > 0:
                self.Warning.no_selected_gene_sets()

            # save setting on selected hierarchies
            self.stored_gene_sets_selection = tuple(selected_sets)
            ref_genes = set(self.input_genes_ids)

            # The model does not exist yet when input was rejected before
            # invalidate() ran (e.g. custom sets arrive after a data error).
            # TODO: possible exceptions raised by gene_sets_enrichment?
            if self.cluster_info_model is not None:
                self.cluster_info_model.gene_sets_enrichment(
                    self.gs_widget.gs_object, selected_sets, ref_genes)

            self.filter_gene_sets()

    def invalidate(self, cluster_init=True):
        """Recompute genes, clusters, batch and scores after a change.

        :param cluster_init: when False the clusters are kept as they are
            and only batch and scores are recomputed
        """
        if self.input_data is not None and self.tax_id is not None:
            self.Warning.gene_enrichment.clear()

            if self.cluster_info_model is not None:
                self.cluster_info_model.cancel()

            self.__set_genes()
            if cluster_init:
                self.__set_clusters()
            self.__set_batch()
            self.__set_cluster_info_model()

            # note: when calling self.__gene_enrichment we calculate gse automatically.
            #       No need to call self.__gene_sets_enrichment here
            self.__gene_enrichment()
            self.__update_info_box()

    def batch_indicator_changed(self):
        """Recompute scores for a new batch indicator, keeping clusters."""
        self.invalidate(cluster_init=False)

    @Inputs.data_table
    def handle_input(self, data):
        """Handle a new (or removed) expression table on the 'Data' input."""
        self.closeContext()
        self.Warning.clear()
        self.Error.clear()

        self.input_data = None
        self.store_input_domain = None
        self.stored_gene_sets_selection = ()
        self.input_genes_names = []
        self.input_genes_ids = []
        self.tax_id = None
        self.use_attr_names = None
        self.gene_id_attribute = None
        self.clusters = []

        self.gs_widget.clear()
        self.gs_widget.clear_gene_sets()
        self.cluster_info_view.setModel(None)

        self.cluster_indicators = []
        self.cluster_var = None
        self.batch_indicator = None
        self.cluster_indicator_model.set_domain(None)
        self.batch_indicator_model.set_domain(None)

        self.__update_info_box()

        if data:
            self.input_data = data

            self.cluster_indicator_model.set_domain(self.input_data.domain)
            self.batch_indicator_model.set_domain(self.input_data.domain)

            # For Cluster Indicator do not use categorical variables that contain only one value.
            self.cluster_indicator_model.wrap([
                item for item in self.cluster_indicator_model
                if len(item.values) > 1
            ])
            # First value in batch indicator model is a NoneType,
            # we can skip it when we validate categorical variables
            self.batch_indicator_model.wrap(self.batch_indicator_model[:1] + [
                item for item in self.batch_indicator_model[1:]
                if len(item.values) > 1
            ])

            self.tax_id = self.input_data.attributes.get(TAX_ID, None)
            self.use_attr_names = self.input_data.attributes.get(
                GENE_AS_ATTRIBUTE_NAME, None)
            self.gene_id_attribute = self.input_data.attributes.get(
                GENE_ID_ATTRIBUTE, None)

            if not self.cluster_indicator_model:
                self.Error.no_cluster_indicator()
                return
            elif not self.use_attr_names:
                self.Error.gene_as_attributes()
                return

            self.openContext(self.input_data.domain)

            self.gs_widget.load_gene_sets(self.tax_id)
            if self.cluster_indicator_model and len(
                    self.cluster_indicators) < 1:
                self.cluster_indicators = [self.cluster_indicator_model[0]]
            if self.batch_indicator_model and self.batch_indicator is None:
                self.batch_indicator = self.batch_indicator_model[0]

            self.invalidate()

            if self.custom_data:
                self.refresh_custom_gene_sets()
                self._handle_future_model()
                self.handle_custom_gene_sets()

    @Inputs.custom_sets
    def handle_custom_input(self, data):
        """Handle a new (or removed) table on the 'Custom Gene Sets' input."""
        self.Error.clear()
        self.Warning.clear()
        self.closeContext()
        self.custom_data = None
        self.custom_tax_id = None
        self.custom_use_attr_names = None
        self.custom_gene_id_attribute = None
        self.custom_gene_id_column = None
        self.num_of_custom_sets = None
        self.feature_model.set_domain(None)

        if data:
            self.custom_data = data
            self.feature_model.set_domain(self.custom_data.domain)
            # Keep a missing id as None: str(None) would be the truthy
            # string 'None' and be reported as an organism mismatch.
            custom_tax_id = self.custom_data.attributes.get(TAX_ID, None)
            self.custom_tax_id = (None if custom_tax_id is None
                                  else str(custom_tax_id))
            self.custom_use_attr_names = self.custom_data.attributes.get(
                GENE_AS_ATTRIBUTE_NAME, None)
            self.custom_gene_id_attribute = self.custom_data.attributes.get(
                GENE_ID_ATTRIBUTE, None)
            self.custom_gene_id_column = self.custom_data.attributes.get(
                GENE_ID_COLUMN, None)

            self._handle_future_model()

        if self.input_data:
            self.openContext(self.input_data.domain)

        self.gs_label_combobox.setDisabled(True)
        self.refresh_custom_gene_sets()
        self.handle_custom_gene_sets(select_customs_flag=True)

    def __check_organism_mismatch(self):
        """
        Check if organisms from different inputs match.

        :return: True if there is a mismatch
        """
        if self.tax_id is not None and self.custom_tax_id is not None:
            # custom_tax_id is stored as a string; compare like with like.
            return str(self.tax_id) != str(self.custom_tax_id)
        return False

    def _handle_future_model(self):
        """Re-bind the custom gene set indicator to the current feature model.

        Keeps the stored indicator when the model contains it, otherwise
        falls back to the model's first item, or None for an empty model.
        """
        if self.custom_gene_set_indicator in self.feature_model:
            index = self.feature_model.indexOf(self.custom_gene_set_indicator)
            self.custom_gene_set_indicator = self.feature_model[index]
        else:
            if self.feature_model:
                self.custom_gene_set_indicator = self.feature_model[0]
            else:
                self.custom_gene_set_indicator = None

    def handle_custom_gene_sets(self, select_customs_flag=False):
        """Register custom gene sets with the gene set widget and re-score.

        :param select_customs_flag: forwarded to
            ``gs_widget.add_custom_sets`` as ``select_customs_flag``
        """
        if self.custom_gene_set_indicator:
            if (self.custom_data is not None
                    and self.custom_gene_id_column is not None):

                if self.__check_organism_mismatch():
                    self.gs_label_combobox.setDisabled(True)
                    self.Error.organism_mismatch()
                    self.gs_widget.update_gs_hierarchy()
                    self.__gene_sets_enrichment()
                    return

                if isinstance(self.custom_gene_set_indicator,
                              DiscreteVariable):
                    labels = self.custom_gene_set_indicator.values
                    gene_sets_names = [
                        labels[int(idx)]
                        for idx in self.custom_data.get_column_view(
                            self.custom_gene_set_indicator)[0]
                    ]
                else:
                    gene_sets_names, _ = self.custom_data.get_column_view(
                        self.custom_gene_set_indicator)

                self.num_of_custom_sets = len(set(gene_sets_names))
                gene_names, _ = self.custom_data.get_column_view(
                    self.custom_gene_id_column)
                hierarchy_title = (self.custom_data.name
                                   if self.custom_data.name else
                                   'Custom sets', )
                try:
                    self.gs_widget.add_custom_sets(
                        gene_sets_names,
                        gene_names,
                        hierarchy_title=hierarchy_title,
                        select_customs_flag=select_customs_flag,
                    )
                except GeneSetException:
                    # Deliberately ignored by the original implementation.
                    pass
                self.gs_label_combobox.setDisabled(False)
        else:
            self.gs_widget.update_gs_hierarchy()

        self.__gene_sets_enrichment()
        self.__update_info_box()

    def refresh_custom_gene_sets(self):
        """Remove previously added custom gene sets from the widget."""
        self.gs_widget.clear_custom_sets()
        # self.gs_widget.update_gs_hierarchy()

    def gene_scores_output(self, selected_clusters):
        """Send a table of filtered gene scores for the selected clusters.

        :param selected_clusters: iterable of Cluster objects whose
            ``filtered_genes`` are exported
        """
        metas = [
            StringVariable('Gene'),
            StringVariable(ENTREZ_ID),
            StringVariable('Rank'),
            ContinuousVariable('Statistic score'),
            ContinuousVariable('P-value'),
            ContinuousVariable('FDR'),
        ]

        if len(self.new_cluster_profile):
            # note: order is important
            metas = self.cluster_indicators + metas

        domain = Domain([], metas=metas, class_vars=self.cluster_var)

        data = []
        for cluster in selected_clusters:
            num_of_genes = len(cluster.filtered_genes)

            scores = [gene.score for gene in cluster.filtered_genes]
            p_vals = [gene.p_val for gene in cluster.filtered_genes]
            fdr_vals = [gene.fdr for gene in cluster.filtered_genes]
            gene_names = [gene.input_name for gene in cluster.filtered_genes]
            gene_ids = [gene.ncbi_id for gene in cluster.filtered_genes]
            rank = rankdata(p_vals, method='min')

            # First column is the cluster index (class variable); when the
            # cluster variable is a combination, the original indicator
            # values follow as additional columns.
            profiles = [[cluster.index] * num_of_genes]
            if len(self.new_cluster_profile):
                profiles.extend(
                    [p] * num_of_genes
                    for p in self.new_cluster_profile[cluster.index])

            data.extend(
                list(row)
                for row in zip(*profiles, gene_names, gene_ids, rank, scores,
                               p_vals, fdr_vals))

        out_data = Table(domain, data)
        out_data.attributes[TAX_ID] = self.tax_id
        out_data.attributes[GENE_AS_ATTRIBUTE_NAME] = False
        out_data.attributes[GENE_ID_COLUMN] = ENTREZ_ID
        self.Outputs.gene_scores.send(out_data)

    def gene_set_scores_output(self, selected_clusters):
        """Send a table of filtered gene set scores for the selected clusters.

        :param selected_clusters: iterable of Cluster objects whose
            ``filtered_gene_sets`` are exported
        """
        metas = [
            StringVariable('Term'),
            StringVariable('Term ID'),
            StringVariable('Rank'),
            ContinuousVariable('P-value'),
            ContinuousVariable('FDR'),
        ]

        if len(self.new_cluster_profile):
            # note: order is important
            metas = self.cluster_indicators + metas

        domain = Domain([], metas=metas, class_vars=self.cluster_var)

        data = []
        for cluster in selected_clusters:
            num_of_sets = len(cluster.filtered_gene_sets)

            p_vals = [gs.p_val for gs in cluster.filtered_gene_sets]
            fdr_vals = [gs.fdr for gs in cluster.filtered_gene_sets]
            gs_names = [gs.name for gs in cluster.filtered_gene_sets]
            gs_ids = [gs.gs_id for gs in cluster.filtered_gene_sets]
            rank = rankdata(p_vals, method='min')

            profiles = [[cluster.index] * num_of_sets]
            if len(self.new_cluster_profile):
                profiles.extend(
                    [p] * num_of_sets
                    for p in self.new_cluster_profile[cluster.index])

            data.extend(
                list(row)
                for row in zip(*profiles, gs_names, gs_ids, rank, p_vals,
                               fdr_vals))

        self.Outputs.gene_set_scores.send(Table(domain, data))

    def commit(self):
        """Send the outputs for the clusters selected in the table view.

        With no input data or no selected cluster all three outputs are
        cleared, so downstream widgets do not keep stale score tables.
        """
        selection_model = self.cluster_info_view.selectionModel()
        # The view has no selection model until a model has been set on it.
        selected_rows = (selection_model.selectedRows()
                         if selection_model is not None else [])

        if not self.input_data or not selected_rows:
            self.Outputs.selected_data.send(None)
            self.Outputs.gene_scores.send(None)
            self.Outputs.gene_set_scores.send(None)
            return

        selected_clusters = []
        selected_cluster_indexes = set()
        selected_cluster_genes = set()

        for sel_row in selected_rows:
            cluster = sel_row.data()
            selected_clusters.append(cluster)
            selected_cluster_indexes.add(cluster.index)
            selected_cluster_genes.update(
                gene.ncbi_id for gene in cluster.filtered_genes)

        # get columns of selected clusters
        selected_columns = [
            column for column in self.input_data.domain.attributes
            if self.gene_id_attribute in column.attributes and str(
                column.attributes[self.gene_id_attribute]) in
            selected_cluster_genes
        ]

        domain = Domain(selected_columns, self.input_data.domain.class_vars,
                        self.input_data.domain.metas)
        output_data = self.input_data.from_table(domain, self.input_data)

        # get rows of selected clusters
        selected_rows = [
            row_index
            for row_index, col_index in enumerate(self.rows_by_cluster)
            if col_index in selected_cluster_indexes
        ]

        # send to output signal
        self.Outputs.selected_data.send(output_data[selected_rows])
        self.gene_scores_output(selected_clusters)
        self.gene_set_scores_output(selected_clusters)
def __init__(self):
    """Initialise widget state and build the control and main areas.

    The control area holds the info label, cluster and batch indicator
    selectors, gene scoring options, gene set selection and the custom gene
    set label selector.  The main area holds the gene and gene set filter
    boxes and the cluster table view.
    """
    super().__init__()

    # widget attributes
    self.input_data = None
    self.store_input_domain = None
    self.input_genes_names = []
    self.input_genes_ids = []
    self.tax_id = None
    self.use_attr_names = None
    self.gene_id_attribute = None

    # custom gene set input
    self.feature_model = itemmodels.DomainModel(
        valid_types=(DiscreteVariable, StringVariable))
    self.custom_data = None
    self.custom_tax_id = None
    self.custom_use_attr_names = None
    self.custom_gene_id_attribute = None
    self.custom_gene_id_column = None
    self.num_of_custom_sets = None

    self.rows_by_cluster = None
    self.rows_by_batch = None
    self.clusters = []
    self.new_cluster_profile = []
    # Defined up front so the attribute exists before any input arrives.
    self.cluster_var = None

    # data model
    self.cluster_info_model = None

    # Info
    info_box = vBox(self.controlArea, 'Info')
    self.input_info = widgetLabel(info_box)

    # Cluster selection
    self.cluster_indicator_model = itemmodels.DomainModel(
        valid_types=(DiscreteVariable, ), separators=False)
    self.cluster_indicator_box = widgetBox(self.controlArea,
                                           'Cluster Indicator')

    self.cluster_indicator_view = listView(
        self.cluster_indicator_box,
        self,
        'cluster_indicators',
        model=self.cluster_indicator_model,
        selectionMode=QListWidget.MultiSelection,
        callback=self.invalidate,
        sizeHint=QSize(256, 70),
    )

    # Batch selection
    self.batch_indicator_model = itemmodels.DomainModel(
        valid_types=(DiscreteVariable, ), separators=False, placeholder="")
    box = widgetBox(self.controlArea, 'Batch Indicator')
    self.batch_indicator_combobox = comboBox(
        box,
        self,
        'batch_indicator',
        model=self.batch_indicator_model,
        sendSelectedValue=True,
        callback=self.batch_indicator_changed,
    )

    # Gene scoring
    box = widgetBox(self.controlArea, 'Gene Scoring')
    self.gene_scoring = GeneScoringWidget(box, self)
    self.gene_scoring.set_method_selection_area('scoring_method_selection')
    self.gene_scoring.set_method_design_area('scoring_method_design')
    self.gene_scoring.set_test_type('scoring_test_type')

    # Gene Sets widget
    gene_sets_box = widgetBox(self.controlArea, "Gene Sets")
    self.gs_widget = GeneSetsSelection(gene_sets_box, self,
                                       'stored_gene_sets_selection')
    self.gs_widget.hierarchy_tree_widget.itemClicked.connect(
        self.__gene_sets_enrichment)

    # custom gene sets area
    box = vBox(self.controlArea, "Custom Gene Sets")

    # Drop a stored indicator that the (still empty) model does not contain.
    if self.custom_gene_set_indicator not in self.feature_model:
        self.custom_gene_set_indicator = None

    self.gs_label_combobox = comboBox(
        box,
        self,
        "custom_gene_set_indicator",
        sendSelectedValue=True,
        model=self.feature_model,
        callback=self.handle_custom_gene_sets,
    )
    self.gs_label_combobox.setDisabled(True)

    # main area
    splitter = QSplitter(Qt.Horizontal, self.mainArea)
    self.mainArea.layout().addWidget(splitter)

    genes_filter = widgetBox(splitter, 'Filter Genes',
                             orientation=QHBoxLayout())
    spin(
        genes_filter,
        self,
        'max_gene_count',
        0,
        10000,
        label='Count',
        # The bound setting is ``max_gene_count``: this is an upper bound.
        tooltip='Maximum genes count',
        checked='use_gene_count_filter',
        callback=self.filter_genes,
        callbackOnReturn=True,
        checkCallback=self.filter_genes,
    )

    doubleSpin(
        genes_filter,
        self,
        'max_gene_p_value',
        0.0,
        1.0,
        0.0001,
        label='p-value',
        tooltip='Maximum p-value of the enrichment score',
        checked='use_gene_pval_filter',
        callback=self.filter_genes,
        callbackOnReturn=True,
        checkCallback=self.filter_genes,
    )

    doubleSpin(
        genes_filter,
        self,
        'max_gene_fdr',
        0.0,
        1.0,
        0.0001,
        label='FDR',
        tooltip='Maximum false discovery rate',
        checked='use_gene_fdr_filter',
        callback=self.filter_genes,
        callbackOnReturn=True,
        checkCallback=self.filter_genes,
    )

    gene_sets_filter = widgetBox(splitter, 'Filter Gene Sets',
                                 orientation=QHBoxLayout())
    spin(
        gene_sets_filter,
        self,
        'min_gs_count',
        0,
        DISPLAY_GENE_SETS_COUNT,
        label='Count',
        tooltip='Minimum genes count',
        checked='use_gs_count_filter',
        callback=self.filter_gene_sets,
        callbackOnReturn=True,
        checkCallback=self.filter_gene_sets,
    )

    doubleSpin(
        gene_sets_filter,
        self,
        'max_gs_p_value',
        0.0,
        1.0,
        0.0001,
        label='p-value',
        tooltip='Maximum p-value of the enrichment score',
        checked='use_gs_pval_filter',
        callback=self.filter_gene_sets,
        callbackOnReturn=True,
        checkCallback=self.filter_gene_sets,
    )

    doubleSpin(
        gene_sets_filter,
        self,
        'max_gs_fdr',
        0.0,
        1.0,
        0.0001,
        label='FDR',
        tooltip='Maximum false discovery rate',
        checked='use_gs_max_fdr',
        callback=self.filter_gene_sets,
        callbackOnReturn=True,
        checkCallback=self.filter_gene_sets,
    )

    self.cluster_info_view = QTableView()
    self.cluster_info_view.verticalHeader().setVisible(False)
    self.cluster_info_view.setItemDelegate(HTMLDelegate())
    self.cluster_info_view.horizontalHeader().hide()
    self.cluster_info_view.horizontalHeader().setSectionResizeMode(
        QHeaderView.Stretch)

    auto_commit(self.controlArea, self, "auto_commit", "&Commit", box=False)

    self.mainArea.layout().addWidget(self.cluster_info_view)
def __init__(self):
    """Set up widget state, the control area and the table in the main area."""
    super().__init__()

    # State filled in once inputs arrive.
    self.data = None
    self.results = None
    self.learners = []
    self.headers = []

    # Control area: learner list and output options.
    self.learners_box = gui.listBox(
        self.controlArea, self, "selected_learner", "learners",
        box=True, callback=self._learner_changed)

    self.outputbox = gui.vBox(self.controlArea, "Output")
    options_row = gui.hBox(self.outputbox)
    for setting_name, caption in (
            ("append_predictions", "Predictions"),
            ("append_probabilities", "Probabilities")):
        gui.checkBox(options_row, self, setting_name, caption,
                     callback=self._invalidate)

    gui.auto_commit(self.outputbox, self, "autocommit",
                    "Send Selected", "Send Automatically", box=False)

    # Main area: quantity selector above the table.
    self.mainArea.layout().setContentsMargins(0, 0, 0, 0)
    main_box = gui.vBox(self.mainArea, box=True)

    selector_row = gui.hBox(main_box)
    gui.rubber(selector_row)
    gui.comboBox(selector_row, self, "selected_quantity",
                 items=self.quantities, label="Show: ",
                 orientation=Qt.Horizontal, callback=self._update)

    self.tablemodel = QStandardItemModel(self)
    self.tableview = QTableView(editTriggers=QTableView.NoEditTriggers)
    table = self.tableview
    table.setModel(self.tablemodel)
    table.horizontalHeader().hide()
    table.verticalHeader().hide()
    table.horizontalHeader().setMinimumSectionSize(60)
    table.selectionModel().selectionChanged.connect(self._invalidate)
    table.setShowGrid(False)
    table.setItemDelegate(BorderedItemDelegate(Qt.white))
    table.setSizePolicy(QSizePolicy.MinimumExpanding,
                        QSizePolicy.MinimumExpanding)
    table.clicked.connect(self.cell_clicked)
    main_box.layout().addWidget(table)

    # Selection shortcuts below the table.
    buttons_row = gui.hBox(main_box)
    for caption, handler in (
            ("Select Correct", self.select_correct),
            ("Select Misclassified", self.select_wrong),
            ("Clear Selection", self.select_none)):
        gui.button(buttons_row, self, caption,
                   callback=handler, autoDefault=False)
class OWGEODatasets(OWWidget, ConcurrentWidgetMixin):
    """Widget for browsing GEO data sets and sending the selected one.

    The table lists the data sets known to ``GDSInfo``; selecting a row
    shows its description and sample annotations and starts
    ``run_download_task`` through the concurrent mixin.
    """

    name = "GEO Data Sets"
    description = "Access to Gene Expression Omnibus data sets."
    icon = "icons/OWGEODatasets.svg"
    priority = 10

    class Warning(OWWidget.Warning):
        using_local_files = Msg(
            "Can't connect to serverfiles. Using cached files.")

    class Error(OWWidget.Error):
        no_connection = Msg("Widget can't connect to serverfiles.")

    class Outputs:
        gds_data = Output("Expression Data", Table)

    search_pattern = Setting('')
    auto_commit = Setting(True)
    genes_as_rows = Setting(False)
    selected_gds = Setting(None)
    gds_selection_states = Setting({})
    splitter_settings = Setting((
        b'\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x01'
        b'\xea\x00\x00\x00\xd7\x01\x00\x00\x00\x07\x01\x00\x00\x00\x02',
        b'\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x01'
        b'\xb5\x00\x00\x02\x10\x01\x00\x00\x00\x07\x01\x00\x00\x00\x01',
    ))

    def __init__(self):
        """Load the data set index and build the GUI.

        If the index cannot be loaded because of a connection error, the
        ``no_connection`` error is shown and the GUI is not built.
        """
        OWWidget.__init__(self)
        ConcurrentWidgetMixin.__init__(self)

        try:
            self.gds_info: Optional[GDSInfo] = GDSInfo()
        except requests.exceptions.ConnectionError:
            self.gds_info = {}
            self.Error.no_connection()
            return

        self.gds_data: Optional[Table] = None

        # Control area
        box = widgetBox(self.controlArea, 'Info', addSpace=True)
        self.infoBox = widgetLabel(box, 'Initializing\n\n')

        box = widgetBox(self.controlArea, 'Output', addSpace=True)
        radioButtonsInBox(box, self, 'genes_as_rows',
                          ['Samples in rows', 'Genes in rows'],
                          callback=self._run)
        separator(box)

        rubber(self.controlArea)
        auto_commit(self.controlArea, self, 'auto_commit', '&Commit',
                    box=False)

        # Main Area

        # Filter widget
        self.filter = lineEdit(self.mainArea, self, 'search_pattern',
                               'Filter:', callbackOnType=True,
                               callback=self._apply_filter)
        self.mainArea.layout().addWidget(self.filter)

        splitter_vertical = QSplitter(Qt.Vertical, self.mainArea)

        self.mainArea.layout().addWidget(splitter_vertical)

        # set table view
        self.table_view = QTableView(splitter_vertical)
        self.table_view.setShowGrid(False)
        self.table_view.setSortingEnabled(True)
        self.table_view.sortByColumn(1, Qt.AscendingOrder)
        self.table_view.setAlternatingRowColors(True)
        self.table_view.verticalHeader().setVisible(False)
        self.table_view.setSelectionBehavior(QAbstractItemView.SelectRows)
        self.table_view.setSelectionMode(QAbstractItemView.SingleSelection)
        self.table_view.viewport().setMouseTracking(True)
        self.table_view.setSizeAdjustPolicy(
            QAbstractScrollArea.AdjustToContents)

        self.table_model = GEODatasetsModel()
        self.table_model.initialize(self.gds_info)
        self.table_view.setModel(self.table_model)

        self.table_view.horizontalHeader().setStretchLastSection(True)
        self.table_view.resizeColumnsToContents()

        v_header = self.table_view.verticalHeader()
        option = self.table_view.viewOptions()
        size = self.table_view.style().sizeFromContents(
            QStyle.CT_ItemViewItem, option, QSize(20, 20), self.table_view)

        v_header.setDefaultSectionSize(size.height() + 2)
        v_header.setMinimumSectionSize(5)

        # set item delegates
        self.table_view.setItemDelegateForColumn(
            self.table_model.pubmed_id_col,
            LinkStyledItemDelegate(self.table_view))
        self.table_view.setItemDelegateForColumn(
            self.table_model.gds_id_col,
            LinkStyledItemDelegate(self.table_view))
        self.table_view.setItemDelegateForColumn(
            self.table_model.indicator_col,
            IndicatorItemDelegate(self.table_view, role=Qt.DisplayRole))

        splitter_horizontal = QSplitter(Qt.Horizontal, splitter_vertical)

        # Description Widget
        box = widgetBox(splitter_horizontal, 'Description')
        self.description_widget = widgetLabel(box, '')
        self.description_widget.setWordWrap(True)
        rubber(box)

        # Sample Annotations Widget
        box = widgetBox(splitter_horizontal, 'Sample Annotations')
        self.annotations_widget = QTreeWidget(box)
        self.annotations_widget.setHeaderLabels(
            ['Type (Sample annotations)', 'Sample count'])
        self.annotations_widget.setRootIsDecorated(True)
        box.layout().addWidget(self.annotations_widget)
        self._annotations_updating = False
        self.annotations_widget.itemChanged.connect(
            self.on_annotation_selection_changed)
        self.splitters = splitter_vertical, splitter_horizontal

        for sp, setting in zip(self.splitters, self.splitter_settings):
            sp.splitterMoved.connect(self._splitter_moved)
            sp.restoreState(setting)

        self.table_view.selectionModel().selectionChanged.connect(
            self.on_gds_selection_changed)
        self._apply_filter()

        self.commit()

    def _splitter_moved(self, *args):
        """Store the current state of both splitters in the settings."""
        self.splitter_settings = [
            bytes(sp.saveState()) for sp in self.splitters
        ]

    def _set_description_widget(self):
        """Show the description of the selected data set."""
        self.description_widget.setText(
            self.selected_gds.get('description', 'Description not available.'))

    def _set_annotations_widget(self, gds):
        """Rebuild the sample annotation tree for the data set ``gds``.

        :param gds: mapping with at least the keys 'name' and 'subsets';
            each subset provides 'type', 'description' and 'sample_id'
        """
        self._annotations_updating = True
        try:
            self.annotations_widget.clear()

            annotations = defaultdict(set)
            subsets_count = {}

            for desc in gds['subsets']:
                annotations[desc['type']].add(desc['description'])
                subsets_count[desc['description']] = str(
                    len(desc['sample_id']))

            for _type, subsets in annotations.items():
                key = (gds["name"], _type)
                parent = QTreeWidgetItem(self.annotations_widget, [_type])
                parent.key = key
                for subset in subsets:
                    key = (gds['name'], _type, subset)
                    item = QTreeWidgetItem(
                        parent, [subset, subsets_count.get(subset, '')])
                    item.setFlags(item.flags() | Qt.ItemIsUserCheckable)
                    item.setCheckState(
                        0, self.gds_selection_states.get(key, Qt.Checked))
                    item.key = key
        finally:
            # Always re-enable itemChanged handling, even if gds is malformed.
            self._annotations_updating = False

        self.annotations_widget.expandAll()
        for i in range(self.annotations_widget.columnCount()):
            self.annotations_widget.resizeColumnToContents(i)

    def _set_selection(self):
        """Re-select the stored data set in the table without emitting signals."""
        if self.selected_gds is not None:
            index = self.table_model.get_row_index(
                self.selected_gds.get('name'))
            if index is not None:
                selection_model = self.table_view.selectionModel()
                selection_model.blockSignals(True)
                try:
                    self.table_view.selectRow(index)
                    self._handle_selection_changed()
                finally:
                    # Never leave the selection model with signals blocked.
                    selection_model.blockSignals(False)

    def _handle_selection_changed(self):
        """Update description, annotations and info for the selected row."""
        if self.table_model.table is not None:
            selection = self.table_view.selectionModel().selectedRows(
                self.table_model.gds_id_col)
            selected_gds_name = selection[0].data() if len(
                selection) > 0 else None

            if selected_gds_name:
                self.selected_gds = self.table_model.info.get(
                    selected_gds_name)
                self._set_annotations_widget(self.selected_gds)
                self._set_description_widget()
            else:
                self.annotations_widget.clear()
                self.description_widget.clear()

            self.update_info()

    def _apply_filter(self):
        """Filter the table by the search pattern and restore the selection."""
        if self.table_model.table is not None:
            self.table_model.show_table(
                filter_pattern=str(self.search_pattern))
            self._set_selection()
            self.update_info()

    def _run(self):
        """Start the download task for the selected data set, if any."""
        self.Warning.using_local_files.clear()
        if self.selected_gds is not None:
            self.gds_data = None
            self.start(run_download_task, self.selected_gds.get('name'),
                       self.get_selected_samples(), self.genes_as_rows)

    def on_gds_selection_changed(self):
        """React to a change of the selected row in the table."""
        self._handle_selection_changed()
        self.commit()

    def on_annotation_selection_changed(self):
        """Store the check states of all annotation items and commit."""
        if self._annotations_updating:
            return
        for i in range(self.annotations_widget.topLevelItemCount()):
            item = self.annotations_widget.topLevelItem(i)
            if 'key' in item.__dict__:
                self.gds_selection_states[item.key] = item.checkState(0)
            for j in range(item.childCount()):
                child = item.child(j)
                if 'key' in child.__dict__:
                    self.gds_selection_states[child.key] = child.checkState(0)

        self.commit()

    def update_info(self):
        """Update the info label with data set, cache and filter counts."""
        all_gds = len(self.table_model.info)
        text = "{} datasets\n{} datasets cached\n".format(
            all_gds, len(local_files.listfiles()))
        filtered = self.table_view.model().rowCount()
        if all_gds != filtered:
            text += "{} after filtering".format(filtered)
        self.infoBox.setText(text)

    def get_selected_samples(self):
        """
        Return the currently selected sample annotations.

        The return value maps each sample annotation type to the list of
        its selected sample values.

        .. note:: if some Sample annotation type has no selected values.
                  this method will return all values for it.

        """

        def childiter(item):
            """ Iterate over the children of an QTreeWidgetItem instance.
            """
            for i in range(item.childCount()):
                yield item.child(i)

        samples = []

        for stype in childiter(self.annotations_widget.invisibleRootItem()):
            selected_values = []
            all_values = []
            for sval in childiter(stype):
                value = (str(stype.text(0)), str(sval.text(0)))
                if self.gds_selection_states.get(sval.key, True):
                    selected_values.append(value)
                all_values.append(value)
            # If no sample of sample type is selected we don't filter on it.
            samples.extend(selected_values or all_values)

        _samples = defaultdict(list)
        for sample_type, sample_value in samples:
            _samples[sample_type].append(sample_value)
        return _samples

    def commit(self):
        """Run the download task for the current selection."""
        self._run()

    def on_exception(self, ex: Exception):
        """Show the 'using local files' warning when the task fails."""
        self.Warning.using_local_files()

    def on_done(self, result: Result):
        """Store the downloaded table, refresh the view and send the output."""
        assert isinstance(result.gds_dataset, Table)
        self.gds_data = result.gds_dataset

        if self.gds_info:
            self.table_model.update_cache_indicator()
            self._apply_filter()

        self.Outputs.gds_data.send(self.gds_data)

    def on_partial_result(self, result: Any) -> None:
        """Partial results are not used by this widget."""
        pass

    def onDeleteWidget(self):
        """Shut down the concurrent machinery before the widget is deleted."""
        self.shutdown()
        super().onDeleteWidget()

    def send_report(self):
        """Add the selected data set and its sample annotations to the report.

        Nothing is reported when no data set is selected.
        """
        # selected_gds defaults to None; subscripting it would raise TypeError.
        if self.selected_gds is None:
            return

        self.report_items(
            "GEO Dataset",
            [
                ("ID", self.selected_gds['name']),
                ("Title", self.selected_gds['title']),
                ("Organism", self.selected_gds['sample_organism']),
            ],
        )
        self.report_items(
            "Data",
            [
                ("Samples", self.selected_gds['sample_count']),
                ("Features", self.selected_gds['variables']),
                ("Genes", self.selected_gds['genes']),
            ],
        )
        self.report_name("Sample annotations")
        subsets = defaultdict(list)
        for subset in self.selected_gds['subsets']:
            subsets[subset['type']].append(
                (subset['description'], len(subset['sample_id'])))
        self.report_html += "<ul>"
        for _type in subsets:
            self.report_html += "<b>" + _type + ":</b></br>"
            for desc, count in subsets[_type]:
                self.report_html += 9 * " " + "<b>{}:</b> {}</br>".format(
                    desc, count)
        self.report_html += "</ul>"
def __init__(self):
    """Load the GDS index and build the control and main areas.

    If ``GDSInfo()`` raises ``requests.exceptions.ConnectionError`` the
    ``no_connection`` error is raised on the widget, ``gds_info`` is set to
    an empty dict and construction stops early: none of the GUI attributes
    created further down exist in that case.
    """
    OWWidget.__init__(self)
    ConcurrentWidgetMixin.__init__(self)

    try:
        self.gds_info: Optional[GDSInfo] = GDSInfo()
    except requests.exceptions.ConnectionError:
        self.gds_info = {}
        self.Error.no_connection()
        return

    # Result of the last finished download; filled in by on_done().
    self.gds_data: Optional[Table] = None

    # Control area
    box = widgetBox(self.controlArea, 'Info', addSpace=True)
    self.infoBox = widgetLabel(box, 'Initializing\n\n')

    box = widgetBox(self.controlArea, 'Output', addSpace=True)
    # Switching the orientation calls self._run directly.
    radioButtonsInBox(box, self, 'genes_as_rows', ['Samples in rows', 'Genes in rows'], callback=self._run)
    separator(box)

    rubber(self.controlArea)
    auto_commit(self.controlArea, self, 'auto_commit', '&Commit', box=False)

    # Main Area

    # Filter widget: every keystroke re-applies the filter.
    self.filter = lineEdit(self.mainArea, self, 'search_pattern', 'Filter:', callbackOnType=True, callback=self._apply_filter)
    self.mainArea.layout().addWidget(self.filter)

    # The vertical splitter holds the table view first and then the
    # horizontal splitter created further down.
    splitter_vertical = QSplitter(Qt.Vertical, self.mainArea)
    self.mainArea.layout().addWidget(splitter_vertical)

    # set table view
    self.table_view = QTableView(splitter_vertical)
    self.table_view.setShowGrid(False)
    self.table_view.setSortingEnabled(True)
    self.table_view.sortByColumn(1, Qt.AscendingOrder)
    self.table_view.setAlternatingRowColors(True)
    self.table_view.verticalHeader().setVisible(False)
    # Exactly one whole row can be selected at a time.
    self.table_view.setSelectionBehavior(QAbstractItemView.SelectRows)
    self.table_view.setSelectionMode(QAbstractItemView.SingleSelection)
    self.table_view.viewport().setMouseTracking(True)
    self.table_view.setSizeAdjustPolicy(QAbstractScrollArea.AdjustToContents)

    # The model is initialised from the GDS index before it is attached
    # to the view.
    self.table_model = GEODatasetsModel()
    self.table_model.initialize(self.gds_info)
    self.table_view.setModel(self.table_model)

    self.table_view.horizontalHeader().setStretchLastSection(True)
    self.table_view.resizeColumnsToContents()

    # Derive the default row height from the style's size for a 20x20
    # item, plus 2 pixels.
    v_header = self.table_view.verticalHeader()
    option = self.table_view.viewOptions()
    size = self.table_view.style().sizeFromContents(QStyle.CT_ItemViewItem, option, QSize(20, 20), self.table_view)

    v_header.setDefaultSectionSize(size.height() + 2)
    v_header.setMinimumSectionSize(5)

    # set item delegates
    self.table_view.setItemDelegateForColumn(self.table_model.pubmed_id_col, LinkStyledItemDelegate(self.table_view))
    self.table_view.setItemDelegateForColumn(self.table_model.gds_id_col, LinkStyledItemDelegate(self.table_view))
    self.table_view.setItemDelegateForColumn(self.table_model.indicator_col, IndicatorItemDelegate(self.table_view, role=Qt.DisplayRole))

    splitter_horizontal = QSplitter(Qt.Horizontal, splitter_vertical)

    # Description Widget
    box = widgetBox(splitter_horizontal, 'Description')
    self.description_widget = widgetLabel(box, '')
    self.description_widget.setWordWrap(True)
    rubber(box)

    # Sample Annotations Widget
    box = widgetBox(splitter_horizontal, 'Sample Annotations')
    self.annotations_widget = QTreeWidget(box)
    self.annotations_widget.setHeaderLabels(['Type (Sample annotations)', 'Sample count'])
    self.annotations_widget.setRootIsDecorated(True)
    box.layout().addWidget(self.annotations_widget)
    # Guard flag, initialised to False before the itemChanged signal is
    # connected.
    self._annotations_updating = False
    self.annotations_widget.itemChanged.connect(self.on_annotation_selection_changed)

    # Restore the saved splitter geometry and track further changes.
    self.splitters = splitter_vertical, splitter_horizontal

    for sp, setting in zip(self.splitters, self.splitter_settings):
        sp.splitterMoved.connect(self._splitter_moved)
        sp.restoreState(setting)

    # Connected last, after the model has been set on the view.
    self.table_view.selectionModel().selectionChanged.connect(self.on_gds_selection_changed)
    self._apply_filter()

    self.commit()
class OWConcordance(OWWidget):
    """Show every occurrence of a query word together with its context.

    The widget receives a corpus and, optionally, a query word. It outputs
    the documents that own the selected concordance rows and the
    concordance data returned by the model.
    """

    name = "Concordance"
    description = "Display the context of the word."
    icon = "icons/Concordance.svg"
    priority = 520

    class Inputs:
        corpus = Input("Corpus", Corpus)
        query_word = Input("Query Word", Topic)

    class Outputs:
        selected_documents = Output("Selected Documents", Corpus)
        concordances = Output("Concordances", Corpus)

    settingsHandler = PerfectDomainContextHandler(
        match_values=PerfectDomainContextHandler.MATCH_VALUES_ALL
    )
    autocommit = Setting(True)
    context_width = Setting(5)
    word = ContextSetting("", exclude_metas=False)
    selected_rows = Setting([], schema_only=True)

    class Warning(OWWidget.Warning):
        multiple_words_on_input = Msg("Multiple query words on input. "
                                      "Only the first one is considered!")

    def __init__(self):
        """Create the info box, the width spinner, the query line and the view."""
        super().__init__()

        self.corpus = None      # Corpus
        self.n_matching = ''    # Info on docs matching the word
        self.n_tokens = ''      # Info on tokens
        self.n_types = ''       # Info on types (unique tokens)
        self.is_word_on_input = False

        # Info attributes
        info_box = gui.widgetBox(self.controlArea, 'Info')
        gui.label(info_box, self, 'Tokens: %(n_tokens)s')
        gui.label(info_box, self, 'Types: %(n_types)s')
        gui.label(info_box, self, 'Matching: %(n_matching)s')

        # Width parameter
        gui.spin(self.controlArea, self, 'context_width', 3, 10, box=True,
                 label="Number of words:", callback=self.set_width)

        gui.rubber(self.controlArea)

        # Search
        c_box = gui.widgetBox(self.mainArea, orientation="vertical")
        self.input = gui.lineEdit(
            c_box, self, 'word', orientation=Qt.Horizontal,
            sizePolicy=QSizePolicy(QSizePolicy.MinimumExpanding,
                                   QSizePolicy.Fixed),
            label='Query:', callback=self.set_word, callbackOnType=True)
        self.input.setFocus()

        # Concordances view
        self.conc_view = QTableView()
        self.model = ConcordanceModel()
        self.conc_view.setModel(self.model)
        self.conc_view.setWordWrap(False)
        self.conc_view.setSelectionBehavior(QTableView.SelectRows)
        self.conc_view.setSelectionModel(DocumentSelectionModel(self.model))
        self.conc_view.setItemDelegate(HorizontalGridDelegate())
        self.conc_view.selectionModel().selectionChanged.connect(
            self.selection_changed)
        self.conc_view.horizontalHeader().hide()
        self.conc_view.setShowGrid(False)
        self.mainArea.layout().addWidget(self.conc_view)
        self.set_width()

        # Auto-commit box
        gui.auto_commit(self.controlArea, self, 'autocommit', 'Commit',
                        'Auto commit is on')

    def sizeHint(self):  # pragma: no cover
        """Return the preferred initial size of the widget."""
        return QSize(600, 400)

    def set_width(self):
        """Push the context width to the model and re-apply the selection."""
        # Grab the selection before the model call and select it again
        # afterwards.
        sel = self.conc_view.selectionModel().selection()
        self.model.set_width(self.context_width)
        if sel:
            self.conc_view.selectionModel().select(
                sel,
                QItemSelectionModel.SelectCurrent | QItemSelectionModel.Rows)

    def selection_changed(self):
        """Store the sorted, de-duplicated selected row numbers and commit."""
        selection = self.conc_view.selectionModel().selection()
        self.selected_rows = sorted({cell.row()
                                     for cell in selection.indexes()})
        self.commit()

    def set_selection(self, selection):
        """Select the given row numbers in the view; no-op when empty."""
        if selection:
            sel = QItemSelection()
            for row in selection:
                index = self.conc_view.model().index(row, 0)
                sel.select(index, index)
            self.conc_view.selectionModel().select(
                sel,
                QItemSelectionModel.SelectCurrent | QItemSelectionModel.Rows)

    @Inputs.corpus
    def set_corpus(self, data=None):
        """Handle a new (or removed) corpus on the input."""
        self.closeContext()
        self.corpus = data
        if data is None:    # data removed, clear selection
            self.selected_rows = []

        # The stored query is only restored from the context when the word
        # is not dictated by the query-word input.
        if not self.is_word_on_input:
            self.word = ""
            self.openContext(self.corpus)

        self.model.set_corpus(self.corpus)
        self.set_word()

    @Inputs.query_word
    def set_word_from_input(self, topic):
        """Take the query word from the first row of ``topic``, if any."""
        self.Warning.multiple_words_on_input.clear()
        if self.is_word_on_input:   # word changed, clear selection
            self.selected_rows = []
        self.is_word_on_input = topic is not None and len(topic) > 0
        # The line edit is disabled while the word comes from the input.
        self.input.setEnabled(not self.is_word_on_input)
        if self.is_word_on_input:
            if len(topic) > 1:
                self.Warning.multiple_words_on_input()
            self.word = topic.metas[0, 0]
            self.set_word()

    def set_word(self):
        """Apply the current query word, dropping the previous selection."""
        self.selected_rows = []
        self.model.set_word(self.word)
        self.update_widget()
        self.commit()

    def handleNewSignals(self):
        """Re-apply the stored row selection once all inputs are handled."""
        self.set_selection(self.selected_rows)

    def resize_columns(self):
        """Give the left and right context columns equal width."""
        # QTableView.setColumnWidth takes an int. The division yields a
        # float, which Python >= 3.10 no longer converts implicitly, so
        # truncate explicitly.
        col_width = int(
            (self.conc_view.width() - self.conc_view.columnWidth(1)) / 2 - 12)
        self.conc_view.setColumnWidth(0, col_width)
        self.conc_view.setColumnWidth(2, col_width)

    def resizeEvent(self, event):  # pragma: no cover
        """Keep the context columns balanced when the widget is resized."""
        super().resizeEvent(event)
        self.resize_columns()

    def update_widget(self):
        """Refresh column/row sizes and the numbers shown in the info box."""
        self.conc_view.resizeColumnToContents(1)
        self.resize_columns()
        self.conc_view.resizeRowsToContents()

        if self.corpus is not None:
            self.n_matching = '{}/{}'.format(
                self.model.matching_docs() if self.word else 0,
                len(self.corpus))
            self.n_tokens = self.model.n_tokens
            self.n_types = self.model.n_types
        else:
            self.n_matching = ''
            self.n_tokens = ''
            self.n_types = ''

    def commit(self):
        """Send the selected documents and the concordance data."""
        # Each selected row is mapped through model.word_index to the value
        # used to index the corpus; duplicates are collapsed.
        selected_docs = sorted({self.model.word_index[row][0]
                                for row in self.selected_rows})
        concordance = self.model.get_data()
        if selected_docs:
            selected = self.corpus[selected_docs]
            self.Outputs.selected_documents.send(selected)
        else:
            self.Outputs.selected_documents.send(None)
        self.Outputs.concordances.send(concordance)

    def send_report(self):
        """Add the query, the corpus statistics and the table to the report."""
        view = self.conc_view
        model = self.conc_view.model()
        self.report_items("Concordances", (
            ("Query", model.word),
            ("Tokens", model.n_tokens),
            ("Types", model.n_types),
            ("Matching", self.n_matching),
        ))
        self.report_table(view)
class OWConcordance(OWWidget):
    """Show every occurrence of a query word together with its context.

    The widget receives a corpus and, optionally, a query word, and outputs
    the documents that own the selected concordance rows.
    """

    name = "Concordance"
    description = "Display the context of the word."
    icon = "icons/Concordance.svg"
    priority = 520

    class Inputs:
        corpus = Input("Corpus", Corpus)
        query_word = Input("Query Word", Topic)

    class Outputs:
        selected_documents = Output("Selected Documents", Corpus)

    settingsHandler = PerfectDomainContextHandler(
        match_values=PerfectDomainContextHandler.MATCH_VALUES_ALL
    )
    autocommit = Setting(True)
    context_width = Setting(5)
    word = ContextSetting("", exclude_metas=False)
    selected_rows = Setting([], schema_only=True)

    class Warning(OWWidget.Warning):
        multiple_words_on_input = Msg("Multiple query words on input. "
                                      "Only the first one is considered!")

    def __init__(self):
        """Create the info box, the width spinner, the query line and the view."""
        super().__init__()

        self.corpus = None      # Corpus
        self.n_matching = ''    # Info on docs matching the word
        self.n_tokens = ''      # Info on tokens
        self.n_types = ''       # Info on types (unique tokens)
        self.is_word_on_input = False

        # Info attributes
        info_box = gui.widgetBox(self.controlArea, 'Info')
        gui.label(info_box, self, 'Tokens: %(n_tokens)s')
        gui.label(info_box, self, 'Types: %(n_types)s')
        gui.label(info_box, self, 'Matching: %(n_matching)s')

        # Width parameter
        gui.spin(self.controlArea, self, 'context_width', 3, 10, box=True,
                 label="Number of words:", callback=self.set_width)

        gui.rubber(self.controlArea)

        # Search
        c_box = gui.widgetBox(self.mainArea, orientation="vertical")
        self.input = gui.lineEdit(
            c_box, self, 'word', orientation=Qt.Horizontal,
            sizePolicy=QSizePolicy(QSizePolicy.MinimumExpanding,
                                   QSizePolicy.Fixed),
            label='Query:', callback=self.set_word, callbackOnType=True)
        self.input.setFocus()

        # Concordances view
        self.conc_view = QTableView()
        self.model = ConcordanceModel()
        self.conc_view.setModel(self.model)
        self.conc_view.setWordWrap(False)
        self.conc_view.setSelectionBehavior(QTableView.SelectRows)
        self.conc_view.setSelectionModel(DocumentSelectionModel(self.model))
        self.conc_view.setItemDelegate(HorizontalGridDelegate())
        self.conc_view.selectionModel().selectionChanged.connect(
            self.selection_changed)
        self.conc_view.horizontalHeader().hide()
        self.conc_view.setShowGrid(False)
        self.mainArea.layout().addWidget(self.conc_view)
        self.set_width()

        # Auto-commit box
        gui.auto_commit(self.controlArea, self, 'autocommit', 'Commit',
                        'Auto commit is on')

    def sizeHint(self):  # pragma: no cover
        """Return the preferred initial size of the widget."""
        return QSize(600, 400)

    def set_width(self):
        """Push the context width to the model and re-apply the selection."""
        # Grab the selection before the model call and select it again
        # afterwards.
        sel = self.conc_view.selectionModel().selection()
        self.model.set_width(self.context_width)
        if sel:
            self.conc_view.selectionModel().select(
                sel,
                QItemSelectionModel.SelectCurrent | QItemSelectionModel.Rows)

    def selection_changed(self):
        """Store the sorted, de-duplicated selected row numbers and commit."""
        selection = self.conc_view.selectionModel().selection()
        self.selected_rows = sorted({cell.row()
                                     for cell in selection.indexes()})
        self.commit()

    def set_selection(self, selection):
        """Select the given row numbers in the view; no-op when empty."""
        if selection:
            sel = QItemSelection()
            for row in selection:
                index = self.conc_view.model().index(row, 0)
                sel.select(index, index)
            self.conc_view.selectionModel().select(
                sel,
                QItemSelectionModel.SelectCurrent | QItemSelectionModel.Rows)

    @Inputs.corpus
    def set_corpus(self, data=None):
        """Handle a new (or removed) corpus on the input."""
        self.closeContext()
        self.corpus = data
        if data is None:    # data removed, clear selection
            self.selected_rows = []

        # The stored query is only restored from the context when the word
        # is not dictated by the query-word input.
        if not self.is_word_on_input:
            self.word = ""
            self.openContext(self.corpus)

        self.model.set_corpus(self.corpus)
        self.set_word()

    @Inputs.query_word
    def set_word_from_input(self, topic):
        """Take the query word from the first row of ``topic``, if any."""
        self.Warning.multiple_words_on_input.clear()
        if self.is_word_on_input:   # word changed, clear selection
            self.selected_rows = []
        self.is_word_on_input = topic is not None and len(topic) > 0
        # The line edit is disabled while the word comes from the input.
        self.input.setEnabled(not self.is_word_on_input)
        if self.is_word_on_input:
            if len(topic) > 1:
                self.Warning.multiple_words_on_input()
            self.word = topic.metas[0, 0]
            self.set_word()

    def set_word(self):
        """Apply the current query word to the model and commit."""
        self.model.set_word(self.word)
        self.update_widget()
        self.commit()

    def handleNewSignals(self):
        """Re-apply the stored row selection once all inputs are handled."""
        self.set_selection(self.selected_rows)

    def resize_columns(self):
        """Give the left and right context columns equal width."""
        # QTableView.setColumnWidth takes an int. The division yields a
        # float, which Python >= 3.10 no longer converts implicitly, so
        # truncate explicitly.
        col_width = int(
            (self.conc_view.width() - self.conc_view.columnWidth(1)) / 2 - 12)
        self.conc_view.setColumnWidth(0, col_width)
        self.conc_view.setColumnWidth(2, col_width)

    def resizeEvent(self, event):  # pragma: no cover
        """Keep the context columns balanced when the widget is resized."""
        super().resizeEvent(event)
        self.resize_columns()

    def update_widget(self):
        """Refresh column/row sizes and the numbers shown in the info box."""
        self.conc_view.resizeColumnToContents(1)
        self.resize_columns()
        self.conc_view.resizeRowsToContents()

        if self.corpus is not None:
            self.n_matching = '{}/{}'.format(
                self.model.matching_docs() if self.word else 0,
                len(self.corpus))
            self.n_tokens = self.model.n_tokens
            self.n_types = self.model.n_types
        else:
            self.n_matching = ''
            self.n_tokens = ''
            self.n_types = ''

    def commit(self):
        """Send the documents that own the selected rows, or None."""
        # Each selected row is mapped through model.word_index to the value
        # used to index the corpus; duplicates are collapsed.
        selected_docs = sorted({self.model.word_index[row][0]
                                for row in self.selected_rows})
        if selected_docs:
            selected = self.corpus[selected_docs]
            self.Outputs.selected_documents.send(selected)
        else:
            self.Outputs.selected_documents.send(None)
class OWCorpusViewer(OWWidget):
    """Browse the documents of a corpus and filter them with a regexp.

    Documents whose searchable text matches the filter are sent on the
    ``IO.MATCHED`` output, all others on ``IO.UNMATCHED``.
    """

    name = "Corpus Viewer"
    description = "Display corpus contents."
    icon = "icons/CorpusViewer.svg"
    priority = 70

    inputs = [(IO.DATA, Table, 'set_data')]
    outputs = [(IO.MATCHED, Corpus, widget.Default), (IO.UNMATCHED, Corpus)]

    search_indices = ContextSetting([0])   # features included in search
    display_indices = ContextSetting([0])  # features for display
    show_tokens = Setting(False)
    autocommit = Setting(True)

    class Warning(OWWidget.Warning):
        no_feats_search = Msg('No features included in search.')
        no_feats_display = Msg('No features selected for display.')

    def __init__(self):
        """Build the info box, the feature lists, the filter and the views."""
        super().__init__()

        self.corpus = None              # Corpus
        self.corpus_docs = None         # Documents generated from Corpus
        self.output_mask = []           # Output corpus indices
        self.doc_webview = None         # WebView for showing content
        self.search_features = []       # two copies are needed since Display allows drag & drop
        self.display_features = []

        # Info attributes
        self.update_info()
        info_box = gui.widgetBox(self.controlArea, 'Info')
        gui.label(info_box, self, 'Documents: %(n_documents)s')
        gui.label(info_box, self, 'Preprocessed: %(is_preprocessed)s')
        gui.label(info_box, self, ' ◦ Tokens: %(n_tokens)s')
        gui.label(info_box, self, ' ◦ Types: %(n_types)s')
        gui.label(info_box, self, 'POS tagged: %(is_pos_tagged)s')
        gui.label(info_box, self, 'N-grams range: %(ngram_range)s')
        gui.label(info_box, self, 'Matching: %(n_matching)s')

        # Search features
        self.search_listbox = gui.listBox(
            self.controlArea, self, 'search_indices', 'search_features',
            selectionMode=QListView.ExtendedSelection,
            box='Search features', callback=self.regenerate_docs,)

        # Display features
        display_box = gui.widgetBox(self.controlArea, 'Display features')
        self.display_listbox = gui.listBox(
            display_box, self, 'display_indices', 'display_features',
            selectionMode=QListView.ExtendedSelection,
            callback=self.show_docs, enableDragDrop=True)
        self.show_tokens_checkbox = gui.checkBox(display_box, self, 'show_tokens',
                                                 'Show Tokens && Tags',
                                                 callback=self.show_docs)

        # Auto-commit box
        gui.auto_commit(self.controlArea, self, 'autocommit', 'Send data',
                        'Auto send is on')

        # Search
        self.filter_input = gui.lineEdit(self.mainArea, self, '',
                                         orientation=Qt.Horizontal,
                                         sizePolicy=QSizePolicy(QSizePolicy.MinimumExpanding,
                                                                QSizePolicy.Fixed),
                                         label='RegExp Filter:')
        self.filter_input.textChanged.connect(self.refresh_search)

        # Main area
        self.splitter = QSplitter(
            orientation=Qt.Horizontal,
            childrenCollapsible=False,
        )

        # Document list
        self.doc_list = QTableView()
        self.doc_list.setSelectionBehavior(QTableView.SelectRows)
        self.doc_list.setSelectionMode(QTableView.ExtendedSelection)
        self.doc_list.setEditTriggers(QAbstractItemView.NoEditTriggers)
        self.doc_list.horizontalHeader().setSectionResizeMode(QHeaderView.Stretch)
        self.doc_list.horizontalHeader().setVisible(False)
        self.splitter.addWidget(self.doc_list)

        self.doc_list_model = QStandardItemModel(self)
        self.doc_list.setModel(self.doc_list_model)
        self.doc_list.selectionModel().selectionChanged.connect(self.show_docs)

        # Document contents
        # For PyQt5 WebEngine's setHtml grabs the focus and makes typing hard
        # More info: http://stackoverflow.com/questions/36609489
        # To bypass the annoying behaviour disable the widget for WebEngine
        self.doc_webview = gui.WebviewWidget(self.splitter, self,
                                             debug=True, enabled=HAVE_WEBKIT)

        self.mainArea.layout().addWidget(self.splitter)

    def copy_to_clipboard(self):
        """Copy the text selected in the document view to the clipboard."""
        text = self.doc_webview.selectedText()
        QApplication.clipboard().setText(text)

    def set_data(self, data=None):
        """Accept a new table/corpus (or None) from the input signal."""
        self.reset_widget()
        self.corpus = data
        if data is not None:
            # Plain tables are converted to a Corpus first.
            if not isinstance(data, Corpus):
                self.corpus = Corpus.from_table(data.domain, data)
            self.load_features()
            self.regenerate_docs()
        self.commit()

    def reset_widget(self):
        """Return the widget to its empty, no-data state."""
        # Corpus
        self.corpus = None
        self.corpus_docs = None
        self.output_mask = []
        # Widgets
        self.search_listbox.clear()
        self.display_listbox.clear()
        self.filter_input.clear()
        self.update_info()
        # Models/vars
        self.search_features.clear()
        self.display_features.clear()
        self.search_indices.clear()
        self.display_indices.clear()
        self.doc_list_model.clear()
        # Warnings
        self.Warning.clear()
        # WebView
        self.doc_webview.setHtml('')

    def load_features(self):
        """Fill both feature lists from the corpus domain and select all."""
        self.search_indices = []
        self.display_indices = []
        if self.corpus is not None:
            domain = self.corpus.domain
            self.search_features = list(filter_visible(chain(domain.variables, domain.metas)))
            self.display_features = list(filter_visible(chain(domain.variables, domain.metas)))
            # FIXME: Select features based on ContextSetting
            self.search_indices = list(range(len(self.search_features)))
            self.display_indices = list(range(len(self.display_features)))

            # Enable/disable tokens checkbox
            if not self.corpus.has_tokens():
                self.show_tokens_checkbox.setCheckState(False)
            self.show_tokens_checkbox.setEnabled(self.corpus.has_tokens())

    def list_docs(self):
        """ List documents into the left scrolling area """
        search_keyword = self.filter_input.text().strip('|')
        try:
            reg = re.compile(search_keyword, re.IGNORECASE)
        except re.error:
            # An incomplete or invalid pattern (typical while typing) leaves
            # the current list untouched. re.error is the same class the
            # deprecated sre_constants module exposes.
            return

        def is_match(x):
            return not bool(search_keyword) or reg.search(x)

        self.output_mask.clear()
        self.doc_list_model.clear()

        for i, (doc, title, content) in enumerate(zip(self.corpus, self.corpus.titles,
                                                      self.corpus_docs)):
            if is_match(content):
                item = QStandardItem()
                item.setData(title, Qt.DisplayRole)
                item.setData(doc, Qt.UserRole)
                self.doc_list_model.appendRow(item)
                self.output_mask.append(i)

        if self.doc_list_model.rowCount() > 0:
            self.doc_list.selectRow(0)          # Select the first document
        else:
            self.doc_webview.setHtml('')
        self.commit()

    def show_docs(self):
        """ Show the selected documents in the right area """
        HTML = '''
<!doctype html>
<html>
<head>
<meta charset='utf-8'>
<style>

table {{ border-collapse: collapse; }}
mark {{ background: #FFCD28; }}

tr > td {{
    padding-bottom: 3px;
    padding-top: 3px;
}}

body {{
    font-family: Helvetica;
    font-size: 10pt;
}}

.line {{ border-bottom: 1px solid #000; }}
.separator {{ height: 5px; }}

.variables {{
    vertical-align: top;
    padding-right: 10px;
}}

.token {{
    padding: 3px;
    border: 1px #B0B0B0 solid;
    margin-right: 5px;
    margin-bottom: 5px;
    display: inline-block;
}}

</style>
</head>
<body>
{}
</body>
</html>
'''
        if self.corpus is None:
            return

        self.Warning.no_feats_display.clear()
        if len(self.display_indices) == 0:
            self.Warning.no_feats_display()

        if self.show_tokens:
            tokens = list(self.corpus.ngrams_iterator(include_postags=True))

        # Cells of features that take part in the search get the
        # "mark-area" class.
        marked_search_features = [f for i, f in enumerate(self.search_features)
                                  if i in self.search_indices]

        html = '<table>'
        for doc_count, index in enumerate(self.doc_list.selectionModel().selectedRows()):
            if doc_count > 0:   # add split
                html += '<tr class="line separator"><td/><td/></tr>' \
                        '<tr class="separator"><td/><td/></tr>'

            row_ind = index.data(Qt.UserRole).row_index
            for ind in self.display_indices:
                feature = self.display_features[ind]
                mark = 'class="mark-area"' if feature in marked_search_features else ''
                value = index.data(Qt.UserRole)[feature.name]
                html += '<tr><td class="variables"><strong>{}:</strong></td>' \
                        '<td {}>{}</td></tr>'.format(
                            feature.name, mark, value)

            if self.show_tokens:
                html += '<tr><td class="variables"><strong>Tokens & Tags:</strong></td>' \
                        '<td>{}</td></tr>'.format(''.join('<span class="token">{}</span>'.format(
                            token) for token in tokens[row_ind]))

        html += '</table>'

        self.doc_webview.setHtml(HTML.format(html))
        self.load_js()
        self.highlight_docs()

    def load_js(self):
        """Evaluate the bundled highlighting scripts in the web view."""
        resources = os.path.join(os.path.dirname(__file__), 'resources')
        for script in ('jquery-3.1.1.min.js', 'jquery.mark.min.js', 'highlighter.js', ):
            # Read through a context manager so the file handle is closed
            # deterministically instead of being left to the garbage collector.
            with open(os.path.join(resources, script), encoding='utf-8') as js_file:
                source = js_file.read()
            self.doc_webview.evalJS(source)

    def regenerate_docs(self):
        """Rebuild the searchable text of each document and refresh the list."""
        self.corpus_docs = None
        self.Warning.no_feats_search.clear()
        if self.corpus is not None:
            feats = [self.search_features[i] for i in self.search_indices]
            if len(feats) == 0:
                self.Warning.no_feats_search()
            self.corpus_docs = self.corpus.documents_from_features(feats)
            self.refresh_search()

    def refresh_search(self):
        """Re-run the filter and update the info box when a corpus is set."""
        if self.corpus:
            self.list_docs()
            self.update_info()

    def highlight_docs(self):
        """Ask the page's ``mark`` function to highlight the filter text."""
        import json  # local import: only needed to build the JS literal

        search_keyword = self.filter_input.text().strip('|')
        if search_keyword:
            # json.dumps emits a quoted, fully escaped string literal that is
            # also valid JavaScript. Backslashes are doubled exactly as the
            # manual replace did, and quotes or control characters in the
            # filter no longer break the generated script.
            self.doc_webview.evalJS('mark({});'.format(json.dumps(search_keyword)))

    def update_info(self):
        """Recompute the attributes shown by the labels of the info box."""
        if self.corpus is not None:
            self.n_documents = len(self.corpus)
            self.n_matching = '{}/{}'.format(self.doc_list_model.rowCount(), self.n_documents)
            self.n_tokens = sum(map(len, self.corpus.tokens)) if self.corpus.has_tokens() else 'n/a'
            self.n_types = len(self.corpus.dictionary) if self.corpus.has_tokens() else 'n/a'
            self.is_preprocessed = self.corpus.has_tokens()
            self.is_pos_tagged = self.corpus.pos_tags is not None
            self.ngram_range = '{}-{}'.format(*self.corpus.ngram_range)
        else:
            self.n_documents = ''
            self.n_matching = ''
            self.n_tokens = ''
            self.n_types = ''
            self.is_preprocessed = ''
            self.is_pos_tagged = ''
            self.ngram_range = ''

    def commit(self):
        """Send the matching and the non-matching documents."""
        if self.corpus is not None:
            matched = self.corpus[self.output_mask]
            # A set makes the membership test below O(1) per document.
            output_mask = set(self.output_mask)
            unmatched_mask = [i for i in range(len(self.corpus)) if i not in output_mask]
            unmatched = self.corpus[unmatched_mask]
            self.send(IO.MATCHED, matched)
            self.send(IO.UNMATCHED, unmatched)
        else:
            self.send(IO.MATCHED, None)
            self.send(IO.UNMATCHED, None)
def set_dataset(self, data, tid=None):
    """Set the input dataset.

    A dataset that is not None is shown in the tab bound to ``tid``; the
    tab's view is created the first time that ``tid`` is seen and reused
    afterwards. Passing ``None`` removes the tab for ``tid`` if there is
    one. In every case the row/column selection is reset, the context is
    reopened and the output is committed.
    """
    self.closeContext()
    has_slot = tid in self._inputs

    if data is None:
        if has_slot:
            # Input removed: drop the tab together with its view.
            removed = self._inputs.pop(tid)
            table = removed.view
            table.hide()
            table.deleteLater()
            self.tabs.removeTab(self.tabs.indexOf(table))

            active = self.tabs.currentWidget()
            if active is not None:
                self.set_info(active._input_slot.summary)
    else:
        if has_slot:
            # update existing input slot
            table = self._inputs[tid].view
            # reset the (header) view state.
            table.setModel(None)
            table.horizontalHeader().setSortIndicator(-1, Qt.AscendingOrder)
        else:
            table = QTableView()
            table.setSortingEnabled(True)
            table.setHorizontalScrollMode(QTableView.ScrollPerPixel)

            if self.select_rows:
                table.setSelectionBehavior(QTableView.SelectRows)

            hheader = table.horizontalHeader()
            hheader.setSectionsMovable(True)
            hheader.setSectionsClickable(True)
            hheader.setSortIndicatorShown(True)
            hheader.setSortIndicator(-1, Qt.AscendingOrder)

            # QHeaderView does not 'reset' the model sort column,
            # because there is no guaranty (requirement) that the
            # models understand the -1 sort column.
            def sort_reset(index, order):
                if index == -1 and table.model() is not None:
                    table.model().sort(index, order)

            hheader.sortIndicatorChanged.connect(sort_reset)

        table.dataset = data
        self.tabs.addTab(table, getattr(data, "name", "Data"))

        self._setup_table_view(table, data)
        new_slot = TableSlot(tid, data, table_summary(data), table)
        table._input_slot = new_slot
        self._inputs[tid] = new_slot

        self.tabs.setCurrentIndex(self.tabs.indexOf(table))

        self.set_info(new_slot.summary)

        pending_len = new_slot.summary.len
        if isinstance(pending_len, concurrent.futures.Future):
            # The length is still being computed: queue a call to
            # _update_info once the future completes.
            def update(f):
                QMetaObject.invokeMethod(self, "_update_info", Qt.QueuedConnection)

            pending_len.add_done_callback(update)

    self.tabs.tabBar().setVisible(self.tabs.count() > 1)
    self.selected_rows = []
    self.selected_cols = []
    self.openContext(data)
    self.set_selection()
    self.commit()
class OWCorpusViewer(OWWidget):
    """Widget that lists corpus documents and renders the selected ones.

    The document list on the left can be narrowed with a regular
    expression; documents selected in the list are rendered as HTML on the
    right and sent to the "Matching Docs" output, all remaining documents
    to "Other Docs".
    """
    name = "Corpus Viewer"
    description = "Display corpus contents."
    icon = "icons/CorpusViewer.svg"
    priority = 500

    class Inputs:
        corpus = Input("Corpus", Corpus, replaces=["Data"])

    class Outputs:
        matching_docs = Output("Matching Docs", Corpus, default=True)
        other_docs = Output("Other Docs", Corpus)

    settingsHandler = PerfectDomainContextHandler(
        match_values=PerfectDomainContextHandler.MATCH_VALUES_ALL
    )

    # features included in search
    search_indices = ContextSetting([], exclude_metas=False)
    # features for display
    display_indices = ContextSetting([], exclude_metas=False)
    display_features = ContextSetting([], exclude_metas=False)
    selected_documents = ContextSetting([])
    regexp_filter = ContextSetting("")

    show_tokens = Setting(False)
    autocommit = Setting(True)

    class Warning(OWWidget.Warning):
        no_feats_search = Msg('No features included in search.')
        no_feats_display = Msg('No features selected for display.')

    def __init__(self):
        """Initialise widget state and build the control and main areas."""
        super().__init__()

        self.corpus = None              # Corpus
        self.corpus_docs = None         # Documents generated from Corpus
        self.doc_webview = None         # WebView for showing content
        self.search_features = []       # two copies are needed since Display allows drag & drop
        self.display_list_indices = [0]

        # Info attributes
        self.update_info()
        info_box = gui.widgetBox(self.controlArea, 'Info')
        gui.label(info_box, self, 'Documents: %(n_documents)s')
        gui.label(info_box, self, 'Preprocessed: %(is_preprocessed)s')
        gui.label(info_box, self, ' ◦ Tokens: %(n_tokens)s')
        gui.label(info_box, self, ' ◦ Types: %(n_types)s')
        gui.label(info_box, self, 'POS tagged: %(is_pos_tagged)s')
        gui.label(info_box, self, 'N-grams range: %(ngram_range)s')
        gui.label(info_box, self, 'Matching: %(n_matching)s')

        # Search features
        self.search_listbox = gui.listBox(
            self.controlArea, self, 'search_indices', 'search_features',
            selectionMode=QListView.ExtendedSelection,
            box='Search features', callback=self.search_features_changed)

        # Display features
        display_box = gui.widgetBox(self.controlArea, 'Display features')
        self.display_listbox = gui.listBox(
            display_box, self, 'display_list_indices', 'display_features',
            selectionMode=QListView.ExtendedSelection,
            callback=self.show_docs, enableDragDrop=True)
        self.show_tokens_checkbox = gui.checkBox(
            display_box, self, 'show_tokens', 'Show Tokens && Tags',
            callback=self.show_docs)

        # Auto-commit box
        gui.auto_commit(self.controlArea, self, 'autocommit',
                        'Send data', 'Auto send is on')

        # Search
        self.filter_input = gui.lineEdit(
            self.mainArea, self, 'regexp_filter',
            orientation=Qt.Horizontal,
            sizePolicy=QSizePolicy(QSizePolicy.MinimumExpanding,
                                   QSizePolicy.Fixed),
            label='RegExp Filter:')
        self.filter_input.textChanged.connect(self.refresh_search)

        # Main area
        self.splitter = QSplitter(
            orientation=Qt.Horizontal,
            childrenCollapsible=False,
        )

        # Document list
        self.doc_list = QTableView()
        self.doc_list.setSelectionBehavior(QTableView.SelectRows)
        self.doc_list.setSelectionMode(QTableView.ExtendedSelection)
        self.doc_list.setEditTriggers(QAbstractItemView.NoEditTriggers)
        self.doc_list.horizontalHeader().setSectionResizeMode(
            QHeaderView.Stretch)
        self.doc_list.horizontalHeader().setVisible(False)
        self.splitter.addWidget(self.doc_list)

        self.doc_list_model = QStandardItemModel(self)
        self.doc_list.setModel(self.doc_list_model)
        self.doc_list.selectionModel().selectionChanged.connect(
            self.selection_changed
        )

        # Document contents
        self.doc_webview = gui.WebviewWidget(self.splitter, debug=False)

        self.mainArea.layout().addWidget(self.splitter)

    def copy_to_clipboard(self):
        """Copy the text currently selected in the web view to the clipboard."""
        text = self.doc_webview.selectedText()
        QApplication.clipboard().setText(text)

    @Inputs.corpus
    def set_data(self, corpus=None):
        """Accept a new corpus (or ``None``) and refresh the whole widget."""
        self.closeContext()
        self.reset_widget()
        self.corpus = corpus
        self.search_features = []
        if corpus is not None:
            domain = self.corpus.domain
            # Enable/disable tokens checkbox
            if not self.corpus.has_tokens():
                # Pass the enum member rather than a bool: the parameter is
                # a Qt.CheckState.
                self.show_tokens_checkbox.setCheckState(Qt.Unchecked)
            self.show_tokens_checkbox.setEnabled(self.corpus.has_tokens())

            self.search_features = list(
                filter_visible(chain(domain.variables, domain.metas)))
            self.display_features = list(
                filter_visible(chain(domain.variables, domain.metas)))
            self.search_indices = list(range(len(self.search_features)))
            self.display_indices = list(range(len(self.display_features)))
            # Preselect the first document; an empty corpus has no first
            # title, so fall back to an empty selection instead of raising.
            titles = corpus.titles
            self.selected_documents = [titles[0]] if len(titles) else []
            self.openContext(self.corpus)
            self.display_list_indices = self.display_indices
            self.regenerate_docs()
            self.list_docs()
            self.update_info()
            self.set_selection()
            self.show_docs()
        self.commit()

    def reset_widget(self):
        """Clear the corpus, list boxes, filter, models, warnings and view."""
        # Corpus
        self.corpus = None
        self.corpus_docs = None
        self.display_features = []
        # Widgets
        self.search_listbox.clear()
        self.display_listbox.clear()
        self.filter_input.clear()
        self.update_info()
        # Models/vars
        self.search_features.clear()
        self.search_indices.clear()
        self.display_indices.clear()
        self.doc_list_model.clear()
        # Warnings
        self.Warning.clear()
        # WebView
        self.doc_webview.setHtml('')

    def list_docs(self):
        """List documents matching the filter into the left scrolling area.

        Does nothing when no documents were generated or when the filter is
        not a valid regular expression (the previous list is then kept).
        """
        if self.corpus_docs is None:
            return
        search_keyword = self.regexp_filter.strip('|')
        try:
            reg = re.compile(search_keyword, re.IGNORECASE)
        except re.error:
            return

        def is_match(x):
            # An empty filter matches every document.
            return not bool(search_keyword) or reg.search(x)

        self.doc_list_model.clear()

        for doc, title, content in zip(self.corpus, self.corpus.titles,
                                       self.corpus_docs):
            if is_match(content):
                item = QStandardItem()
                item.setData(str(title), Qt.DisplayRole)
                item.setData(doc, Qt.UserRole)
                self.doc_list_model.appendRow(item)

    def get_selected_documents_from_view(self) -> Set[str]:
        """
        Returns
        -------
        Set with names of selected documents in the QTableView
        """
        return {
            i.data(Qt.DisplayRole)
            for i in self.doc_list.selectionModel().selectedRows()
        }

    def set_selection(self) -> None:
        """
        Select documents in selected_documents attribute in the view
        """
        view = self.doc_list
        model = view.model()

        previously_selected = self.selected_documents.copy()
        selection = QItemSelection()
        for row in range(model.rowCount()):
            document = model.data(model.index(row, 0), Qt.DisplayRole)
            if document in self.selected_documents:
                selection.append(QItemSelectionRange(
                    view.model().index(row, 0),
                    view.model().index(row, 0)
                ))
        view.selectionModel().select(
            selection, QItemSelectionModel.ClearAndSelect
        )
        if len(selection) == 0:
            # When the selection is empty Qt does not emit selectionChanged,
            # so show_docs has to be triggered manually.
            self.show_docs()
        # select() emits selectionChanged, which calls selection_changed.
        # While filtering, that would drop documents that are currently
        # filtered out from self.selected_documents; keep them so they are
        # selected again once the user removes the filter.
        self.selected_documents = previously_selected

    def selection_changed(self) -> None:
        """
        Function is called every time the selection changes - when user select
        new range of documents
        """
        self.selected_documents = self.get_selected_documents_from_view()
        self.show_docs()
        self.commit()

    def show_docs(self):
        """Show the selected documents in the right area."""
        HTML = '''
        <!doctype html>
        <html>
        <head>
        <script type="text/javascript" src="resources/jquery-3.1.1.min.js">
        </script>
        <script type="text/javascript" src="resources/jquery.mark.min.js">
        </script>
        <script type="text/javascript" src="resources/highlighter.js">
        </script>
        <meta charset='utf-8'>
        <style>

        table {{ border-collapse: collapse; }}
        mark {{ background: #FFCD28; }}

        tr > td {{
            padding-bottom: 3px;
            padding-top: 3px;
        }}

        body {{
            font-family: Helvetica;
            font-size: 10pt;
        }}

        .line {{ border-bottom: 1px solid #000; }}
        .separator {{ height: 5px; }}

        .variables {{
            vertical-align: top;
            padding-right: 10px;
        }}

        .content {{
            /* Adopted from https://css-tricks.com/snippets/css/prevent-long-urls-from-breaking-out-of-container/ */

            /* These are technically the same, but use both */
            overflow-wrap: break-word;
            word-wrap: break-word;

            -ms-word-break: break-all;
            /* This is the dangerous one in WebKit, as it breaks things wherever */
            word-break: break-all;
            /* Instead use this non-standard one: */
            word-break: break-word;

            /* Adds a hyphen where the word breaks, if supported (No Blink) */
            -ms-hyphens: auto;
            -moz-hyphens: auto;
            -webkit-hyphens: auto;
            hyphens: auto;
        }}

        .token {{
            padding: 3px;
            border: 1px #B0B0B0 solid;
            margin-right: 5px;
            margin-bottom: 5px;
            display: inline-block;
        }}

        img {{
            max-width: 100%;
        }}

        </style>
        </head>
        <body>
        {}
        </body>
        </html>
        '''
        self.display_indices = self.display_list_indices
        if self.corpus is None:
            return

        self.Warning.no_feats_display.clear()
        if len(self.display_indices) == 0:
            self.Warning.no_feats_display()

        if self.show_tokens:
            tokens = list(self.corpus.ngrams_iterator(include_postags=True))

        marked_search_features = [
            f for i, f in enumerate(self.search_features)
            if i in self.search_indices]

        # Collect the HTML fragments and join once instead of growing a
        # string with repeated "+=".
        parts = ['<table>']
        selected_rows = self.doc_list.selectionModel().selectedRows()
        for doc_count, index in enumerate(selected_rows):
            if doc_count > 0:   # add split
                parts.append('<tr class="line separator"><td/><td/></tr>'
                             '<tr class="separator"><td/><td/></tr>')

            row_ind = index.data(Qt.UserRole).row_index
            for ind in self.display_indices:
                feature = self.display_features[ind]
                value = str(index.data(Qt.UserRole)[feature.name])
                if feature in marked_search_features:
                    value = self.__mark_text(value)
                value = value.replace('\n', '<br/>')
                is_image = feature.attributes.get('type', '') == 'image'
                if is_image and value != '?':
                    value = '<img src="{}"></img>'.format(value)
                parts.append(
                    '<tr><td class="variables"><strong>{}:</strong></td>'
                    '<td class="content">{}</td></tr>'.format(
                        feature.name, value))

            if self.show_tokens:
                parts.append(
                    '<tr><td class="variables"><strong>Tokens & Tags:</strong></td>'
                    '<td>{}</td></tr>'.format(''.join(
                        '<span class="token">{}</span>'.format(token)
                        for token in tokens[row_ind])))

        parts.append('</table>')
        html = ''.join(parts)
        # Relative "resources/..." URLs in the template are resolved against
        # this file's location.
        base = QUrl.fromLocalFile(__file__)
        self.doc_webview.setHtml(HTML.format(html), base)

    def __mark_text(self, text):
        """Wrap every match of the current filter in ``text`` in ``<mark>``.

        Returns ``text`` unchanged when the filter is empty, is not a valid
        regular expression, or does not match.
        """
        search_keyword = self.regexp_filter.strip('|')
        if not search_keyword:
            return text

        try:
            reg = re.compile(search_keyword, re.IGNORECASE | re.MULTILINE)
        except re.error:
            return text

        matches = list(reg.finditer(text))
        if not matches:
            return text

        text = list(text)
        # Replace from the last match backwards so that the offsets of the
        # earlier matches stay valid.
        for m in matches[::-1]:
            text[m.start():m.end()] = list(
                '<mark data-markjs="true">{}</mark>'.format(
                    "".join(text[m.start():m.end()])))

        return "".join(text)

    def search_features_changed(self):
        """Rebuild the searchable documents and re-run the search."""
        self.regenerate_docs()
        self.refresh_search()

    def regenerate_docs(self):
        """Regenerate ``corpus_docs`` from the selected search features."""
        self.corpus_docs = None
        self.Warning.no_feats_search.clear()
        if self.corpus is not None:
            feats = [self.search_features[i] for i in self.search_indices]
            if len(feats) == 0:
                self.Warning.no_feats_search()
            self.corpus_docs = self.corpus.documents_from_features(feats)

    def refresh_search(self):
        """Re-filter the document list, restore the selection and commit."""
        if self.corpus is not None:
            self.list_docs()
            self.set_selection()
            self.update_info()
            self.commit()

    def update_info(self):
        """Refresh the attributes displayed by the labels of the Info box."""
        if self.corpus is not None:
            self.n_documents = len(self.corpus)
            self.n_matching = '{}/{}'.format(
                self.doc_list_model.rowCount(), self.n_documents)
            self.n_tokens = sum(map(len, self.corpus.tokens)) \
                if self.corpus.has_tokens() else 'n/a'
            self.n_types = len(self.corpus.dictionary) \
                if self.corpus.has_tokens() else 'n/a'
            self.is_preprocessed = self.corpus.has_tokens()
            self.is_pos_tagged = self.corpus.pos_tags is not None
            self.ngram_range = '{}-{}'.format(*self.corpus.ngram_range)
        else:
            self.n_documents = ''
            self.n_matching = ''
            self.n_tokens = ''
            self.n_types = ''
            self.is_preprocessed = ''
            self.is_pos_tagged = ''
            self.ngram_range = ''

    def commit(self):
        """Send the selected documents and the remaining ones to the outputs.

        ``None`` is sent on an output whose subset is empty, and on both
        outputs when there is no corpus.
        """
        matched = unmatched = None
        corpus = self.corpus
        if corpus is not None:
            # Set of the selected documents that are currently in the view.
            selected_docs = self.get_selected_documents_from_view()
            titles = corpus.titles
            matched_mask = [
                i for i, t in enumerate(titles) if t in selected_docs
            ]
            unmatched_mask = [
                i for i, t in enumerate(titles) if t not in selected_docs
            ]

            matched = corpus[matched_mask] if len(matched_mask) else None
            unmatched = corpus[unmatched_mask] if len(unmatched_mask) else None
        self.Outputs.matching_docs.send(matched)
        self.Outputs.other_docs.send(unmatched)

    def send_report(self):
        """Report the filter query and the matching-documents count."""
        self.report_items((
            ("Query", self.regexp_filter),
            ("Matching documents", self.n_matching),
        ))

    def showEvent(self, event):
        """Adjust the splitter whenever the widget is shown."""
        super().showEvent(event)
        self.update_splitter()

    def update_splitter(self):
        """
        Update splitter that document list on the left never take more
        than 1/3 of the space. It is only set on showEvent. If user
        later changes sizes it stays as it is.
        """
        w1, w2 = self.splitter.sizes()
        ws = w1 + w2
        if w2 < 2/3 * ws:
            # setSizes expects integers; floats are rejected by the Qt
            # bindings on Python >= 3.10, so truncate explicitly.
            self.splitter.setSizes([int(ws * 1/3), int(ws * 2/3)])
def __init__(self):
    """Initialise widget state and build the control and main areas.

    The control area receives the Info box, the search/display feature
    lists and the auto-commit box; the main area receives the filter line
    edit and a splitter holding the document list and the web view.
    """
    super().__init__()

    self.corpus = None              # Corpus
    self.corpus_docs = None         # Documents generated from Corpus
    self.doc_webview = None         # WebView for showing content
    self.search_features = []       # two copies are needed since Display allows drag & drop
    self.display_list_indices = [0]

    # Info attributes: the labels read the attributes set by update_info().
    self.update_info()
    info_panel = gui.widgetBox(self.controlArea, 'Info')
    info_templates = (
        'Documents: %(n_documents)s',
        'Preprocessed: %(is_preprocessed)s',
        ' ◦ Tokens: %(n_tokens)s',
        ' ◦ Types: %(n_types)s',
        'POS tagged: %(is_pos_tagged)s',
        'N-grams range: %(ngram_range)s',
        'Matching: %(n_matching)s',
    )
    for template in info_templates:
        gui.label(info_panel, self, template)

    # Search features
    self.search_listbox = gui.listBox(
        self.controlArea, self, 'search_indices', 'search_features',
        selectionMode=QListView.ExtendedSelection,
        box='Search features',
        callback=self.search_features_changed)

    # Display features
    features_panel = gui.widgetBox(self.controlArea, 'Display features')
    self.display_listbox = gui.listBox(
        features_panel, self, 'display_list_indices', 'display_features',
        selectionMode=QListView.ExtendedSelection,
        callback=self.show_docs,
        enableDragDrop=True)
    self.show_tokens_checkbox = gui.checkBox(
        features_panel, self, 'show_tokens', 'Show Tokens && Tags',
        callback=self.show_docs)

    # Auto-commit box
    gui.auto_commit(
        self.controlArea, self, 'autocommit', 'Send data', 'Auto send is on')

    # Search
    filter_policy = QSizePolicy(QSizePolicy.MinimumExpanding,
                                QSizePolicy.Fixed)
    self.filter_input = gui.lineEdit(
        self.mainArea, self, 'regexp_filter',
        orientation=Qt.Horizontal,
        sizePolicy=filter_policy,
        label='RegExp Filter:')
    self.filter_input.textChanged.connect(self.refresh_search)

    # Main area
    self.splitter = QSplitter(
        orientation=Qt.Horizontal,
        childrenCollapsible=False,
    )

    # Document list: whole-row, multi-selection, read-only, header hidden.
    doc_list = QTableView()
    self.doc_list = doc_list
    doc_list.setSelectionBehavior(QTableView.SelectRows)
    doc_list.setSelectionMode(QTableView.ExtendedSelection)
    doc_list.setEditTriggers(QAbstractItemView.NoEditTriggers)
    list_header = doc_list.horizontalHeader()
    list_header.setSectionResizeMode(QHeaderView.Stretch)
    list_header.setVisible(False)
    self.splitter.addWidget(doc_list)

    self.doc_list_model = QStandardItemModel(self)
    doc_list.setModel(self.doc_list_model)
    doc_list.selectionModel().selectionChanged.connect(
        self.selection_changed)

    # Document contents
    self.doc_webview = gui.WebviewWidget(self.splitter, debug=False)

    self.mainArea.layout().addWidget(self.splitter)
def set_dataset(self, data, tid=None):
    """Show, replace or remove the table registered under input ``tid``.

    When ``data`` is not ``None`` it is displayed in a tab: the view already
    stored for ``tid`` is reused if there is one, otherwise a new view is
    created.  When ``data`` is ``None`` and ``tid`` is known, its view and
    tab are removed.  In every case the stored row/column selection is
    cleared, the context is reopened and the outputs are recommitted.
    """
    self.closeContext()

    def make_view():
        # Build a sortable table view whose header sections can be moved.
        table = QTableView()
        table.setSortingEnabled(True)
        table.setHorizontalScrollMode(QTableView.ScrollPerPixel)
        if self.select_rows:
            table.setSelectionBehavior(QTableView.SelectRows)

        hheader = table.horizontalHeader()
        hheader.setSectionsMovable(True)
        hheader.setSectionsClickable(True)
        hheader.setSortIndicatorShown(True)
        hheader.setSortIndicator(-1, Qt.AscendingOrder)

        # QHeaderView does not 'reset' the model sort column, because
        # there is no guarantee (requirement) that models understand the
        # -1 sort column, so forward that case to the model explicitly.
        def sort_reset(index, order):
            if table.model() is not None and index == -1:
                table.model().sort(index, order)

        hheader.sortIndicatorChanged.connect(sort_reset)
        return table

    if data is not None:
        if tid in self._inputs:
            # Existing input slot: keep its view but reset the model and
            # the header's sort indicator.
            table_view = self._inputs[tid].view
            table_view.setModel(None)
            table_view.horizontalHeader().setSortIndicator(
                -1, Qt.AscendingOrder)
        else:
            table_view = make_view()

        table_view.dataset = data
        self.tabs.addTab(table_view, getattr(data, "name", "Data"))
        self._setup_table_view(table_view, data)

        new_slot = TableSlot(tid, data, table_summary(data), table_view)
        table_view._input_slot = new_slot
        self._inputs[tid] = new_slot

        self.tabs.setCurrentIndex(self.tabs.indexOf(table_view))
        self.set_info(new_slot.summary)

        pending_len = new_slot.summary.len
        if isinstance(pending_len, concurrent.futures.Future):
            # The length is still being computed; refresh the info box via
            # a queued call once the future completes.
            def update(_future):
                QMetaObject.invokeMethod(
                    self, "_update_info", Qt.QueuedConnection)

            pending_len.add_done_callback(update)
    elif tid in self._inputs:
        # Input removed: drop its slot, view and tab.
        stale_view = self._inputs.pop(tid).view
        stale_view.hide()
        stale_view.deleteLater()
        self.tabs.removeTab(self.tabs.indexOf(stale_view))

        shown = self.tabs.currentWidget()
        if shown is not None:
            self.set_info(shown._input_slot.summary)

    self.tabs.tabBar().setVisible(self.tabs.count() > 1)
    self.selected_rows = []
    self.selected_cols = []
    self.openContext(data)
    self.set_selection()
    self.commit()
def __init__(self):
    """Initialise state and build the clustering controls and score table.

    The control area gets the "Number of Clusters" radio group (fixed ``k``
    or a ``k_from``..``k_to`` range) and the "Initialization" box; the main
    area gets the "Silhouette Scores" table.
    """
    super().__init__()

    self.data = None  # type: Optional[Table]
    self.clusterings = {}

    self.__executor = ThreadExecutor(parent=self)
    self.__task = None  # type: Optional[Task]

    # --- Number of clusters -------------------------------------------
    clusters_grid = QGridLayout()
    mode_buttons = gui.radioButtonsInBox(
        self.controlArea, self, "optimize_k", orientation=clusters_grid,
        box="Number of Clusters", callback=self.update_method,
    )

    fixed_radio = gui.appendRadioButton(
        mode_buttons, "Fixed:", addToLayout=False)
    clusters_grid.addWidget(fixed_radio, 1, 1)
    fixed_row = gui.hBox(None, margin=0)
    gui.spin(
        fixed_row, self, "k", minv=2, maxv=30,
        controlWidth=60, alignment=Qt.AlignRight, callback=self.update_k)
    gui.rubber(fixed_row)
    clusters_grid.addWidget(fixed_row, 1, 2)

    range_radio = gui.appendRadioButton(
        mode_buttons, "From", addToLayout=False)
    clusters_grid.addWidget(range_radio, 2, 1)
    range_row = gui.hBox(None)
    range_row.layout().setContentsMargins(0, 0, 0, 0)
    clusters_grid.addWidget(range_row, 2, 2)
    gui.spin(
        range_row, self, "k_from", minv=2, maxv=29,
        controlWidth=60, alignment=Qt.AlignRight,
        callback=self.update_from)
    gui.widgetLabel(range_row, "to")
    gui.spin(
        range_row, self, "k_to", minv=3, maxv=30,
        controlWidth=60, alignment=Qt.AlignRight,
        callback=self.update_to)
    gui.rubber(range_row)

    # --- Initialization -----------------------------------------------
    init_box = gui.vBox(self.controlArea, "Initialization")
    method_names = [m[0] for m in self.INIT_METHODS]
    gui.comboBox(
        init_box, self, "smart_init", items=method_names,
        callback=self.invalidate)

    init_grid = QGridLayout()
    gui.widgetBox(init_box, orientation=init_grid)
    # One grid row per integer setting: caption on the left, editor on the
    # right.
    int_settings = (
        ("Re-runs: ", "n_init"),
        ("Maximum iterations: ", "max_iterations"),
    )
    for grid_row, (caption, setting) in enumerate(int_settings):
        init_grid.addWidget(
            gui.widgetLabel(None, caption), grid_row, 0, Qt.AlignLeft)
        editor_row = gui.hBox(None, margin=0)
        init_grid.addWidget(editor_row, grid_row, 1)
        gui.lineEdit(
            editor_row, self, setting, controlWidth=60, valueType=int,
            validator=QIntValidator(), callback=self.invalidate)

    self.apply_button = gui.auto_commit(
        self.buttonsArea, self, "auto_commit", "Apply", box=None,
        commit=self.commit)
    gui.rubber(self.controlArea)

    # --- Silhouette scores --------------------------------------------
    scores_box = gui.vBox(self.mainArea, box="Silhouette Scores")
    # The main area's visibility follows the current optimize_k setting.
    self.mainArea.setVisible(self.optimize_k)

    self.table_model = ClusterTableModel(self)
    scores_view = QTableView(self.mainArea)
    self.table_view = scores_view
    scores_view.setModel(self.table_model)
    scores_view.setSelectionMode(QTableView.SingleSelection)
    scores_view.setSelectionBehavior(QTableView.SelectRows)
    scores_view.setItemDelegate(
        gui.ColoredBarItemDelegate(self, color=Qt.cyan))
    scores_view.selectionModel().selectionChanged.connect(self.select_row)
    scores_view.setMaximumWidth(200)
    scores_header = scores_view.horizontalHeader()
    scores_header.setStretchLastSection(True)
    scores_header.hide()
    scores_view.setShowGrid(False)
    scores_box.layout().addWidget(scores_view)
def __init__(self):
    """Initialise state and build the scoring, selection and rank views.

    Three table views (discrete class, continuous class, no class) are
    created with identical wiring and stacked in the main area; the
    discrete view is made current at the end.
    """
    super().__init__()
    self.measure_scores = None
    self.update_scores = True
    self.usefulAttributes = []
    self.learners = {}
    self.labels = []
    self.out_domain_desc = None

    self.all_measures = SCORES

    # Every measure starts out selected.
    self.selectedMeasures = {m.name: True for m in self.all_measures}
    # Discrete (0) or continuous (1) class mode
    self.rankMode = 0

    self.data = None

    self.discMeasures = [
        m for m in self.all_measures
        if issubclass(DiscreteVariable, m.score.class_type)]
    self.contMeasures = [
        m for m in self.all_measures
        if issubclass(ContinuousVariable, m.score.class_type)]

    # --- Score check boxes --------------------------------------------
    self.score_checks = []
    self.cls_scoring_box = gui.vBox(None, "Scoring for Classification")
    self.reg_scoring_box = gui.vBox(None, "Scoring for Regression")
    # First seven scores go to the classification box, next two to the
    # regression box.
    target_boxes = [self.cls_scoring_box] * 7 + [self.reg_scoring_box] * 2
    for measure, setting, parent in zip(
            SCORES, self._score_vars, target_boxes):
        self.score_checks.append(gui.checkBox(
            parent, self, setting, label=measure.name,
            # Bind the measure as a default so each callback keeps its own.
            callback=lambda val=measure: self.measuresSelectionChanged(val)))

    self.score_stack = QStackedWidget(self)
    for page in (self.cls_scoring_box, self.reg_scoring_box, QWidget()):
        self.score_stack.addWidget(page)
    self.controlArea.layout().addWidget(self.score_stack)

    gui.rubber(self.controlArea)

    # --- Attribute selection method -----------------------------------
    selMethBox = gui.vBox(
        self.controlArea, "Select Attributes", addSpace=True)

    grid = QGridLayout()
    grid.setContentsMargins(6, 0, 6, 0)
    self.selectButtons = QButtonGroup()
    self.selectButtons.buttonClicked[int].connect(self.setSelectMethod)

    def button(text, buttonid, toolTip=None):
        # Radio button registered in the group under ``buttonid``.
        radio = QRadioButton(text)
        self.selectButtons.addButton(radio, buttonid)
        if toolTip is not None:
            radio.setToolTip(toolTip)
        return radio

    radios = [
        button(self.tr("None"), OWRank.SelectNone),
        button(self.tr("All"), OWRank.SelectAll),
        button(self.tr("Manual"), OWRank.SelectManual),
        button(self.tr("Best ranked:"), OWRank.SelectNBest),
    ]
    best_spin = gui.spin(selMethBox, self, "nSelected", 1, 100,
                         callback=self.nSelectedChanged)

    for grid_row, radio in enumerate(radios):
        grid.addWidget(radio, grid_row, 0)
    # The spin box sits next to the "Best ranked:" button.
    grid.addWidget(best_spin, 3, 1)

    self.selectButtons.button(self.selectMethod).setChecked(True)

    selMethBox.layout().addLayout(grid)

    gui.auto_commit(selMethBox, self, "auto_apply", "Send", box=False)

    # --- Rank views ---------------------------------------------------
    # Discrete, continuous and no_class table views are stacked
    self.ranksViewStack = QStackedLayout()
    self.mainArea.layout().addLayout(self.ranksViewStack)

    def wire_view(view, model, proxy, labels, saved_state):
        # Shared set-up: stack the view, configure selection and sorting,
        # chain model -> proxy -> view, connect signals, restore header.
        self.ranksViewStack.addWidget(view)
        view.setSelectionBehavior(QTableView.SelectRows)
        view.setSelectionMode(QTableView.MultiSelection)
        view.setSortingEnabled(True)

        model.setHorizontalHeaderLabels(labels)
        proxy.setSourceModel(model)
        view.setModel(proxy)

        view.setColumnWidth(0, 20)
        view.selectionModel().selectionChanged.connect(self.commit)
        view.pressed.connect(self.onSelectItem)
        view.horizontalHeader().sectionClicked.connect(self.headerClick)
        view.verticalHeader().sectionClicked.connect(self.onSelectItem)

        if saved_state is not None:
            view.horizontalHeader().restoreState(saved_state)

    self.discRanksView = QTableView()
    self.discRanksLabels = ["#"] + [m.shortname for m in self.discMeasures]
    self.discRanksModel = QStandardItemModel(self)
    self.discRanksProxyModel = MySortProxyModel(self)
    wire_view(self.discRanksView, self.discRanksModel,
              self.discRanksProxyModel, self.discRanksLabels,
              self.headerState[0])

    self.contRanksView = QTableView()
    self.contRanksLabels = ["#"] + [m.shortname for m in self.contMeasures]
    self.contRanksModel = QStandardItemModel(self)
    self.contRanksProxyModel = MySortProxyModel(self)
    wire_view(self.contRanksView, self.contRanksModel,
              self.contRanksProxyModel, self.contRanksLabels,
              self.headerState[1])

    self.noClassRanksView = QTableView()
    self.noClassRanksLabels = ["#"]
    self.noClassRanksModel = QStandardItemModel(self)
    self.noClassRanksProxyModel = MySortProxyModel(self)
    wire_view(self.noClassRanksView, self.noClassRanksModel,
              self.noClassRanksProxyModel, self.noClassRanksLabels,
              self.headerState[2])

    # Switch the current view to Discrete
    self.switchRanksMode(0)
    self.resetInternals()
    self.updateDelegates()
    self.updateVisibleScoreColumns()

    self.resize(690, 500)

    self.measure_scores = table((len(self.measures), 0), None)
class OWTableToRelation(OWWidget):
    """Widget that turns an input data table into a labelled relation.

    The attribute columns of the table form the relation matrix.  The user
    names the relation and the row/column object types, may pick a meta
    column as the source of row names, and may transpose the matrix.
    """
    name = "Table to Relation"
    description = "Convert data table to relation matrix. Label matrix axis."
    priority = 50000
    icon = "icons/TableToRelation.svg"

    class Inputs:
        data = Input("Data", Table)

    class Outputs:
        relation = Output("Relation", Relation)

    settingsHandler = PerfectDomainContextHandler()

    data = None
    relation_name = ContextSetting("")
    transpose = ContextSetting(False)

    row_type = ContextSetting("")
    # Index into the row-names combo box; 0 is the "(None)" entry.
    selected_meta = ContextSetting(0)
    row_names = None

    col_type = ContextSetting("")
    col_names = None

    auto_commit = Setting(True)

    def __init__(self):
        """Initialise state and build the control and main areas."""
        super().__init__()
        self.model = None
        self.view = None
        self.row_names_combo = None
        self.icons = gui.attributeIconDict
        self.populate_control_area()
        self.populate_main_area()

    def populate_control_area(self):
        """Create the Relation, Column and Row boxes and the send button."""
        rel = gui.widgetBox(self.controlArea, "Relation")
        gui.lineEdit(rel, self, "relation_name", "Name",
                     callbackOnType=True, callback=self.apply)
        gui.checkBox(rel, self, "transpose", "Transpose",
                     callback=self.apply)

        col = gui.widgetBox(self.controlArea, "Column")
        gui.lineEdit(col, self, "col_type", "Object Type",
                     callbackOnType=True, callback=self.apply)

        row = gui.widgetBox(self.controlArea, "Row")
        gui.lineEdit(row, self, "row_type", "Object Type",
                     callbackOnType=True, callback=self.apply)
        self.row_names_combo = gui.comboBox(
            row, self, "selected_meta", label="Object Names",
            callback=self.update_row_names)

        gui.rubber(self.controlArea)
        gui.auto_commit(self.controlArea, self, "auto_commit", "Send",
                        checkbox_label='Auto-send', orientation='vertical')

    def populate_main_area(self):
        """Create the preview grid: type labels around the table view."""
        grid = QWidget()
        grid.setLayout(QGridLayout(grid))
        self.mainArea.layout().addWidget(grid)

        col_type = gui.label(None, self, '%(col_type)s')
        grid.layout().addWidget(col_type, 0, 1)
        grid.layout().setAlignment(col_type, Qt.AlignHCenter)

        row_type = gui.label(None, self, '%(row_type)s')
        grid.layout().addWidget(row_type, 1, 0)
        grid.layout().setAlignment(row_type, Qt.AlignVCenter)

        self.view = QTableView()
        self.model = None
        grid.layout().addWidget(self.view, 1, 1)

    def sizeHint(self):
        """Return the preferred widget size."""
        return QSize(800, 500)

    @Inputs.data
    def set_data(self, data):
        """Accept a new table (or ``None``), refresh the preview and send."""
        self.closeContext()
        self.data = data
        if data is not None:
            self.init_attr_values(data.domain.metas)
            self.openContext(self.data)
            self.col_names = [str(a.name) for a in data.domain.attributes]
            if hasattr(data, 'col_type'):
                self.col_type = data.col_type
        else:
            self.init_attr_values(())
        self.update_preview()
        self.update_row_names()
        self.unconditional_commit()

    def init_attr_values(self, candidates):
        """Reset the axis settings and refill the row-names combo box.

        ``candidates`` are the variables offered as row-name sources; when
        there are any, the first one is preselected and its name becomes
        the row object type.
        """
        self.col_type = ""
        self.col_names = None

        if candidates:
            self.row_type = candidates[0].name
            self.selected_meta = 1
        else:
            self.row_type = ""
            self.selected_meta = 0
        self.row_names = None

        self.row_names_combo.clear()
        self.row_names_combo.addItem('(None)')
        for var in candidates:
            self.row_names_combo.addItem(self.icons[var], var.name)
        self.row_names_combo.setCurrentIndex(self.selected_meta)

    def update_row_names(self):
        """Recompute the row names from the chosen column and commit."""
        # Guard against a missing table so a non-zero selection cannot
        # dereference ``None``.
        if self.selected_meta and self.data is not None:
            self.row_names = list(
                self.data[:, -self.selected_meta].metas.flatten())
        else:
            self.row_names = None

        if self.model:
            # Make the view re-query the vertical header.
            self.model.headerDataChanged.emit(
                Qt.Vertical, 0, self.model.rowCount() - 1)
        self.commit()

    def update_preview(self):
        """Install a preview model built from the attribute columns."""
        this = self

        class MyTableModel(TableModel):
            def headerData(self, section, orientation, role):
                # Vertical header text comes from the widget's row names;
                # without row names it is left empty.
                if orientation == Qt.Vertical and role == Qt.DisplayRole:
                    if this.row_names:
                        return this.row_names[section]
                    return None
                return super().headerData(section, orientation, role)

        if self.data:
            domain = Domain(self.data.domain.attributes)
            preview_data = Table(domain, self.data)
            self.model = MyTableModel(preview_data)
        else:
            self.model = None
        self.view.setModel(self.model)

    def apply(self):
        """Callback of the editing controls; forwards to ``commit``."""
        self.commit()

    def commit(self):
        """Build the relation from the current settings and send it.

        ``None`` is sent when there is no data, so that a previously sent
        relation does not linger downstream after the input is removed.
        """
        output = None
        if self.data:
            domain = self.data.domain
            metadata_cols = list(domain.class_vars) + list(domain.metas)
            # One {variable: value} dict per data row.
            metadata = [
                {var: var.to_val(value)
                 for var, value in zip(metadata_cols, values.list)}
                for values in self.data[:, metadata_cols]]

            if self.transpose:
                # Rows and columns swap roles; the per-row metadata then
                # describes the columns of the transposed matrix.
                relation = fusion.Relation(
                    self.data.X.T, name=self.relation_name,
                    row_type=fusion.ObjectType(self.col_type or 'Unknown'),
                    row_names=self.col_names,
                    col_type=fusion.ObjectType(self.row_type or 'Unknown'),
                    col_names=self.row_names,
                    col_metadata=metadata)
            else:
                relation = fusion.Relation(
                    self.data.X, name=self.relation_name,
                    row_type=fusion.ObjectType(self.row_type or 'Unknown'),
                    row_names=self.row_names,
                    row_metadata=metadata,
                    col_type=fusion.ObjectType(self.col_type or 'Unknown'),
                    col_names=self.col_names,
                )
            output = Relation(relation)
        self.Outputs.relation.send(output)