class OWGenes(OWWidget, ConcurrentWidgetMixin):
    """Widget that matches gene identifiers from the input table.

    Gene identifiers are read either from the feature (column) names or
    from one string/discrete column of the input table, handed to a
    ``GeneMatcher`` for the selected organism, and shown in two table
    views (one backed by ``GeneInfoModel``, one by
    ``UnknownGeneInfoModel``).  ``commit`` sends the annotated data table
    and the gene matcher result table.
    """

    name = "Genes"
    description = "Tool for working with genes"
    icon = "../widgets/icons/OWGeneInfo.svg"
    priority = 40
    want_main_area = True

    # Index into ``self.organisms`` (not a taxonomy id).
    selected_organism: int = Setting(11)
    search_pattern: str = Setting('')
    exclude_unmatched = Setting(True)
    replace_id_with_symbol = Setting(True)
    auto_commit = Setting(True)

    settingsHandler = DomainContextHandler()
    selected_gene_col = ContextSetting(None)
    use_attr_names = ContextSetting(True)

    replaces = [
        'orangecontrib.bioinformatics.widgets.OWGeneNameMatcher.OWGeneNameMatcher'
    ]

    class Inputs:
        data_table = Input("Data", Table)

    class Outputs:
        data_table = Output("Data", Table)
        gene_matcher_results = Output("Genes", Table)

    class Information(OWWidget.Information):
        pass

    def sizeHint(self):
        """Return the preferred initial size of the widget window."""
        return QSize(1280, 960)

    def __init__(self):
        OWWidget.__init__(self)
        ConcurrentWidgetMixin.__init__(self)

        # ATTRIBUTES #
        self.target_database = ENTREZ_ID

        # input data
        self.input_data = None
        self.input_genes = None
        self.tax_id = None
        self.column_candidates = []

        # input options
        self.organisms = []

        # gene matcher
        self.gene_matcher = None

        # progress bar
        self.progress_bar = None

        # Single-shot timer used to debounce filter edits
        # (see handle_filter_callback).
        self._timer = QTimer()
        self._timer.timeout.connect(self._apply_filter)
        self._timer.setSingleShot(True)

        # GUI SECTION #

        # Control area
        self.info_box = widgetLabel(
            widgetBox(self.controlArea, "Info", addSpace=True),
            'No data on input.\n')

        organism_box = vBox(self.controlArea, 'Organism')
        self.organism_select_combobox = comboBox(
            organism_box, self, 'selected_organism',
            callback=self.on_input_option_change)

        self.get_available_organisms()
        self.organism_select_combobox.setCurrentIndex(self.selected_organism)

        box = widgetBox(self.controlArea, 'Gene IDs in the input data')
        self.gene_columns_model = itemmodels.DomainModel(
            valid_types=(StringVariable, DiscreteVariable))
        self.gene_column_combobox = comboBox(
            box,
            self,
            'selected_gene_col',
            label='Stored in data column',
            model=self.gene_columns_model,
            sendSelectedValue=True,
            callback=self.on_input_option_change,
        )

        self.attr_names_checkbox = checkBox(
            box,
            self,
            'use_attr_names',
            'Stored as feature (column) names',
            disables=[(-1, self.gene_column_combobox)],
            callback=self.on_input_option_change,
        )

        self.gene_column_combobox.setDisabled(bool(self.use_attr_names))

        output_box = vBox(self.controlArea, 'Output')

        self.exclude_radio = checkBox(output_box, self, 'exclude_unmatched',
                                      'Exclude unmatched genes',
                                      callback=self.commit)

        self.replace_radio = checkBox(output_box, self,
                                      'replace_id_with_symbol',
                                      'Replace feature IDs with gene names',
                                      callback=self.commit)

        auto_commit(self.controlArea, self, "auto_commit", "&Commit",
                    box=False)

        rubber(self.controlArea)

        # Main area
        self.filter = lineEdit(self.mainArea, self, 'search_pattern',
                               'Filter:', callbackOnType=True,
                               callback=self.handle_filter_callback)
        self.mainArea.layout().addWidget(self.filter)

        # set splitter
        self.splitter = QSplitter()
        self.splitter.setOrientation(Qt.Vertical)

        self.table_model = GeneInfoModel()
        self.table_view = QTableView()
        self.table_view.setAlternatingRowColors(True)
        self.table_view.viewport().setMouseTracking(True)
        self.table_view.setSortingEnabled(True)
        self.table_view.setShowGrid(False)
        self.table_view.verticalHeader().hide()

        self.unknown_model = UnknownGeneInfoModel()
        self.unknown_view = QTableView()
        self.unknown_view.setModel(self.unknown_model)
        self.unknown_view.verticalHeader().hide()
        self.unknown_view.setShowGrid(False)
        self.unknown_view.setSelectionMode(QAbstractItemView.NoSelection)
        self.unknown_view.horizontalHeader().setSectionResizeMode(
            QHeaderView.Stretch)

        self.splitter.addWidget(self.table_view)
        self.splitter.addWidget(self.unknown_view)

        self.splitter.setStretchFactor(0, 90)
        self.splitter.setStretchFactor(1, 10)

        self.mainArea.layout().addWidget(self.splitter)

    def handle_filter_callback(self):
        """Restart the debounce timer; the filter is applied 500 ms later."""
        self._timer.stop()
        self._timer.start(500)

    def _apply_filter(self):
        """Apply ``search_pattern`` to the gene table and commit."""
        # filter only if input data is present and model is populated
        if self.table_model.table is not None:
            self.table_model.update_model(
                filter_pattern=str(self.search_pattern))
            self.commit()

    def __reset_widget_state(self):
        """Detach and clear both gene models and refresh the info label."""
        self.table_view.clearSpans()
        self.table_view.setModel(None)
        self.table_model.clear()
        self.unknown_model.clear()
        self._update_info_box()

    def _update_info_box(self):
        """Show gene counts in the info label, or a 'no data' message."""
        if self.input_genes and self.gene_matcher:
            num_genes = len(self.gene_matcher.genes)
            known_genes = len(self.gene_matcher.get_known_genes())

            info_text = ('{} genes in input data\n'
                         '{} genes match Entrez database\n'
                         '{} genes with match conflicts\n'.format(
                             num_genes, known_genes, num_genes - known_genes))
        else:
            info_text = 'No data on input.'

        self.info_box.setText(info_text)

    def on_done(self, _):
        """Populate the views once the gene matcher task has finished.

        The task result (second argument) is ignored; the matched genes
        are read from ``self.gene_matcher``.
        """
        # update info box
        self._update_info_box()

        # set output options
        self.toggle_radio_options()

        # set known genes
        self.table_model.initialize(self.gene_matcher.genes)
        self.table_view.setModel(self.table_model)
        self.table_view.selectionModel().selectionChanged.connect(self.commit)
        self.table_view.setSelectionBehavior(QAbstractItemView.SelectRows)

        self.table_view.setItemDelegateForColumn(
            self.table_model.entrez_column_index,
            LinkStyledItemDelegate(self.table_view))
        v_header = self.table_view.verticalHeader()
        option = self.table_view.viewOptions()
        size = self.table_view.style().sizeFromContents(
            QStyle.CT_ItemViewItem, option, QSize(20, 20), self.table_view)

        v_header.setDefaultSectionSize(size.height() + 2)
        v_header.setMinimumSectionSize(5)
        self.table_view.horizontalHeader().setStretchLastSection(True)

        # set unknown genes
        self.unknown_model.initialize(self.gene_matcher.genes)
        self.unknown_view.verticalHeader().setStretchLastSection(True)

        self._apply_filter()

    def get_available_organisms(self):
        """Fill ``self.organisms`` and the organism combo box.

        Organisms come from ``taxonomy.common_taxids()`` and are ordered
        by their taxonomy name.
        """
        available_organism = sorted(
            ((tax_id, taxonomy.name(tax_id))
             for tax_id in taxonomy.common_taxids()),
            key=lambda x: x[1])

        self.organisms = [tax_id[0] for tax_id in available_organism]
        self.organism_select_combobox.addItems(
            [tax_id[1] for tax_id in available_organism])

    def gene_names_from_table(self):
        """Extract gene names from the input `Orange.data.Table`.

        The result is stored in ``self.input_genes``.  It stays an empty
        list when there is no input data or when no column holding gene
        names could be determined.
        """
        self.input_genes = []
        if not self.input_data:
            return

        if self.use_attr_names:
            self.input_genes = [
                str(attr.name).strip()
                for attr in self.input_data.domain.attributes
            ]
            return

        if self.selected_gene_col is None:
            self.selected_gene_col = self.gene_column_identifier()
        if self.selected_gene_col is None:
            # No string/discrete column available: nothing to extract.
            return

        gene_col = self.selected_gene_col
        self.input_genes = [
            str(e[gene_col]) for e in self.input_data
            if not np.isnan(e[gene_col])
        ]

    def _update_gene_matcher(self):
        """Create a fresh ``GeneMatcher`` loaded with the input genes."""
        self.gene_names_from_table()
        self.gene_matcher = GeneMatcher(self.get_selected_organism(),
                                        auto_start=False)
        self.gene_matcher.genes = self.input_genes

    def get_selected_organism(self):
        """Return the entry of ``self.organisms`` currently selected."""
        return self.organisms[self.selected_organism]

    def _run(self):
        """Start the gene matcher task if a matcher exists."""
        if self.gene_matcher is not None:
            self.start(run_gene_matcher, self.gene_matcher)

    def on_input_option_change(self):
        """Reset the views and re-run gene matching with current options."""
        self.__reset_widget_state()
        self._update_gene_matcher()
        self._run()

    def gene_column_identifier(self):
        """Get the most suitable column that stores genes.

        Among the string/discrete columns listed in
        ``self.gene_columns_model``, the one with the most unique values
        is chosen.

        Returns:
            The chosen variable, or ``None`` when there is no candidate
            column.
        """
        # candidates -> (variable, num of unique values)
        candidates = (
            (col, np.unique(self.input_data.get_column_view(col)[0]).size)
            for col in self.gene_columns_model
            if isinstance(col, (DiscreteVariable, StringVariable))
        )

        # A stable sort followed by taking the last item keeps the
        # tie-breaking of equally good candidates unchanged.
        ranked = sorted(candidates, key=lambda x: x[1])
        if not ranked:
            return None
        return ranked[-1][0]

    def find_genes_location(self):
        """Try to locate the genes when the data is first loaded.

        Proposed rules:
            - when no suitable feature names are present, check the
              columns.
            - find the most suitable column, that is, the one with most
              unique values.
        """
        domain = self.input_data.domain
        if not domain.attributes:
            if self.selected_gene_col is None:
                self.selected_gene_col = self.gene_column_identifier()
                self.use_attr_names = False

    @Inputs.data_table
    def handle_input(self, data):
        """Handle a new (or removed) input table.

        Widget state is reset first.  For a non-empty table the organism
        and gene location are derived from the table attributes and gene
        matching is started; otherwise both outputs are cleared.
        """
        self.closeContext()
        self.input_data = None
        self.input_genes = None
        self.__reset_widget_state()
        self.gene_columns_model.set_domain(None)
        self.selected_gene_col = None

        if data:
            self.input_data = data
            self.gene_columns_model.set_domain(self.input_data.domain)

            # check if input table has tax_id, human is used if tax_id is
            # not found
            self.tax_id = str(self.input_data.attributes.get(TAX_ID, '9606'))
            # check for gene location. Default is that genes are attributes
            # in the input table.
            self.use_attr_names = self.input_data.attributes.get(
                GENE_AS_ATTRIBUTE_NAME, self.use_attr_names)

            # NOTE(review): `not self.selected_organism` is only true for
            # index 0, so the table's tax id is applied only in that case
            # -- confirm this is intended.
            if self.tax_id in self.organisms and not self.selected_organism:
                self.selected_organism = self.organisms.index(self.tax_id)

            self.openContext(self.input_data.domain)
            self.find_genes_location()
            self.on_input_option_change()
        else:
            # Input removed or empty: do not leave stale tables downstream.
            self.Outputs.data_table.send(None)
            self.Outputs.gene_matcher_results.send(None)

    def commit(self):
        """Send the annotated data table and the gene matcher results.

        Genes selected in the table view are sent; with no selection the
        genes passing the current filter are used.  ``None`` is sent on
        both outputs when there is nothing to output (no data, no gene
        matcher, no model attached to the view yet, or no known gene).
        """
        selection_model = self.table_view.selectionModel()
        # This method is also a checkbox callback, so it can run before
        # any data arrived; no selection model exists before a model is
        # first attached to the view.
        if (self.input_data is None or self.gene_matcher is None
                or selection_model is None):
            self.Outputs.data_table.send(None)
            self.Outputs.gene_matcher_results.send(None)
            return

        selection = selection_model.selectedRows(
            self.table_model.entrez_column_index)

        selected_genes = [row.data() for row in selection]
        if not selected_genes:
            selected_genes = self.table_model.get_filtered_genes()

        gene_ids = self.get_target_ids()
        known_genes = [gid for gid in gene_ids if gid != '?']

        table = None
        gm_table = None
        if known_genes:
            if not self.use_attr_names:
                # Genes are in rows (we have a column with genes).
                table = self._annotated_rows_table(
                    gene_ids, known_genes, selected_genes)
            else:
                # Genes are in columns (genes are features).
                table = self._annotated_columns_table(
                    known_genes, selected_genes)

            gm_table = self.gene_matcher.to_data_table(
                selected_genes=selected_genes if selected_genes else None)

        self.Outputs.data_table.send(table)
        self.Outputs.gene_matcher_results.send(gm_table)

    def _annotated_rows_table(self, gene_ids, known_genes, selected_genes):
        """Build the output table when genes are stored in a data column."""
        input_domain = self.input_data.domain
        if self.target_database in input_domain:
            gene_var = input_domain[self.target_database]
            metas = input_domain.metas
        else:
            gene_var = StringVariable(self.target_database)
            metas = input_domain.metas + (gene_var, )

        domain = Domain(input_domain.attributes, input_domain.class_vars,
                        metas)

        table = self.input_data.transform(domain)
        # NOTE(review): assumes one gene id per table row; rows skipped by
        # gene_names_from_table would make the lengths differ -- verify.
        col, _ = table.get_column_view(gene_var)
        col[:] = gene_ids

        # filter selected rows
        selected_genes_set = set(selected_genes)
        selected_rows = [
            row_index for row_index, row in enumerate(table)
            if str(row[gene_var]) in selected_genes_set
        ]

        # handle table attributes
        table.attributes[TAX_ID] = self.get_selected_organism()
        table.attributes[GENE_AS_ATTRIBUTE_NAME] = False
        table.attributes[GENE_ID_COLUMN] = self.target_database
        table = table[selected_rows] if selected_rows else table

        if self.exclude_unmatched:
            # create filter from selected column for genes
            only_known = table_filter.FilterStringList(gene_var, known_genes)
            # apply filter to the data
            table = table_filter.Values([only_known])(table)

        return table

    def _annotated_columns_table(self, known_genes, selected_genes):
        """Build the output table when genes are the feature names."""
        domain = self.input_data.domain.copy()
        table = self.input_data.transform(domain)

        for gene in self.gene_matcher.genes:
            if gene.input_identifier in table.domain:
                table.domain[gene.input_identifier].attributes[
                    self.target_database] = (str(gene.gene_id)
                                             if gene.gene_id else '?')

                if self.replace_id_with_symbol:
                    try:
                        table.domain[gene.input_identifier].name = str(
                            gene.symbol)
                    except AttributeError:
                        # TODO: missing gene symbol, need to handle this?
                        pass

        # filter selected columns
        selected_genes_set = set(selected_genes)
        selected = [
            column for column in table.domain.attributes
            if self.target_database in column.attributes
            and str(column.attributes[self.target_database])
            in selected_genes_set
        ]

        output_attrs = table.domain.attributes
        if selected:
            output_attrs = selected

        if self.exclude_unmatched:
            known_genes_set = set(known_genes)
            # `.get` so that a column without an id annotation is simply
            # dropped instead of raising KeyError.
            output_attrs = [
                col for col in output_attrs
                if col.attributes.get(self.target_database) in known_genes_set
            ]

        domain = Domain(output_attrs, table.domain.class_vars,
                        table.domain.metas)

        table = table.from_table(domain, table)

        # handle table attributes
        table.attributes[TAX_ID] = self.get_selected_organism()
        table.attributes[GENE_AS_ATTRIBUTE_NAME] = True
        table.attributes[GENE_ID_ATTRIBUTE] = self.target_database
        return table

    def toggle_radio_options(self):
        """Enable/disable the output checkboxes for the current result."""
        self.replace_radio.setEnabled(bool(self.use_attr_names))

        if self.gene_matcher.genes:
            # enable checkbox if unknown genes are detected
            has_unknown = len(self.gene_matcher.genes) != len(
                self.gene_matcher.get_known_genes())
            self.exclude_radio.setEnabled(has_unknown)
            self.exclude_unmatched = has_unknown

    def get_target_ids(self):
        """Return the gene id of every gene as a string, '?' if missing."""
        return [
            str(gene.gene_id) if gene.gene_id else '?'
            for gene in self.gene_matcher.genes
        ]
class OWPredictions(OWWidget):
    """Widget that applies input predictors to an input dataset.

    Predictions are shown next to the data in two synchronized table
    views, together with a score table.  The widget outputs the data
    extended with prediction columns and an ``Orange.evaluation.Results``
    object.
    """

    name = "Predictions"
    icon = "icons/Predictions.svg"
    priority = 200
    description = "Display the predictions of models for an input dataset."
    keywords = []

    class Inputs:
        data = Input("Data", Orange.data.Table)
        predictors = Input("Predictors", Model, multiple=True)

    class Outputs:
        predictions = Output("Predictions", Orange.data.Table)
        evaluation_results = Output("Evaluation Results",
                                    Orange.evaluation.Results,
                                    dynamic=False)

    class Warning(OWWidget.Warning):
        empty_data = Msg("Empty dataset")

    class Error(OWWidget.Error):
        predictor_failed = \
            Msg("One or more predictors failed (see more...)\n{}")
        scorer_failed = \
            Msg("One or more scorers failed (see more...)\n{}")
        predictors_target_mismatch = \
            Msg("Predictors do not have the same target.")
        data_target_mismatch = \
            Msg("Data does not have the same target as predictors.")

    settingsHandler = settings.ClassValuesContextHandler()
    score_table = settings.SettingProvider(ScoreTable)

    #: Display the full input dataset or only the target variable columns (if
    #: available)
    show_attrs = settings.Setting(True)
    #: Show predicted values (for discrete target variable)
    show_predictions = settings.Setting(True)
    #: Show predictions probabilities (for discrete target variable)
    show_probabilities = settings.Setting(True)
    #: List of selected class value indices in the "Show probabilities" list
    selected_classes = settings.ContextSetting([])
    #: Draw colored distribution bars
    draw_dist = settings.Setting(True)

    output_attrs = settings.Setting(True)
    output_predictions = settings.Setting(True)
    output_probabilities = settings.Setting(True)

    def __init__(self):
        super().__init__()

        #: Input data table
        self.data = None  # type: Optional[Orange.data.Table]
        #: A dict mapping input ids to PredictorSlot
        self.predictors = OrderedDict()  # type: Dict[object, PredictorSlot]
        #: A class variable (prediction target)
        self.class_var = None  # type: Optional[Orange.data.Variable]
        #: List of (discrete) class variable's values
        self.class_values = []  # type: List[str]

        box = gui.vBox(self.controlArea, "Info")
        self.infolabel = gui.widgetLabel(
            box, "No data on input.\nPredictors: 0\nTask: N/A")
        self.infolabel.setMinimumWidth(150)
        gui.button(box, self, "Restore Original Order",
                   callback=self._reset_order,
                   tooltip="Show rows in the original order")

        self.classification_options = box = gui.vBox(
            self.controlArea, "Show", spacing=-1, addSpace=False)

        gui.checkBox(box, self, "show_predictions", "Predicted class",
                     callback=self._update_prediction_delegate)
        b = gui.checkBox(box, self, "show_probabilities",
                         "Predicted probabilities for:",
                         callback=self._update_prediction_delegate)
        ibox = gui.indentedBox(box, sep=gui.checkButtonOffsetHint(b),
                               addSpace=False)
        gui.listBox(ibox, self, "selected_classes", "class_values",
                    callback=self._update_prediction_delegate,
                    selectionMode=QListWidget.MultiSelection,
                    addSpace=False)
        gui.checkBox(box, self, "draw_dist", "Draw distribution bars",
                     callback=self._update_prediction_delegate)

        box = gui.vBox(self.controlArea, "Data View")
        gui.checkBox(box, self, "show_attrs", "Show full dataset",
                     callback=self._update_column_visibility)

        box = gui.vBox(self.controlArea, "Output", spacing=-1)
        # The "Original data" checkbox is never enabled/disabled later, so
        # no reference to it is kept.
        gui.checkBox(box, self, "output_attrs", "Original data",
                     callback=self.commit)
        self.checkbox_class = gui.checkBox(
            box, self, "output_predictions", "Predictions",
            callback=self.commit)
        self.checkbox_prob = gui.checkBox(
            box, self, "output_probabilities", "Probabilities",
            callback=self.commit)

        gui.rubber(self.controlArea)

        self.vsplitter = QSplitter(orientation=Qt.Vertical,
                                   childrenCollapsible=True,
                                   handleWidth=2)

        self.splitter = QSplitter(
            orientation=Qt.Horizontal,
            childrenCollapsible=False,
            handleWidth=2,
        )
        self.dataview = TableView(
            verticalScrollBarPolicy=Qt.ScrollBarAlwaysOn,
            horizontalScrollBarPolicy=Qt.ScrollBarAlwaysOn,
            horizontalScrollMode=QTableView.ScrollPerPixel,
            selectionMode=QTableView.NoSelection,
            focusPolicy=Qt.StrongFocus)
        self.predictionsview = TableView(
            verticalScrollBarPolicy=Qt.ScrollBarAlwaysOff,
            horizontalScrollBarPolicy=Qt.ScrollBarAlwaysOn,
            horizontalScrollMode=QTableView.ScrollPerPixel,
            selectionMode=QTableView.NoSelection,
            focusPolicy=Qt.StrongFocus,
            sortingEnabled=True,
        )

        self.predictionsview.setItemDelegate(PredictionsItemDelegate())
        self.dataview.verticalHeader().hide()

        # Keep the vertical scroll position of both views in sync.
        dsbar = self.dataview.verticalScrollBar()
        psbar = self.predictionsview.verticalScrollBar()

        psbar.valueChanged.connect(dsbar.setValue)
        dsbar.valueChanged.connect(psbar.setValue)

        self.dataview.verticalHeader().setDefaultSectionSize(22)
        self.predictionsview.verticalHeader().setDefaultSectionSize(22)
        self.dataview.verticalHeader().sectionResized.connect(
            lambda index, _, size: self.predictionsview.verticalHeader(
            ).resizeSection(index, size))

        self.splitter.addWidget(self.predictionsview)
        self.splitter.addWidget(self.dataview)

        self.score_table = ScoreTable(self)
        self.vsplitter.addWidget(self.splitter)
        self.vsplitter.addWidget(self.score_table.view)
        self.vsplitter.setStretchFactor(0, 5)
        self.vsplitter.setStretchFactor(1, 1)
        self.mainArea.layout().addWidget(self.vsplitter)

    @Inputs.data
    @check_sql_input
    def set_data(self, data):
        """Set the input dataset.

        An empty (but not ``None``) dataset is treated as no data and
        raises the ``empty_data`` warning.  All stored prediction results
        are invalidated.
        """
        if data is not None and not data:
            data = None
            self.Warning.empty_data()
        else:
            self.Warning.empty_data.clear()

        self.data = data
        if data is None:
            self.dataview.setModel(None)
            self.predictionsview.setModel(None)
            self.predictionsview.setItemDelegate(PredictionsItemDelegate())
        else:
            # force full reset of the view's HeaderView state
            self.dataview.setModel(None)
            model = TableModel(data, parent=None)
            modelproxy = TableSortProxyModel()
            modelproxy.setSourceModel(model)
            self.dataview.setModel(modelproxy)
            self._update_column_visibility()

        self._invalidate_predictions()

    # pylint: disable=redefined-builtin
    @Inputs.predictors
    def set_predictor(self, predictor=None, id=None):
        """Add, replace or remove the predictor stored under input ``id``.

        ``predictor=None`` removes an existing slot; replacing a predictor
        discards its stored results.
        """
        if id in self.predictors:
            if predictor is not None:
                self.predictors[id] = self.predictors[id]._replace(
                    predictor=predictor, name=predictor.name, results=None)
            else:
                del self.predictors[id]
        elif predictor is not None:
            self.predictors[id] = \
                PredictorSlot(predictor, predictor.name, None)

    def _set_class_var(self):
        """Determine the common target of the predictors and the data.

        ``self.class_var`` is set only when all predictors share one
        target and it does not conflict with the data's class variable;
        otherwise the matching error is shown and it stays ``None``.
        """
        pred_classes = set(pred.predictor.domain.class_var
                           for pred in self.predictors.values())
        self.Error.predictors_target_mismatch.clear()
        self.Error.data_target_mismatch.clear()
        self.class_var = None
        if len(pred_classes) > 1:
            self.Error.predictors_target_mismatch()
        if len(pred_classes) == 1:
            self.class_var = pred_classes.pop()
            if self.data is not None and \
                    self.data.domain.class_var is not None and \
                    self.class_var != self.data.domain.class_var:
                self.Error.data_target_mismatch()
                self.class_var = None

        discrete_class = self.class_var is not None \
            and self.class_var.is_discrete
        self.classification_options.setVisible(discrete_class)
        self.closeContext()
        if discrete_class:
            self.class_values = list(self.class_var.values)
            self.selected_classes = list(range(len(self.class_values)))
            self.openContext(self.class_var)
        else:
            self.class_values = []
            self.selected_classes = []

    def handleNewSignals(self):
        """Recompute predictions, scores, views and outputs."""
        self._set_class_var()
        self._call_predictors()
        self._update_scores()
        self._update_predictions_model()
        self._update_prediction_delegate()
        self._set_errors()
        self._update_info()
        self.commit()

    def _call_predictors(self):
        """Run every predictor that has no usable stored results.

        A slot's ``results`` becomes either an ``Orange.evaluation.Results``
        object or, when prediction failed, an error message string.
        """
        if not self.data:
            return
        for inputid, slot in self.predictors.items():
            if slot.results is not None \
                    and not isinstance(slot.results, str) \
                    and not numpy.isnan(slot.results.predicted[0]).all():
                continue
            try:
                pred, prob = self.predict(slot.predictor, self.data)
            except (ValueError, DomainTransformationError) as err:
                results = "{}: {}".format(slot.predictor.name, err)
            else:
                results = Orange.evaluation.Results()
                results.data = self.data
                results.domain = self.data.domain
                results.row_indices = numpy.arange(len(self.data))
                results.folds = (Ellipsis, )
                results.actual = self.data.Y
                results.predicted = pred.reshape((1, len(self.data)))
                results.probabilities = prob.reshape((1, ) + prob.shape)
            self.predictors[inputid] = slot._replace(results=results)

    def _update_scores(self):
        """Rebuild the score table with one row per predictor."""
        model = self.score_table.model
        model.clear()

        if self.data is None or self.data.domain.class_var is None:
            scorers = []
        else:
            scorers = usable_scorers(self.data.domain.class_var)
        self.score_table.update_header(scorers)

        errors = []
        for slot in self.predictors.values():
            name = learner_name(slot.predictor)
            head = QStandardItem(name)
            row = [head]
            results = slot.results
            if isinstance(results, str):
                # Prediction failed: show the message instead of scores.
                head.setToolTip(results)
                head.setText("{} (error)".format(name))
                head.setForeground(QBrush(Qt.red))
            else:
                for scorer in scorers:
                    item = QStandardItem()
                    try:
                        score = scorer_caller(scorer, results)()[0]
                        item.setText(f"{score:.3f}")
                    except Exception as exc:  # pylint: disable=broad-except
                        item.setToolTip(str(exc))
                        if scorer.name in self.score_table.shown_scores:
                            errors.append(str(exc))
                    row.append(item)
            model.appendRow(row)

        self.Error.scorer_failed("\n".join(errors), shown=bool(errors))

    def _set_errors(self):
        """Show the collected prediction error messages, if any."""
        # Not all predictors are run every time, so errors can't be collected
        # in _call_predictors
        errors = "\n".join(p.results for p in self.predictors.values()
                           if isinstance(p.results, str))
        self.Error.predictor_failed(errors, shown=bool(errors))

    def _update_info(self):
        """Refresh the info label and the output checkboxes' enabled state."""
        info = []
        if self.data is not None:
            info.append("Data: {} instances.".format(len(self.data)))
        else:
            info.append("Data: N/A")

        n_predictors = len(self.predictors)
        n_valid = len(self._valid_predictors())
        if n_valid != n_predictors:
            info.append("Predictors: {} (+ {} failed)".format(
                n_valid, n_predictors - n_valid))
        else:
            info.append("Predictors: {}".format(n_predictors or "N/A"))

        if self.class_var is None:
            info.append("Task: N/A")
        elif self.class_var.is_discrete:
            info.append("Task: Classification")
            self.checkbox_class.setEnabled(True)
            self.checkbox_prob.setEnabled(True)
        else:
            info.append("Task: Regression")
            self.checkbox_class.setEnabled(False)
            self.checkbox_prob.setEnabled(False)

        self.infolabel.setText("\n".join(info))

    def _invalidate_predictions(self):
        """Discard the stored results of all predictors."""
        for inputid, pred in list(self.predictors.items()):
            self.predictors[inputid] = pred._replace(results=None)

    def _valid_predictors(self):
        """Return the slots holding actual results (not ``None``/error).

        The list is empty when there is no data or no common target.
        """
        if self.class_var is not None and self.data is not None:
            return [
                p for p in self.predictors.values()
                if p.results is not None and not isinstance(p.results, str)
            ]
        else:
            return []

    def _update_predictions_model(self):
        """Update the prediction view model."""
        if self.data is not None and self.class_var is not None:
            slots = self._valid_predictors()
            results = []
            class_var = self.class_var
            for p in slots:
                if isinstance(p.results, str):
                    continue
                values = p.results.predicted[0]
                if self.class_var.is_discrete:
                    # if values were added to class_var between building the
                    # model and predicting, add zeros for new class values,
                    # which are always at the end
                    prob = p.results.probabilities[0]
                    prob = numpy.c_[prob, numpy.zeros(
                        (prob.shape[0],
                         len(class_var.values) - prob.shape[1]))]
                    values = [Value(class_var, v) for v in values]
                else:
                    prob = numpy.zeros((len(values), 0))
                results.append((values, prob))
            results = list(zip(*(zip(*res) for res in results)))
            headers = [p.name for p in slots]
            model = PredictionsModel(results, headers)
        else:
            model = None

        predmodel = PredictionsSortProxyModel()
        predmodel.setSourceModel(model)
        predmodel.setDynamicSortFilter(True)
        self.predictionsview.setItemDelegate(PredictionsItemDelegate())
        self.predictionsview.setModel(predmodel)
        hheader = self.predictionsview.horizontalHeader()
        hheader.setSortIndicatorShown(False)
        # SortFilterProxyModel is slow due to large abstraction overhead
        # (every comparison triggers multiple `model.index(...)`,
        # model.rowCount(...), `model.parent`, ... calls)
        hheader.setSectionsClickable(predmodel.rowCount() < 20000)

        predmodel.layoutChanged.connect(self._update_data_sort_order)
        self._update_data_sort_order()
        self.predictionsview.resizeColumnsToContents()

    def _update_column_visibility(self):
        """Update data column visibility."""
        if self.data is not None and self.class_var is not None:
            domain = self.data.domain
            first_attr = len(domain.class_vars) + len(domain.metas)

            for i in range(first_attr, first_attr + len(domain.attributes)):
                self.dataview.setColumnHidden(i, not self.show_attrs)
            if domain.class_var:
                self.dataview.setColumnHidden(0, False)

    def _update_data_sort_order(self):
        """Update data row order to match the current predictions view order"""
        datamodel = self.dataview.model()  # data model proxy
        predmodel = self.predictionsview.model()  # predictions model proxy
        sortindicatorshown = False
        if datamodel is not None:
            assert isinstance(datamodel, TableSortProxyModel)
            n = datamodel.rowCount()
            if predmodel is not None and predmodel.sortColumn() >= 0:
                sortind = numpy.argsort([
                    predmodel.mapToSource(predmodel.index(i, 0)).row()
                    for i in range(n)
                ])
                # `numpy.int` was only an alias of the builtin `int` and no
                # longer exists in NumPy >= 1.24.
                sortind = numpy.array(sortind, int)
                sortindicatorshown = True
            else:
                sortind = None

            datamodel.setSortIndices(sortind)

        self.predictionsview.horizontalHeader() \
            .setSortIndicatorShown(sortindicatorshown)

    def _reset_order(self):
        """Reset the row sorting to original input order."""
        datamodel = self.dataview.model()
        predmodel = self.predictionsview.model()
        if datamodel is not None:
            datamodel.sort(-1)
        if predmodel is not None:
            predmodel.sort(-1)
        self.predictionsview.horizontalHeader().setSortIndicatorShown(False)

    def _update_prediction_delegate(self):
        """Update the predicted probability visibility state"""
        if self.class_var is not None:
            delegate = PredictionsItemDelegate()
            if self.class_var.is_continuous:
                self._setup_delegate_continuous(delegate)
            else:
                self._setup_delegate_discrete(delegate)
                proxy = self.predictionsview.model()
                if proxy is not None:
                    proxy.setProbInd(
                        numpy.array(self.selected_classes, dtype=int))
            self.predictionsview.setItemDelegate(delegate)
            self.predictionsview.resizeColumnsToContents()
        self._update_spliter()

    def _setup_delegate_discrete(self, delegate):
        """Configure ``delegate`` for a discrete target and return it.

        The display format combines the selected class probabilities and
        the predicted value, depending on the "Show" options.
        """
        colors = [QColor(*rgb) for rgb in self.class_var.colors]
        fmt = []
        if self.show_probabilities:
            fmt.append(" : ".join("{{dist[{}]:.2f}}".format(i)
                                  for i in sorted(self.selected_classes)))
        if self.show_predictions:
            fmt.append("{value!s}")
        delegate.setFormat(" \N{RIGHTWARDS ARROW} ".join(fmt))
        if self.draw_dist:
            delegate.setColors(colors)
        return delegate

    def _setup_delegate_continuous(self, delegate):
        """Configure ``delegate`` to format values like the class variable."""
        delegate.setFormat("{{value:{}}}".format(
            self.class_var.format_str[1:]))

    def _update_spliter(self):
        """Resize the horizontal splitter to fit the predictions view."""
        if self.data is None:
            return

        def width(view):
            h_header = view.horizontalHeader()
            v_header = view.verticalHeader()
            return h_header.length() + v_header.width()

        w = width(self.predictionsview) + 4
        w1, w2 = self.splitter.sizes()
        self.splitter.setSizes([w, w1 + w2 - w])

    def commit(self):
        """Send both outputs."""
        self._commit_predictions()
        self._commit_evaluation_results()

    def _commit_evaluation_results(self):
        """Send evaluation results for rows with a known class value.

        ``None`` is sent when there is no valid predictor or the data has
        no class variable.
        """
        slots = self._valid_predictors()
        if not slots or self.data.domain.class_var is None:
            self.Outputs.evaluation_results.send(None)
            return

        class_var = self.class_var
        # Rows with a missing class value cannot be evaluated.
        nanmask = numpy.isnan(self.data.get_column_view(class_var)[0])
        data = self.data[~nanmask]
        results = Orange.evaluation.Results(data, store_data=True)
        results.folds = None
        results.row_indices = numpy.arange(len(data))
        results.actual = data.Y.ravel()
        results.predicted = numpy.vstack(
            tuple(p.results.predicted[0][~nanmask] for p in slots))
        if class_var and class_var.is_discrete:
            results.probabilities = numpy.array(
                [p.results.probabilities[0][~nanmask] for p in slots])
        results.learner_names = [p.name for p in slots]
        self.Outputs.evaluation_results.send(results)

    def _commit_predictions(self):
        """Send the data extended with prediction meta columns.

        ``None`` is sent when there is no valid predictor.
        """
        slots = self._valid_predictors()
        if not slots:
            self.Outputs.predictions.send(None)
            return

        if self.class_var and self.class_var.is_discrete:
            newmetas, newcolumns = self._classification_output_columns()
        else:
            newmetas, newcolumns = self._regression_output_columns()

        attrs = list(self.data.domain.attributes) if self.output_attrs else []
        metas = list(self.data.domain.metas) + newmetas
        domain = \
            Orange.data.Domain(attrs, self.data.domain.class_var, metas=metas)
        predictions = self.data.transform(domain)
        if newcolumns:
            newcolumns = numpy.hstack(
                [numpy.atleast_2d(cols) for cols in newcolumns])
            # The new meta variables were appended last, so the new values
            # go into the trailing meta columns.
            predictions.metas[:, -newcolumns.shape[1]:] = newcolumns
        self.Outputs.predictions.send(predictions)

    def _classification_output_columns(self):
        """Return ``(newmetas, newcolumns)`` for a discrete target.

        Predicted classes and/or class probabilities are included
        according to the "Output" options.
        """
        newmetas = []
        newcolumns = []
        slots = self._valid_predictors()
        if self.output_predictions:
            newmetas += [
                DiscreteVariable(name=p.name, values=self.class_values)
                for p in slots
            ]
            newcolumns += [
                p.results.predicted[0].reshape((-1, 1)) for p in slots
            ]

        if self.output_probabilities:
            newmetas += [
                ContinuousVariable(name="%s (%s)" % (p.name, value))
                for p in slots for value in self.class_values
            ]
            newcolumns += [p.results.probabilities[0] for p in slots]
        return newmetas, newcolumns

    def _regression_output_columns(self):
        """Return ``(newmetas, newcolumns)`` for a continuous target."""
        slots = self._valid_predictors()
        newmetas = [ContinuousVariable(name=p.name) for p in slots]
        newcolumns = [p.results.predicted[0].reshape((-1, 1)) for p in slots]
        return newmetas, newcolumns

    def send_report(self):
        """Report the info text and a table of data with predictions."""

        def merge_data_with_predictions():
            data_model = self.dataview.model()
            predictions_model = self.predictionsview.model()

            # use ItemDelegate to style prediction values
            style = lambda x: self.predictionsview.itemDelegate().displayText(
                x, QLocale())

            # iterate only over visible columns of data's QTableView
            iter_data_cols = list(
                filter(lambda x: not self.dataview.isColumnHidden(x),
                       range(data_model.columnCount())))

            # print header
            yield [''] + \
                  [predictions_model.headerData(col, Qt.Horizontal,
                                                Qt.DisplayRole)
                   for col in range(predictions_model.columnCount())] + \
                  [data_model.headerData(col, Qt.Horizontal, Qt.DisplayRole)
                   for col in iter_data_cols]

            # print data & predictions
            for i in range(data_model.rowCount()):
                yield [data_model.headerData(i, Qt.Vertical,
                                             Qt.DisplayRole)] + \
                      [style(predictions_model.data(
                          predictions_model.index(i, j)))
                       for j in range(predictions_model.columnCount())] + \
                      [data_model.data(data_model.index(i, j))
                       for j in iter_data_cols]

        if self.data is not None and self.class_var is not None:
            text = self.infolabel.text().replace('\n', '<br>')
            if self.show_probabilities and self.selected_classes:
                text += '<br>Showing probabilities for: '
                text += ', '.join(
                    [self.class_values[i] for i in self.selected_classes])
            self.report_paragraph('Info', text)
            self.report_table("Data & Predictions",
                              merge_data_with_predictions(),
                              header_rows=1, header_columns=1)

    @classmethod
    def predict(cls, predictor, data):
        """Return ``(values, probabilities)`` predicted for ``data``.

        Returns ``None`` when the predictor's domain has no class variable.
        """
        class_var = predictor.domain.class_var
        if class_var:
            if class_var.is_discrete:
                return cls.predict_discrete(predictor, data)
            else:
                return cls.predict_continuous(predictor, data)
        return None

    @staticmethod
    def predict_discrete(predictor, data):
        """Call ``predictor`` on ``data`` requesting ``Model.ValueProbs``."""
        return predictor(data, Model.ValueProbs)

    @staticmethod
    def predict_continuous(predictor, data):
        """Return predicted values and an empty ``(len(data), 0)`` array."""
        values = predictor(data, Model.Value)
        return values, numpy.zeros((len(data), 0))