class OWGEODatasets(OWWidget): name = "GEO Data Sets" description = DESCRIPTION icon = "../widgets/icons/GEODataSets.svg" priority = PRIORITY inputs = [] outputs = [("Expression Data", Orange.data.Table)] settingsList = [ "outputRows", "mergeSpots", "gdsSelectionStates", "splitterSettings", "currentGds", "autoCommit", "datasetNames" ] outputRows = Setting(True) mergeSpots = Setting(True) gdsSelectionStates = Setting({}) currentGds = Setting(None) datasetNames = Setting({}) splitterSettings = Setting(( b'\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x01\xea\x00\x00\x00\xd7\x01\x00\x00\x00\x07\x01\x00\x00\x00\x02', b'\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x01\xb5\x00\x00\x02\x10\x01\x00\x00\x00\x07\x01\x00\x00\x00\x01' )) autoCommit = Setting(False) def __init__(self, parent=None, signalManager=None, name=" GEO Data Sets"): OWWidget.__init__(self, parent, signalManager, name) self.selectionChanged = False self.filterString = "" self.datasetName = "" ## GUI box = gui.widgetBox(self.controlArea, "Info", addSpace=True) self.infoBox = gui.widgetLabel(box, "Initializing\n\n") box = gui.widgetBox(self.controlArea, "Output", addSpace=True) gui.radioButtonsInBox(box, self, "outputRows", ["Genes in rows", "Samples in rows"], "Rows", callback=self.commitIf) gui.checkBox(box, self, "mergeSpots", "Merge spots of same gene", callback=self.commitIf) gui.separator(box) self.nameEdit = gui.lineEdit( box, self, "datasetName", "Data set name", tooltip="Override the default output data set name", callback=self.onNameEdited) self.nameEdit.setPlaceholderText("") if sys.version_info < (3, ): box = gui.widgetBox(self.controlArea, "Commit", addSpace=True) self.commitButton = gui.button(box, self, "Commit", callback=self.commit) cb = gui.checkBox(box, self, "autoCommit", "Commit on any change") gui.setStopper(self, self.commitButton, cb, "selectionChanged", self.commit) else: gui.auto_commit(self.controlArea, self, "autoCommit", "Commit", box="Commit") self.commitIf = self.commit gui.rubber(self.controlArea) gui.widgetLabel(self.mainArea, "Filter") self.filterLineEdit = QLineEdit(textChanged=self.filter) self.completer = TokenListCompleter(self, caseSensitivity=Qt.CaseInsensitive) self.filterLineEdit.setCompleter(self.completer) self.mainArea.layout().addWidget(self.filterLineEdit) splitter = QSplitter(Qt.Vertical, self.mainArea) self.mainArea.layout().addWidget(splitter) self.treeWidget = QTreeView(splitter) self.treeWidget.setSelectionMode(QTreeView.SingleSelection) self.treeWidget.setRootIsDecorated(False) self.treeWidget.setSortingEnabled(True) self.treeWidget.setAlternatingRowColors(True) self.treeWidget.setUniformRowHeights(True) self.treeWidget.setEditTriggers(QTreeView.NoEditTriggers) linkdelegate = LinkStyledItemDelegate(self.treeWidget) self.treeWidget.setItemDelegateForColumn(1, linkdelegate) self.treeWidget.setItemDelegateForColumn(8, linkdelegate) self.treeWidget.setItemDelegateForColumn( 0, gui.IndicatorItemDelegate(self.treeWidget, role=Qt.DisplayRole)) proxyModel = MySortFilterProxyModel(self.treeWidget) self.treeWidget.setModel(proxyModel) self.treeWidget.selectionModel().selectionChanged.connect( self.updateSelection) self.treeWidget.viewport().setMouseTracking(True) splitterH = QSplitter(Qt.Horizontal, splitter) box = gui.widgetBox(splitterH, "Description") self.infoGDS = gui.widgetLabel(box, "") self.infoGDS.setWordWrap(True) gui.rubber(box) box = gui.widgetBox(splitterH, "Sample Annotations") self.annotationsTree = QTreeWidget(box) self.annotationsTree.setHeaderLabels( ["Type (Sample annotations)", "Sample count"]) self.annotationsTree.setRootIsDecorated(True) box.layout().addWidget(self.annotationsTree) self.annotationsTree.itemChanged.connect( self.annotationSelectionChanged) self._annotationsUpdating = False self.splitters = splitter, splitterH for sp, setting in zip(self.splitters, self.splitterSettings): sp.splitterMoved.connect(self.splitterMoved) sp.restoreState(setting) self.searchKeys = [ "dataset_id", "title", "platform_organism", "description" ] self.gds = [] self.gds_info = None self.resize(1000, 600) self.setBlocking(True) self.setEnabled(False) self.progressBarInit() self._executor = ThreadExecutor() func = partial(get_gds_model, methodinvoke(self, "_setProgress", (float, ))) self._inittask = Task(function=func) self._inittask.finished.connect(self._initializemodel) self._executor.submit(self._inittask) self._datatask = None @Slot(float) def _setProgress(self, value): self.progressBarValue = value def _initializemodel(self): assert self.thread() is QThread.currentThread() model, self.gds_info, self.gds = self._inittask.result() model.setParent(self) proxy = self.treeWidget.model() proxy.setFilterKeyColumn(0) proxy.setFilterRole(TextFilterRole) proxy.setFilterCaseSensitivity(False) proxy.setFilterFixedString(self.filterString) proxy.setSourceModel(model) proxy.sort(0, Qt.DescendingOrder) self.progressBarFinished() self.setBlocking(False) self.setEnabled(True) filter_items = " ".join(gds[key] for gds in self.gds for key in self.searchKeys) tr_chars = ",.:;!?(){}[]_-+\\|/%#@$^&*<>~`" tr_table = str.maketrans(tr_chars, " " * len(tr_chars)) filter_items = filter_items.translate(tr_table) filter_items = sorted(set(filter_items.split(" "))) filter_items = [item for item in filter_items if len(item) > 3] self.completer.setTokenList(filter_items) if self.currentGds: current_id = self.currentGds["dataset_id"] gdss = [(i, qunpack(proxy.data(proxy.index(i, 1), Qt.DisplayRole))) for i in range(proxy.rowCount())] current = [i for i, data in gdss if data and data == current_id] if current: current_index = proxy.index(current[0], 0) self.treeWidget.selectionModel().select( current_index, QItemSelectionModel.Select | QItemSelectionModel.Rows) self.treeWidget.scrollTo(current_index, QTreeView.PositionAtCenter) for i in range(8): self.treeWidget.resizeColumnToContents(i) self.treeWidget.setColumnWidth( 1, min(self.treeWidget.columnWidth(1), 300)) self.treeWidget.setColumnWidth( 2, min(self.treeWidget.columnWidth(2), 200)) self.updateInfo() def updateInfo(self): gds_info = self.gds_info text = ("%i datasets\n%i datasets cached\n" % (len(gds_info), len(glob.glob(serverfiles.localpath("GEO") + "/GDS*")))) filtered = self.treeWidget.model().rowCount() if len(self.gds) != filtered: text += ("%i after filtering") % filtered self.infoBox.setText(text) def updateSelection(self, *args): current = self.treeWidget.selectedIndexes() mapToSource = self.treeWidget.model().mapToSource current = [mapToSource(index).row() for index in current] if current: self.currentGds = self.gds[current[0]] self.setAnnotations(self.currentGds) self.infoGDS.setText(self.currentGds.get("description", "")) self.nameEdit.setPlaceholderText(self.currentGds["title"]) self.datasetName = \ self.datasetNames.get(self.currentGds["dataset_id"], "") else: self.currentGds = None self.nameEdit.setPlaceholderText("") self.datasetName = "" self.commitIf() def setAnnotations(self, gds): self._annotationsUpdating = True self.annotationsTree.clear() annotations = defaultdict(set) subsetscount = {} for desc in gds["subsets"]: annotations[desc["type"]].add(desc["description"]) subsetscount[desc["description"]] = str(len(desc["sample_id"])) for type, subsets in annotations.items(): key = (gds["dataset_id"], type) subsetItem = QTreeWidgetItem(self.annotationsTree, [type]) subsetItem.setFlags(subsetItem.flags() | Qt.ItemIsUserCheckable | Qt.ItemIsTristate) subsetItem.setCheckState( 0, self.gdsSelectionStates.get(key, Qt.Checked)) subsetItem.key = key for subset in subsets: key = (gds["dataset_id"], type, subset) item = QTreeWidgetItem( subsetItem, [subset, subsetscount.get(subset, "")]) item.setFlags(item.flags() | Qt.ItemIsUserCheckable) item.setCheckState( 0, self.gdsSelectionStates.get(key, Qt.Checked)) item.key = key self._annotationsUpdating = False self.annotationsTree.expandAll() for i in range(self.annotationsTree.columnCount()): self.annotationsTree.resizeColumnToContents(i) def annotationSelectionChanged(self, item, column): if self._annotationsUpdating: return for i in range(self.annotationsTree.topLevelItemCount()): item = self.annotationsTree.topLevelItem(i) self.gdsSelectionStates[item.key] = item.checkState(0) for j in range(item.childCount()): child = item.child(j) self.gdsSelectionStates[child.key] = child.checkState(0) def filter(self): filter_string = unicode(self.filterLineEdit.text()) proxyModel = self.treeWidget.model() if proxyModel: strings = filter_string.lower().strip().split() proxyModel.setFilterFixedStrings(strings) self.updateInfo() def selectedSamples(self): """ Return the currently selected sample annotations. The return value is a list of selected (sample type, sample value) tuples. .. note:: if some Sample annotation type has no selected values. this method will return all values for it. """ samples = [] unused_types = [] used_types = [] for stype in childiter(self.annotationsTree.invisibleRootItem()): selected_values = [] all_values = [] for sval in childiter(stype): value = (str(stype.text(0)), str(sval.text(0))) if self.gdsSelectionStates.get(sval.key, True): selected_values.append(value) all_values.append(value) if selected_values: samples.extend(selected_values) used_types.append(str(stype.text(0))) else: # If no sample of sample type is selected we don't filter # on it. samples.extend(all_values) unused_types.append(str(stype.text(0))) return samples, used_types def commitIf(self): if self.autoCommit: self.commit() else: self.selectionChanged = True @Slot(int, int) def progressCompleted(self, value, total): if total > 0: self.progressBarSet(100. * value / total, processEvents=False) else: pass # TODO: report 'indeterminate progress' def commit(self): if self.currentGds: self.error(0) sample_type = None self.progressBarInit(processEvents=None) _, groups = self.selectedSamples() if len(groups) == 1 and self.outputRows: sample_type = groups[0] self.setEnabled(False) self.setBlocking(True) progress = methodinvoke(self, "progressCompleted", (int, int)) def get_data(gds_id, report_genes, transpose, sample_type, title): gds_ensure_downloaded(gds_id, progress) gds = geo.GDS(gds_id) data = gds.getdata(report_genes=report_genes, transpose=transpose, sample_type=sample_type) data.name = title return data get_data = partial(get_data, self.currentGds["dataset_id"], report_genes=self.mergeSpots, transpose=self.outputRows, sample_type=sample_type, title=self.datasetName or self.currentGds["title"]) self._datatask = Task(function=get_data) self._datatask.finished.connect(self._on_dataready) self._executor.submit(self._datatask) def _on_dataready(self): self.setEnabled(True) self.setBlocking(False) self.progressBarFinished(processEvents=False) try: data = self._datatask.result() except urlrequest.URLError as error: self.error(0, ("Error while connecting to the NCBI ftp server! " "'%s'" % error)) sys.excepthook(type(error), error, getattr(error, "__traceback__")) return finally: self._datatask = None data_name = data.name samples, _ = self.selectedSamples() self.warning(0) message = None if self.outputRows: def samplesinst(ex): out = [] for meta in data.domain.metas: out.append((meta.name, ex[meta].value)) if data.domain.class_var.name != 'class': out.append((data.domain.class_var.name, ex[data.domain.class_var].value)) return out samples = set(samples) mask = [samples.issuperset(samplesinst(ex)) for ex in data] data = data[numpy.array(mask, dtype=bool)] if len(data) == 0: message = "No samples with selected sample annotations." else: samples = set(samples) domain = Orange.data.Domain([ attr for attr in data.domain.attributes if samples.issuperset(attr.attributes.items()) ], data.domain.class_var, data.domain.metas) # domain.addmetas(data.domain.getmetas()) if len(domain.attributes) == 0: message = "No samples with selected sample annotations." stypes = set(s[0] for s in samples) for attr in domain.attributes: attr.attributes = dict( (key, value) for key, value in attr.attributes.items() if key in stypes) data = Orange.data.Table(domain, data) if message is not None: self.warning(0, message) data_hints.set_hint(data, "taxid", self.currentGds.get("taxid", ""), 10.0) data_hints.set_hint(data, "genesinrows", self.outputRows, 10.0) data.name = data_name self.send("Expression Data", data) model = self.treeWidget.model().sourceModel() row = self.gds.index(self.currentGds) model.setData(model.index(row, 0), " ", Qt.DisplayRole) self.updateInfo() self.selectionChanged = False def splitterMoved(self, *args): self.splitterSettings = [ bytes(sp.saveState()) for sp in self.splitters ] def send_report(self): self.report_items("GEO Dataset", [("ID", self.currentGds['dataset_id']), ("Title", self.currentGds['title']), ("Organism", self.currentGds['sample_organism'])]) self.report_items("Data", [("Samples", self.currentGds['sample_count']), ("Features", self.currentGds['feature_count']), ("Genes", self.currentGds['gene_count'])]) self.report_name("Sample annotations") subsets = defaultdict(list) for subset in self.currentGds['subsets']: subsets[subset['type']].append( (subset['description'], len(subset['sample_id']))) self.report_html += "<ul>" for type in subsets: self.report_html += "<b>" + type + ":</b></br>" for desc, count in subsets[type]: self.report_html += 9 * " " + "<b>{}:</b> {}</br>".format( desc, count) self.report_html += "</ul>" def onDeleteWidget(self): if self._inittask: self._inittask.future().cancel() self._inittask.finished.disconnect(self._initializemodel) if self._datatask: self._datatask.future().cancel() self._datatask.finished.disconnect(self._on_dataready) self._executor.shutdown(wait=False) super(OWGEODatasets, self).onDeleteWidget() def onNameEdited(self): if self.currentGds: gds_id = self.currentGds["dataset_id"] self.datasetNames[gds_id] = unicode(self.nameEdit.text()) self.commitIf()
class OWGeneInfo(widget.OWWidget): name = "Gene Info" description = "Displays gene information from NCBI and other sources." icon = "../widgets/icons/GeneInfo.svg" priority = 2010 inputs = [("Data", Orange.data.Table, "setData")] outputs = [("Data Subset", Orange.data.Table)] settingsHandler = settings.DomainContextHandler() organism_index = settings.ContextSetting(0) taxid = settings.ContextSetting("9606") gene_attr = settings.ContextSetting(0) auto_commit = settings.Setting(False) search_string = settings.Setting("") useAttr = settings.ContextSetting(False) useAltSource = settings.ContextSetting(False) def __init__( self, parent=None, ): super().__init__(self, parent) self.selectionChangedFlag = False self.__initialized = False self.initfuture = None self.itemsfuture = None self.infoLabel = gui.widgetLabel( gui.widgetBox(self.controlArea, "Info", addSpace=True), "Initializing\n") self.organisms = None self.organismBox = gui.widgetBox(self.controlArea, "Organism", addSpace=True) self.organismComboBox = gui.comboBox( self.organismBox, self, "organism_index", callback=self._onSelectedOrganismChanged) # For now only support one alt source, with a checkbox # In the future this can be extended to multiple selections self.altSourceCheck = gui.checkBox(self.organismBox, self, "useAltSource", "Show information from dictyBase", callback=self.onAltSourceChange) self.altSourceCheck.hide() box = gui.widgetBox(self.controlArea, "Gene names", addSpace=True) self.geneAttrComboBox = gui.comboBox(box, self, "gene_attr", "Gene attribute", callback=self.updateInfoItems) self.geneAttrComboBox.setEnabled(not self.useAttr) cb = gui.checkBox(box, self, "useAttr", "Use attribute names", callback=self.updateInfoItems) cb.toggled[bool].connect(self.geneAttrComboBox.setDisabled) gui.auto_commit(self.controlArea, self, "auto_commit", "Commit") # A label for dictyExpress link (Why oh god why???) self.dictyExpressBox = gui.widgetBox(self.controlArea, "Dicty Express") self.linkLabel = gui.widgetLabel(self.dictyExpressBox, "") self.linkLabel.setOpenExternalLinks(False) self.linkLabel.linkActivated.connect(self.onDictyExpressLink) self.dictyExpressBox.hide() gui.rubber(self.controlArea) gui.lineEdit(self.mainArea, self, "search_string", "Filter", callbackOnType=True, callback=self.searchUpdate) self.treeWidget = QTreeView(self.mainArea, selectionMode=QTreeView.ExtendedSelection, rootIsDecorated=False, uniformRowHeights=True, sortingEnabled=True) self.treeWidget.setItemDelegate( gui.LinkStyledItemDelegate(self.treeWidget)) self.treeWidget.viewport().setMouseTracking(True) self.mainArea.layout().addWidget(self.treeWidget) box = gui.widgetBox(self.mainArea, "", orientation="horizontal") gui.button(box, self, "Select Filtered", callback=self.selectFiltered) gui.button(box, self, "Clear Selection", callback=self.treeWidget.clearSelection) self.geneinfo = [] self.cells = [] self.row2geneinfo = {} self.data = None # : (# input genes, # matches genes) self.matchedInfo = 0, 0 self.setBlocking(True) self.executor = ThreadExecutor(self) self.progressBarInit() task = Task( function=partial(taxonomy.ensure_downloaded, callback=methodinvoke(self, "advance", ()))) task.resultReady.connect(self.initialize) task.exceptionReady.connect(self._onInitializeError) self.initfuture = self.executor.submit(task) def sizeHint(self): return QSize(1024, 720) @Slot() def advance(self): assert self.thread() is QThread.currentThread() self.progressBarSet(self.progressBarValue + 1, processEvents=None) def initialize(self): if self.__initialized: # Already initialized return self.__initialized = True self.organisms = sorted( set([ name.split(".")[-2] for name in serverfiles.listfiles("NCBI_geneinfo") ] + gene.NCBIGeneInfo.common_taxids())) self.organismComboBox.addItems( [taxonomy.name(tax_id) for tax_id in self.organisms]) if self.taxid in self.organisms: self.organism_index = self.organisms.index(self.taxid) else: self.organism_index = 0 self.taxid = self.organisms[self.organism_index] self.altSourceCheck.setVisible(self.taxid == DICTY_TAXID) self.dictyExpressBox.setVisible(self.taxid == DICTY_TAXID) self.infoLabel.setText("No data on input\n") self.initfuture = None self.setBlocking(False) self.progressBarFinished(processEvents=None) def _onInitializeError(self, exc): sys.excepthook(type(exc), exc, None) self.error(0, "Could not download the necessary files.") def _onSelectedOrganismChanged(self): assert 0 <= self.organism_index <= len(self.organisms) self.taxid = self.organisms[self.organism_index] self.altSourceCheck.setVisible(self.taxid == DICTY_TAXID) self.dictyExpressBox.setVisible(self.taxid == DICTY_TAXID) if self.data is not None: self.updateInfoItems() def setData(self, data=None): if not self.__initialized: self.initfuture.result() self.initialize() if self.itemsfuture is not None: raise Exception("Already processing") self.closeContext() self.data = data if data is not None: self.geneAttrComboBox.clear() self.attributes = \ [attr for attr in data.domain.variables + data.domain.metas if isinstance(attr, (Orange.data.StringVariable, Orange.data.DiscreteVariable))] for var in self.attributes: self.geneAttrComboBox.addItem(*gui.attributeItem(var)) self.taxid = data_hints.get_hint(self.data, "taxid", self.taxid) self.useAttr = data_hints.get_hint(self.data, "genesinrows", self.useAttr) self.openContext(data) self.gene_attr = min(self.gene_attr, len(self.attributes) - 1) if self.taxid in self.organisms: self.organism_index = self.organisms.index(self.taxid) else: self.organism_index = 0 self.taxid = self.organisms[self.organism_index] self.updateInfoItems() else: self.clear() def infoSource(self): """ Return the current selected info source getter function from INFO_SOURCES """ org = self.organisms[min(self.organism_index, len(self.organisms) - 1)] if org not in INFO_SOURCES: org = "default" sources = INFO_SOURCES[org] name, func = sources[min(self.useAltSource, len(sources) - 1)] return name, func def inputGenes(self): if self.useAttr: genes = [attr.name for attr in self.data.domain.attributes] elif self.attributes: attr = self.attributes[self.gene_attr] genes = [ str(ex[attr]) for ex in self.data if not math.isnan(ex[attr]) ] else: genes = [] return genes def updateInfoItems(self): self.warning(0) if self.data is None: return genes = self.inputGenes() if self.useAttr: genes = [attr.name for attr in self.data.domain.attributes] elif self.attributes: attr = self.attributes[self.gene_attr] genes = [ str(ex[attr]) for ex in self.data if not math.isnan(ex[attr]) ] else: genes = [] if not genes: self.warning(0, "Could not extract genes from input dataset.") self.warning(1) org = self.organisms[min(self.organism_index, len(self.organisms) - 1)] source_name, info_getter = self.infoSource() self.error(0) self.updateDictyExpressLink(genes, show=org == DICTY_TAXID) self.altSourceCheck.setVisible(org == DICTY_TAXID) self.progressBarInit() self.setBlocking(True) self.setEnabled(False) self.infoLabel.setText("Retrieving info records.\n") self.genes = genes task = Task(function=partial( info_getter, org, genes, advance=methodinvoke(self, "advance", ( )))) self.itemsfuture = self.executor.submit(task) task.finished.connect(self._onItemsCompleted) def _onItemsCompleted(self): self.setBlocking(False) self.progressBarFinished() self.setEnabled(True) try: schema, geneinfo = self.itemsfuture.result() finally: self.itemsfuture = None self.geneinfo = geneinfo = list(zip(self.genes, geneinfo)) self.cells = cells = [] self.row2geneinfo = {} links = [] for i, (_, gi) in enumerate(geneinfo): if gi: row = [] for _, item in zip(schema, gi): if isinstance(item, Link): # TODO: This should be handled by delegates row.append(item.text) links.append(item.link) else: row.append(item) cells.append(row) self.row2geneinfo[len(cells) - 1] = i model = TreeModel(cells, [str(col) for col in schema], None) model.setColumnLinks(0, links) proxyModel = QSortFilterProxyModel(self) proxyModel.setSourceModel(model) self.treeWidget.setModel(proxyModel) self.treeWidget.selectionModel().selectionChanged.connect(self.commit) for i in range(7): self.treeWidget.resizeColumnToContents(i) self.treeWidget.setColumnWidth( i, min(self.treeWidget.columnWidth(i), 200)) self.infoLabel.setText("%i genes\n%i matched NCBI's IDs" % (len(self.genes), len(cells))) self.matchedInfo = len(self.genes), len(cells) def clear(self): self.infoLabel.setText("No data on input\n") self.treeWidget.setModel( TreeModel([], [ "NCBI ID", "Symbol", "Locus Tag", "Chromosome", "Description", "Synonyms", "Nomenclature" ], self.treeWidget)) self.geneAttrComboBox.clear() self.send("Data Subset", None) def commit(self): if self.data is None: self.send("Data Subset", None) return model = self.treeWidget.model() selection = self.treeWidget.selectionModel().selection() selection = model.mapSelectionToSource(selection) selectedRows = list( chain(*(range(r.top(), r.bottom() + 1) for r in selection))) model = model.sourceModel() selectedGeneids = [self.row2geneinfo[row] for row in selectedRows] selectedIds = [self.geneinfo[i][0] for i in selectedGeneids] selectedIds = set(selectedIds) gene2row = dict((self.geneinfo[self.row2geneinfo[row]][0], row) for row in selectedRows) isselected = selectedIds.__contains__ if self.useAttr: def is_selected(attr): return attr.name in selectedIds attrs = [ attr for attr in self.data.domain.attributes if isselected(attr.name) ] domain = Orange.data.Domain(attrs, self.data.domain.class_vars, self.data.domain.metas) newdata = self.data.from_table(domain, self.data) self.send("Data Subset", newdata) elif self.attributes: attr = self.attributes[self.gene_attr] gene_col = [ attr.str_val(v) for v in self.data.get_column_view(attr)[0] ] gene_col = [(i, name) for i, name in enumerate(gene_col) if isselected(name)] indices = [i for i, _ in gene_col] # Add a gene info columns to the output headers = [ str(model.headerData(i, Qt.Horizontal, Qt.DisplayRole)) for i in range(model.columnCount()) ] metas = [Orange.data.StringVariable(name) for name in headers] domain = Orange.data.Domain(self.data.domain.attributes, self.data.domain.class_vars, self.data.domain.metas + tuple(metas)) newdata = self.data.from_table(domain, self.data)[indices] model_rows = [gene2row[gene] for _, gene in gene_col] for col, meta in zip(range(model.columnCount()), metas): col_data = [ str(model.index(row, col).data(Qt.DisplayRole)) for row in model_rows ] col_data = np.array(col_data, dtype=object, ndmin=2).T newdata[:, meta] = col_data if not len(newdata): newdata = None self.send("Data Subset", newdata) else: self.send("Data Subset", None) def rowFiltered(self, row): searchStrings = self.search_string.lower().split() row = " ".join(self.cells[row]).lower() return not all([s in row for s in searchStrings]) def searchUpdate(self): if not self.data: return searchStrings = self.search_string.lower().split() index = self.treeWidget.model().sourceModel().index mapFromSource = self.treeWidget.model().mapFromSource for i, row in enumerate(self.cells): row = " ".join(row).lower() self.treeWidget.setRowHidden( mapFromSource(index(i, 0)).row(), QModelIndex(), not all([s in row for s in searchStrings])) def selectFiltered(self): if not self.data: return itemSelection = QItemSelection() index = self.treeWidget.model().sourceModel().index mapFromSource = self.treeWidget.model().mapFromSource for i, row in enumerate(self.cells): if not self.rowFiltered(i): itemSelection.select(mapFromSource(index(i, 0)), mapFromSource(index(i, 0))) self.treeWidget.selectionModel().select( itemSelection, QItemSelectionModel.Select | QItemSelectionModel.Rows) def updateDictyExpressLink(self, genes, show=False): def fix(ddb): if ddb.startswith("DDB"): if not ddb.startswith("DDB_G"): ddb = ddb.replace("DDB", "DDB_G") return ddb return None if show: genes = [fix(gene) for gene in genes if fix(gene)] link1 = '<a href="http://dictyexpress.biolab.si/run/index.php?gene=%s">Microarray profile</a>' link2 = '<a href="http://dictyexpress.biolab.si/run/index.php?gene=%s&db=rnaseq">RNA-Seq profile</a>' self.linkLabel.setText(link1 + "<br/>" + link2) show = any(genes) if show: self.dictyExpressBox.show() else: self.dictyExpressBox.hide() def onDictyExpressLink(self, link): if not self.data: return selectedIndexes = self.treeWidget.selectedIndexes() if not len(selectedIndexes): QMessageBox.information(self, "No gene ids selected", "Please select some genes and try again.") return model = self.treeWidget.model() mapToSource = model.mapToSource selectedRows = self.treeWidget.selectedIndexes() selectedRows = [mapToSource(index).row() for index in selectedRows] model = model.sourceModel() selectedGeneids = [self.row2geneinfo[row] for row in selectedRows] selectedIds = [self.geneinfo[i][0] for i in selectedGeneids] selectedIds = set(selectedIds) def fix(ddb): if ddb.startswith("DDB"): if not ddb.startswith("DDB_G"): ddb = ddb.replace("DDB", "DDB_G") return ddb return None genes = [fix(gene) for gene in selectedIds if fix(gene)] url = str(link) % " ".join(genes) QDesktopServices.openUrl(QUrl(url)) def onAltSourceChange(self): self.updateInfoItems() def onDeleteWidget(self): # try to cancel pending tasks if self.initfuture: self.initfuture.cancel() if self.itemsfuture: self.itemsfuture.cancel() self.executor.shutdown(wait=False) super().onDeleteWidget()
class OWGEODatasets(OWWidget): name = "GEO Data Sets" description = DESCRIPTION icon = "../widgets/icons/GEODataSets.svg" priority = PRIORITY inputs = [] outputs = [("Expression Data", Orange.data.Table)] settingsList = ["outputRows", "mergeSpots", "gdsSelectionStates", "splitterSettings", "currentGds", "autoCommit", "datasetNames"] outputRows = Setting(True) mergeSpots = Setting(True) gdsSelectionStates = Setting({}) currentGds = Setting(None) datasetNames = Setting({}) splitterSettings = Setting( (b'\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x01\xea\x00\x00\x00\xd7\x01\x00\x00\x00\x07\x01\x00\x00\x00\x02', b'\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x01\xb5\x00\x00\x02\x10\x01\x00\x00\x00\x07\x01\x00\x00\x00\x01') ) autoCommit = Setting(False) def __init__(self, parent=None, signalManager=None, name=" GEO Data Sets"): OWWidget.__init__(self, parent, signalManager, name) self.selectionChanged = False self.filterString = "" self.datasetName = "" ## GUI box = gui.widgetBox(self.controlArea, "Info", addSpace=True) self.infoBox = gui.widgetLabel(box, "Initializing\n\n") box = gui.widgetBox(self.controlArea, "Output", addSpace=True) gui.radioButtonsInBox(box, self, "outputRows", ["Genes in rows", "Samples in rows"], "Rows", callback=self.commitIf) gui.checkBox(box, self, "mergeSpots", "Merge spots of same gene", callback=self.commitIf) gui.separator(box) self.nameEdit = gui.lineEdit( box, self, "datasetName", "Data set name", tooltip="Override the default output data set name", callback=self.onNameEdited ) self.nameEdit.setPlaceholderText("") if sys.version_info < (3, ): box = gui.widgetBox(self.controlArea, "Commit", addSpace=True) self.commitButton = gui.button( box, self, "Commit", callback=self.commit) cb = gui.checkBox(box, self, "autoCommit", "Commit on any change") gui.setStopper(self, self.commitButton, cb, "selectionChanged", self.commit) else: gui.auto_commit(self.controlArea, self, "autoCommit", "Commit", box="Commit") self.commitIf = self.commit gui.rubber(self.controlArea) gui.widgetLabel(self.mainArea, "Filter") self.filterLineEdit = QLineEdit( textChanged=self.filter ) self.completer = TokenListCompleter( self, caseSensitivity=Qt.CaseInsensitive ) self.filterLineEdit.setCompleter(self.completer) self.mainArea.layout().addWidget(self.filterLineEdit) splitter = QSplitter(Qt.Vertical, self.mainArea) self.mainArea.layout().addWidget(splitter) self.treeWidget = QTreeView(splitter) self.treeWidget.setSelectionMode(QTreeView.SingleSelection) self.treeWidget.setRootIsDecorated(False) self.treeWidget.setSortingEnabled(True) self.treeWidget.setAlternatingRowColors(True) self.treeWidget.setUniformRowHeights(True) self.treeWidget.setEditTriggers(QTreeView.NoEditTriggers) linkdelegate = LinkStyledItemDelegate(self.treeWidget) self.treeWidget.setItemDelegateForColumn(1, linkdelegate) self.treeWidget.setItemDelegateForColumn(8, linkdelegate) self.treeWidget.setItemDelegateForColumn( 0, gui.IndicatorItemDelegate(self.treeWidget, role=Qt.DisplayRole)) proxyModel = MySortFilterProxyModel(self.treeWidget) self.treeWidget.setModel(proxyModel) self.treeWidget.selectionModel().selectionChanged.connect( self.updateSelection ) self.treeWidget.viewport().setMouseTracking(True) splitterH = QSplitter(Qt.Horizontal, splitter) box = gui.widgetBox(splitterH, "Description") self.infoGDS = gui.widgetLabel(box, "") self.infoGDS.setWordWrap(True) gui.rubber(box) box = gui.widgetBox(splitterH, "Sample Annotations") self.annotationsTree = QTreeWidget(box) self.annotationsTree.setHeaderLabels( ["Type (Sample annotations)", "Sample count"] ) self.annotationsTree.setRootIsDecorated(True) box.layout().addWidget(self.annotationsTree) self.annotationsTree.itemChanged.connect( self.annotationSelectionChanged ) self._annotationsUpdating = False self.splitters = splitter, splitterH for sp, setting in zip(self.splitters, self.splitterSettings): sp.splitterMoved.connect(self.splitterMoved) sp.restoreState(setting) self.searchKeys = ["dataset_id", "title", "platform_organism", "description"] self.gds = [] self.gds_info = None self.resize(1000, 600) self.setBlocking(True) self.setEnabled(False) self.progressBarInit() self._executor = ThreadExecutor() func = partial(get_gds_model, methodinvoke(self, "_setProgress", (float,))) self._inittask = Task(function=func) self._inittask.finished.connect(self._initializemodel) self._executor.submit(self._inittask) self._datatask = None @Slot(float) def _setProgress(self, value): self.progressBarValue = value def _initializemodel(self): assert self.thread() is QThread.currentThread() model, self.gds_info, self.gds = self._inittask.result() model.setParent(self) proxy = self.treeWidget.model() proxy.setFilterKeyColumn(0) proxy.setFilterRole(TextFilterRole) proxy.setFilterCaseSensitivity(False) proxy.setFilterFixedString(self.filterString) proxy.setSourceModel(model) proxy.sort(0, Qt.DescendingOrder) self.progressBarFinished() self.setBlocking(False) self.setEnabled(True) filter_items = " ".join( gds[key] for gds in self.gds for key in self.searchKeys ) tr_chars = ",.:;!?(){}[]_-+\\|/%#@$^&*<>~`" tr_table = str.maketrans(tr_chars, " " * len(tr_chars)) filter_items = filter_items.translate(tr_table) filter_items = sorted(set(filter_items.split(" "))) filter_items = [item for item in filter_items if len(item) > 3] self.completer.setTokenList(filter_items) if self.currentGds: current_id = self.currentGds["dataset_id"] gdss = [(i, qunpack(proxy.data(proxy.index(i, 1), Qt.DisplayRole))) for i in range(proxy.rowCount())] current = [i for i, data in gdss if data and data == current_id] if current: current_index = proxy.index(current[0], 0) self.treeWidget.selectionModel().select( current_index, QItemSelectionModel.Select | QItemSelectionModel.Rows ) self.treeWidget.scrollTo( current_index, QTreeView.PositionAtCenter) for i in range(8): self.treeWidget.resizeColumnToContents(i) self.treeWidget.setColumnWidth( 1, min(self.treeWidget.columnWidth(1), 300)) self.treeWidget.setColumnWidth( 2, min(self.treeWidget.columnWidth(2), 200)) self.updateInfo() def updateInfo(self): gds_info = self.gds_info text = ("%i datasets\n%i datasets cached\n" % (len(gds_info), len(glob.glob(serverfiles.localpath("GEO") + "/GDS*")))) filtered = self.treeWidget.model().rowCount() if len(self.gds) != filtered: text += ("%i after filtering") % filtered self.infoBox.setText(text) def updateSelection(self, *args): current = self.treeWidget.selectedIndexes() mapToSource = self.treeWidget.model().mapToSource current = [mapToSource(index).row() for index in current] if current: self.currentGds = self.gds[current[0]] self.setAnnotations(self.currentGds) self.infoGDS.setText(self.currentGds.get("description", "")) self.nameEdit.setPlaceholderText(self.currentGds["title"]) self.datasetName = \ self.datasetNames.get(self.currentGds["dataset_id"], "") else: self.currentGds = None self.nameEdit.setPlaceholderText("") self.datasetName = "" self.commitIf() def setAnnotations(self, gds): self._annotationsUpdating = True self.annotationsTree.clear() annotations = defaultdict(set) subsetscount = {} for desc in gds["subsets"]: annotations[desc["type"]].add(desc["description"]) subsetscount[desc["description"]] = str(len(desc["sample_id"])) for type, subsets in annotations.items(): key = (gds["dataset_id"], type) subsetItem = QTreeWidgetItem(self.annotationsTree, [type]) subsetItem.setFlags(subsetItem.flags() | Qt.ItemIsUserCheckable | Qt.ItemIsTristate) subsetItem.setCheckState( 0, self.gdsSelectionStates.get(key, Qt.Checked) ) subsetItem.key = key for subset in subsets: key = (gds["dataset_id"], type, subset) item = QTreeWidgetItem( subsetItem, [subset, subsetscount.get(subset, "")] ) item.setFlags(item.flags() | Qt.ItemIsUserCheckable) item.setCheckState( 0, self.gdsSelectionStates.get(key, Qt.Checked) ) item.key = key self._annotationsUpdating = False self.annotationsTree.expandAll() for i in range(self.annotationsTree.columnCount()): self.annotationsTree.resizeColumnToContents(i) def annotationSelectionChanged(self, item, column): if self._annotationsUpdating: return for i in range(self.annotationsTree.topLevelItemCount()): item = self.annotationsTree.topLevelItem(i) self.gdsSelectionStates[item.key] = item.checkState(0) for j in range(item.childCount()): child = item.child(j) self.gdsSelectionStates[child.key] = child.checkState(0) def filter(self): filter_string = unicode(self.filterLineEdit.text()) proxyModel = self.treeWidget.model() if proxyModel: strings = filter_string.lower().strip().split() proxyModel.setFilterFixedStrings(strings) self.updateInfo() def selectedSamples(self): """ Return the currently selected sample annotations. The return value is a list of selected (sample type, sample value) tuples. .. note:: if some Sample annotation type has no selected values. this method will return all values for it. """ samples = [] unused_types = [] used_types = [] for stype in childiter(self.annotationsTree.invisibleRootItem()): selected_values = [] all_values = [] for sval in childiter(stype): value = (str(stype.text(0)), str(sval.text(0))) if self.gdsSelectionStates.get(sval.key, True): selected_values.append(value) all_values.append(value) if selected_values: samples.extend(selected_values) used_types.append(str(stype.text(0))) else: # If no sample of sample type is selected we don't filter # on it. samples.extend(all_values) unused_types.append(str(stype.text(0))) return samples, used_types def commitIf(self): if self.autoCommit: self.commit() else: self.selectionChanged = True @Slot(int, int) def progressCompleted(self, value, total): if total > 0: self.progressBarSet(100. * value / total, processEvents=False) else: pass # TODO: report 'indeterminate progress' def commit(self): if self.currentGds: self.error(0) sample_type = None self.progressBarInit(processEvents=None) _, groups = self.selectedSamples() if len(groups) == 1 and self.outputRows: sample_type = groups[0] self.setEnabled(False) self.setBlocking(True) progress = methodinvoke(self, "progressCompleted", (int, int)) def get_data(gds_id, report_genes, transpose, sample_type, title): gds_ensure_downloaded(gds_id, progress) gds = geo.GDS(gds_id) data = gds.getdata( report_genes=report_genes, transpose=transpose, sample_type=sample_type ) data.name = title return data get_data = partial( get_data, self.currentGds["dataset_id"], report_genes=self.mergeSpots, transpose=self.outputRows, sample_type=sample_type, title=self.datasetName or self.currentGds["title"] ) self._datatask = Task(function=get_data) self._datatask.finished.connect(self._on_dataready) self._executor.submit(self._datatask) def _on_dataready(self): self.setEnabled(True) self.setBlocking(False) self.progressBarFinished(processEvents=False) try: data = self._datatask.result() except urlrequest.URLError as error: self.error(0, ("Error while connecting to the NCBI ftp server! " "'%s'" % error)) sys.excepthook(type(error), error, getattr(error, "__traceback__")) return finally: self._datatask = None data_name = data.name samples, _ = self.selectedSamples() self.warning(0) message = None if self.outputRows: def samplesinst(ex): out = [] for meta in data.domain.metas: out.append((meta.name, ex[meta].value)) if data.domain.class_var.name != 'class': out.append((data.domain.class_var.name, ex[data.domain.class_var].value)) return out samples = set(samples) mask = [samples.issuperset(samplesinst(ex)) for ex in data] data = data[numpy.array(mask, dtype=bool)] if len(data) == 0: message = "No samples with selected sample annotations." else: samples = set(samples) domain = Orange.data.Domain( [attr for attr in data.domain.attributes if samples.issuperset(attr.attributes.items())], data.domain.class_var, data.domain.metas ) # domain.addmetas(data.domain.getmetas()) if len(domain.attributes) == 0: message = "No samples with selected sample annotations." stypes = set(s[0] for s in samples) for attr in domain.attributes: attr.attributes = dict( (key, value) for key, value in attr.attributes.items() if key in stypes ) data = Orange.data.Table(domain, data) if message is not None: self.warning(0, message) data_hints.set_hint(data, "taxid", self.currentGds.get("taxid", ""), 10.0) data_hints.set_hint(data, "genesinrows", self.outputRows, 10.0) data.name = data_name self.send("Expression Data", data) model = self.treeWidget.model().sourceModel() row = self.gds.index(self.currentGds) model.setData(model.index(row, 0), " ", Qt.DisplayRole) self.updateInfo() self.selectionChanged = False def splitterMoved(self, *args): self.splitterSettings = [bytes(sp.saveState()) for sp in self.splitters] def send_report(self): self.report_items("GEO Dataset", [("ID", self.currentGds['dataset_id']), ("Title", self.currentGds['title']), ("Organism", self.currentGds['sample_organism'])]) self.report_items("Data", [("Samples", self.currentGds['sample_count']), ("Features", self.currentGds['feature_count']), ("Genes", self.currentGds['gene_count'])]) self.report_name("Sample annotations") subsets = defaultdict(list) for subset in self.currentGds['subsets']: subsets[subset['type']].append((subset['description'], len(subset['sample_id']))) self.report_html += "<ul>" for type in subsets: self.report_html += "<b>" + type + ":</b></br>" for desc, count in subsets[type]: self.report_html += 9 * " " + "<b>{}:</b> {}</br>".format(desc, count) self.report_html += "</ul>" def onDeleteWidget(self): if self._inittask: self._inittask.future().cancel() self._inittask.finished.disconnect(self._initializemodel) if self._datatask: self._datatask.future().cancel() self._datatask.finished.disconnect(self._on_dataready) self._executor.shutdown(wait=False) super(OWGEODatasets, self).onDeleteWidget() def onNameEdited(self): if self.currentGds: gds_id = self.currentGds["dataset_id"] self.datasetNames[gds_id] = unicode(self.nameEdit.text()) self.commitIf()
class OWGeneInfo(widget.OWWidget): name = "Gene Info" description = "Displays gene information from NCBI and other sources." icon = "../widgets/icons/OWGeneInfo.svg" priority = 5 class Inputs: data = Input("Data", Orange.data.Table) class Outputs: selected_genes = Output("Selected Genes", Orange.data.Table) data = Output("Data", Orange.data.Table) settingsHandler = settings.DomainContextHandler() organism_index = settings.ContextSetting(0) taxid = settings.ContextSetting("9606") gene_attr = settings.ContextSetting(0) auto_commit = settings.Setting(False) search_string = settings.Setting("") useAttr = settings.ContextSetting(False) useAltSource = settings.ContextSetting(False) def __init__( self, parent=None, ): super().__init__(self, parent) self.selectionChangedFlag = False self.__initialized = False self.initfuture = None self.itemsfuture = None self.map_input_to_ensembl = None self.infoLabel = gui.widgetLabel( gui.widgetBox(self.controlArea, "Info", addSpace=True), "Initializing\n") self.organisms = None self.organismBox = gui.widgetBox(self.controlArea, "Organism", addSpace=True) self.organismComboBox = gui.comboBox( self.organismBox, self, "organism_index", callback=self._onSelectedOrganismChanged) box = gui.widgetBox(self.controlArea, "Gene names", addSpace=True) self.geneAttrComboBox = gui.comboBox(box, self, "gene_attr", "Gene attribute", callback=self.updateInfoItems) self.geneAttrComboBox.setEnabled(not self.useAttr) self.geneAttrCheckbox = gui.checkBox(box, self, "useAttr", "Use column names", callback=self.updateInfoItems) self.geneAttrCheckbox.toggled[bool].connect( self.geneAttrComboBox.setDisabled) gui.auto_commit(self.controlArea, self, "auto_commit", "Commit") gui.rubber(self.controlArea) gui.lineEdit(self.mainArea, self, "search_string", "Filter", callbackOnType=True, callback=self.searchUpdate) self.treeWidget = QTreeView(self.mainArea) self.treeWidget.setAlternatingRowColors(True) self.treeWidget.setSortingEnabled(True) self.treeWidget.setSelectionMode(QTreeView.ExtendedSelection) self.treeWidget.setUniformRowHeights(True) self.treeWidget.setRootIsDecorated(False) self.treeWidget.setItemDelegateForColumn( HEADER_SCHEMA['NCBI ID'], gui.LinkStyledItemDelegate(self.treeWidget)) self.treeWidget.setItemDelegateForColumn( HEADER_SCHEMA['Ensembl ID'], gui.LinkStyledItemDelegate(self.treeWidget)) self.treeWidget.viewport().setMouseTracking(True) self.mainArea.layout().addWidget(self.treeWidget) box = gui.widgetBox(self.mainArea, "", orientation="horizontal") gui.button(box, self, "Select Filtered", callback=self.selectFiltered) gui.button(box, self, "Clear Selection", callback=self.treeWidget.clearSelection) self.geneinfo = [] self.cells = [] self.row2geneinfo = {} self.data = None # : (# input genes, # matches genes) self.matchedInfo = 0, 0 self.setBlocking(True) self.executor = ThreadExecutor(self) self.progressBarInit() task = Task( function=partial(taxonomy.ensure_downloaded, callback=methodinvoke(self, "advance", ()))) task.resultReady.connect(self.initialize) task.exceptionReady.connect(self._onInitializeError) self.initfuture = self.executor.submit(task) def sizeHint(self): return QSize(1024, 720) @Slot() def advance(self): assert self.thread() is QThread.currentThread() self.progressBarSet(self.progressBarValue + 1, processEvents=None) def _get_available_organisms(self): available_organism = sorted([(tax_id, taxonomy.name(tax_id)) for tax_id in taxonomy.common_taxids()], key=lambda x: x[1]) self.organisms = [tax_id[0] for tax_id in available_organism] self.organismComboBox.addItems( [tax_id[1] for tax_id in available_organism]) def initialize(self): if self.__initialized: # Already initialized return self.__initialized = True self._get_available_organisms() self.organism_index = self.organisms.index(taxonomy.DEFAULT_ORGANISM) self.taxid = self.organisms[self.organism_index] self.infoLabel.setText("No data on input\n") self.initfuture = None self.setBlocking(False) self.progressBarFinished(processEvents=None) def _onInitializeError(self, exc): sys.excepthook(type(exc), exc, None) self.error(0, "Could not download the necessary files.") def _onSelectedOrganismChanged(self): assert 0 <= self.organism_index <= len(self.organisms) self.taxid = self.organisms[self.organism_index] if self.data is not None: self.updateInfoItems() @Inputs.data def setData(self, data=None): if not self.__initialized: self.initfuture.result() self.initialize() if self.itemsfuture is not None: raise Exception("Already processing") self.data = data if data is not None: self.geneAttrComboBox.clear() self.attributes = [ attr for attr in data.domain.variables + data.domain.metas if isinstance(attr, (Orange.data.StringVariable, Orange.data.DiscreteVariable)) ] for var in self.attributes: self.geneAttrComboBox.addItem(*gui.attributeItem(var)) self.taxid = str(self.data.attributes.get(TAX_ID, '')) self.useAttr = self.data.attributes.get(GENE_AS_ATTRIBUTE_NAME, self.useAttr) self.gene_attr = min(self.gene_attr, len(self.attributes) - 1) if self.taxid in self.organisms: self.organism_index = self.organisms.index(self.taxid) self.updateInfoItems() else: self.clear() def updateInfoItems(self): self.warning(0) if self.data is None: return if self.useAttr: genes = [attr.name for attr in self.data.domain.attributes] elif self.attributes: attr = self.attributes[self.gene_attr] genes = [ str(ex[attr]) for ex in self.data if not math.isnan(ex[attr]) ] else: genes = [] if not genes: self.warning(0, "Could not extract genes from input dataset.") self.warning(1) org = self.organisms[min(self.organism_index, len(self.organisms) - 1)] source_name, info_getter = ("NCBI Info", ncbi_info) self.error(0) self.progressBarInit() self.setBlocking(True) self.setEnabled(False) self.infoLabel.setText("Retrieving info records.\n") self.genes = genes task = Task(function=partial( info_getter, org, genes, advance=methodinvoke(self, "advance", ( )))) self.itemsfuture = self.executor.submit(task) task.finished.connect(self._onItemsCompleted) def _onItemsCompleted(self): self.setBlocking(False) self.progressBarFinished() self.setEnabled(True) try: self.map_input_to_ensembl, geneinfo = self.itemsfuture.result() finally: self.itemsfuture = None self.geneinfo = geneinfo self.cells = cells = [] self.row2geneinfo = {} for i, (input_name, gi) in enumerate(geneinfo): if gi: row = [] for item in gi: row.append(item) # parse synonyms row[HEADER_SCHEMA['Synonyms']] = ','.join( row[HEADER_SCHEMA['Synonyms']]) cells.append(row) self.row2geneinfo[len(cells) - 1] = i model = TreeModel(cells, list(HEADER_SCHEMA.keys()), None) proxyModel = QSortFilterProxyModel(self) proxyModel.setSourceModel(model) self.treeWidget.setModel(proxyModel) self.treeWidget.selectionModel().selectionChanged.connect(self.commit) for i in range(len(HEADER_SCHEMA)): self.treeWidget.resizeColumnToContents(i) self.treeWidget.setColumnWidth( i, min(self.treeWidget.columnWidth(i), 200)) self.infoLabel.setText("%i genes\n%i matched NCBI's IDs" % (len(self.genes), len(cells))) self.matchedInfo = len(self.genes), len(cells) if self.useAttr: new_data = self.data.from_table(self.data.domain, self.data) for gene_var in new_data.domain.attributes: gene_var.attributes['Ensembl ID'] = str( self.map_input_to_ensembl[gene_var.name]) self.Outputs.data.send(new_data) elif self.attributes: ensembl_ids = [] for gene_name in self.data.get_column_view( self.attributes[self.gene_attr])[0]: if gene_name and gene_name in self.map_input_to_ensembl: ensembl_ids.append(self.map_input_to_ensembl[gene_name]) else: ensembl_ids.append('') data_with_ensembl = append_columns( self.data, metas=[(Orange.data.StringVariable('Ensembl ID'), ensembl_ids) ]) self.Outputs.data.send(data_with_ensembl) def clear(self): self.infoLabel.setText("No data on input\n") self.treeWidget.setModel( TreeModel([], [ "NCBI ID", "Symbol", "Locus Tag", "Chromosome", "Description", "Synonyms", "Nomenclature" ], self.treeWidget)) self.geneAttrComboBox.clear() self.Outputs.selected_genes.send(None) def commit(self): if self.data is None: self.Outputs.selected_genes.send(None) self.Outputs.data.send(None) return model = self.treeWidget.model() selection = self.treeWidget.selectionModel().selection() selection = model.mapSelectionToSource(selection) selectedRows = list( chain(*(range(r.top(), r.bottom() + 1) for r in selection))) model = model.sourceModel() selectedGeneids = [self.row2geneinfo[row] for row in selectedRows] selectedIds = [self.geneinfo[i][0] for i in selectedGeneids] selectedIds = set(selectedIds) gene2row = dict((self.geneinfo[self.row2geneinfo[row]][0], row) for row in selectedRows) isselected = selectedIds.__contains__ if selectedIds: if self.useAttr: attrs = [ attr for attr in self.data.domain.attributes if isselected(attr.name) ] domain = Orange.data.Domain(attrs, self.data.domain.class_vars, self.data.domain.metas) newdata = self.data.from_table(domain, self.data) self.Outputs.selected_genes.send(newdata) elif self.attributes: attr = self.attributes[self.gene_attr] gene_col = [ attr.str_val(v) for v in self.data.get_column_view(attr)[0] ] gene_col = [(i, name) for i, name in enumerate(gene_col) if isselected(name)] indices = [i for i, _ in gene_col] # SELECTED GENES OUTPUT selected_genes_metas = [ Orange.data.StringVariable(name) for name in gene.GENE_INFO_HEADER_LABELS ] selected_genes_domain = Orange.data.Domain( self.data.domain.attributes, self.data.domain.class_vars, self.data.domain.metas + tuple(selected_genes_metas)) selected_genes_data = self.data.from_table( selected_genes_domain, self.data)[indices] model_rows = [gene2row[gene_name] for _, gene_name in gene_col] for col, meta in zip(range(model.columnCount()), selected_genes_metas): col_data = [ str(model.index(row, col).data(Qt.DisplayRole)) for row in model_rows ] col_data = np.array(col_data, dtype=object, ndmin=2).T selected_genes_data[:, meta] = col_data if not len(selected_genes_data): selected_genes_data = None self.Outputs.selected_genes.send(selected_genes_data) else: self.Outputs.selected_genes.send(None) def rowFiltered(self, row): searchStrings = self.search_string.lower().split() row = " ".join(self.cells[row]).lower() return not all([s in row for s in searchStrings]) def searchUpdate(self): if not self.data: return searchStrings = self.search_string.lower().split() index = self.treeWidget.model().sourceModel().index mapFromSource = self.treeWidget.model().mapFromSource for i, row in enumerate(self.cells): row = " ".join(row).lower() self.treeWidget.setRowHidden( mapFromSource(index(i, 0)).row(), QModelIndex(), not all([s in row for s in searchStrings])) def selectFiltered(self): if not self.data: return itemSelection = QItemSelection() index = self.treeWidget.model().sourceModel().index mapFromSource = self.treeWidget.model().mapFromSource for i, row in enumerate(self.cells): if not self.rowFiltered(i): itemSelection.select(mapFromSource(index(i, 0)), mapFromSource(index(i, 0))) self.treeWidget.selectionModel().select( itemSelection, QItemSelectionModel.Select | QItemSelectionModel.Rows) def onAltSourceChange(self): self.updateInfoItems() def onDeleteWidget(self): # try to cancel pending tasks if self.initfuture: self.initfuture.cancel() if self.itemsfuture: self.itemsfuture.cancel() self.executor.shutdown(wait=False) super().onDeleteWidget()
class OWGeneInfo(widget.OWWidget): name = "Gene Info" description = "Displays gene information from NCBI and other sources." icon = "../widgets/icons/GeneInfo.svg" priority = 2010 inputs = [("Data", Orange.data.Table, "setData")] outputs = [("Data Subset", Orange.data.Table)] settingsHandler = settings.DomainContextHandler() organism_index = settings.ContextSetting(0) taxid = settings.ContextSetting("9606") gene_attr = settings.ContextSetting(0) auto_commit = settings.Setting(False) search_string = settings.Setting("") useAttr = settings.ContextSetting(False) useAltSource = settings.ContextSetting(False) def __init__(self, parent=None, ): super().__init__(self, parent) self.selectionChangedFlag = False self.__initialized = False self.initfuture = None self.itemsfuture = None self.infoLabel = gui.widgetLabel( gui.widgetBox(self.controlArea, "Info", addSpace=True), "Initializing\n" ) self.organisms = None self.organismBox = gui.widgetBox( self.controlArea, "Organism", addSpace=True) self.organismComboBox = gui.comboBox( self.organismBox, self, "organism_index", callback=self._onSelectedOrganismChanged) # For now only support one alt source, with a checkbox # In the future this can be extended to multiple selections self.altSourceCheck = gui.checkBox( self.organismBox, self, "useAltSource", "Show information from dictyBase", callback=self.onAltSourceChange) self.altSourceCheck.hide() box = gui.widgetBox(self.controlArea, "Gene names", addSpace=True) self.geneAttrComboBox = gui.comboBox( box, self, "gene_attr", "Gene attribute", callback=self.updateInfoItems ) self.geneAttrComboBox.setEnabled(not self.useAttr) cb = gui.checkBox(box, self, "useAttr", "Use attribute names", callback=self.updateInfoItems) cb.toggled[bool].connect(self.geneAttrComboBox.setDisabled) gui.auto_commit(self.controlArea, self, "auto_commit", "Commit") # A label for dictyExpress link (Why oh god why???) self.dictyExpressBox = gui.widgetBox( self.controlArea, "Dicty Express") self.linkLabel = gui.widgetLabel(self.dictyExpressBox, "") self.linkLabel.setOpenExternalLinks(False) self.linkLabel.linkActivated.connect(self.onDictyExpressLink) self.dictyExpressBox.hide() gui.rubber(self.controlArea) gui.lineEdit(self.mainArea, self, "search_string", "Filter", callbackOnType=True, callback=self.searchUpdate) self.treeWidget = QTreeView( self.mainArea, selectionMode=QTreeView.ExtendedSelection, rootIsDecorated=False, uniformRowHeights=True, sortingEnabled=True) self.treeWidget.setItemDelegate( gui.LinkStyledItemDelegate(self.treeWidget)) self.treeWidget.viewport().setMouseTracking(True) self.mainArea.layout().addWidget(self.treeWidget) box = gui.widgetBox(self.mainArea, "", orientation="horizontal") gui.button(box, self, "Select Filtered", callback=self.selectFiltered) gui.button(box, self, "Clear Selection", callback=self.treeWidget.clearSelection) self.geneinfo = [] self.cells = [] self.row2geneinfo = {} self.data = None # : (# input genes, # matches genes) self.matchedInfo = 0, 0 self.setBlocking(True) self.executor = ThreadExecutor(self) self.progressBarInit() task = Task( function=partial( taxonomy.ensure_downloaded, callback=methodinvoke(self, "advance", ()) ) ) task.resultReady.connect(self.initialize) task.exceptionReady.connect(self._onInitializeError) self.initfuture = self.executor.submit(task) def sizeHint(self): return QSize(1024, 720) @Slot() def advance(self): assert self.thread() is QThread.currentThread() self.progressBarSet(self.progressBarValue + 1, processEvents=None) def initialize(self): if self.__initialized: # Already initialized return self.__initialized = True self.organisms = sorted( set([name.split(".")[-2] for name in serverfiles.listfiles("NCBI_geneinfo")] + gene.NCBIGeneInfo.common_taxids()) ) self.organismComboBox.addItems( [taxonomy.name(tax_id) for tax_id in self.organisms] ) if self.taxid in self.organisms: self.organism_index = self.organisms.index(self.taxid) else: self.organism_index = 0 self.taxid = self.organisms[self.organism_index] self.altSourceCheck.setVisible(self.taxid == DICTY_TAXID) self.dictyExpressBox.setVisible(self.taxid == DICTY_TAXID) self.infoLabel.setText("No data on input\n") self.initfuture = None self.setBlocking(False) self.progressBarFinished(processEvents=None) def _onInitializeError(self, exc): sys.excepthook(type(exc), exc, None) self.error(0, "Could not download the necessary files.") def _onSelectedOrganismChanged(self): assert 0 <= self.organism_index <= len(self.organisms) self.taxid = self.organisms[self.organism_index] self.altSourceCheck.setVisible(self.taxid == DICTY_TAXID) self.dictyExpressBox.setVisible(self.taxid == DICTY_TAXID) if self.data is not None: self.updateInfoItems() def setData(self, data=None): if not self.__initialized: self.initfuture.result() self.initialize() if self.itemsfuture is not None: raise Exception("Already processing") self.closeContext() self.data = data if data is not None: self.geneAttrComboBox.clear() self.attributes = \ [attr for attr in data.domain.variables + data.domain.metas if isinstance(attr, (Orange.data.StringVariable, Orange.data.DiscreteVariable))] for var in self.attributes: self.geneAttrComboBox.addItem(*gui.attributeItem(var)) self.taxid = data_hints.get_hint(self.data, "taxid", self.taxid) self.useAttr = data_hints.get_hint( self.data, "genesinrows", self.useAttr) self.openContext(data) self.gene_attr = min(self.gene_attr, len(self.attributes) - 1) if self.taxid in self.organisms: self.organism_index = self.organisms.index(self.taxid) else: self.organism_index = 0 self.taxid = self.organisms[self.organism_index] self.updateInfoItems() else: self.clear() def infoSource(self): """ Return the current selected info source getter function from INFO_SOURCES """ org = self.organisms[min(self.organism_index, len(self.organisms) - 1)] if org not in INFO_SOURCES: org = "default" sources = INFO_SOURCES[org] name, func = sources[min(self.useAltSource, len(sources) - 1)] return name, func def inputGenes(self): if self.useAttr: genes = [attr.name for attr in self.data.domain.attributes] elif self.attributes: attr = self.attributes[self.gene_attr] genes = [str(ex[attr]) for ex in self.data if not math.isnan(ex[attr])] else: genes = [] return genes def updateInfoItems(self): self.warning(0) if self.data is None: return genes = self.inputGenes() if self.useAttr: genes = [attr.name for attr in self.data.domain.attributes] elif self.attributes: attr = self.attributes[self.gene_attr] genes = [str(ex[attr]) for ex in self.data if not math.isnan(ex[attr])] else: genes = [] if not genes: self.warning(0, "Could not extract genes from input dataset.") self.warning(1) org = self.organisms[min(self.organism_index, len(self.organisms) - 1)] source_name, info_getter = self.infoSource() self.error(0) self.updateDictyExpressLink(genes, show=org == DICTY_TAXID) self.altSourceCheck.setVisible(org == DICTY_TAXID) self.progressBarInit() self.setBlocking(True) self.setEnabled(False) self.infoLabel.setText("Retrieving info records.\n") self.genes = genes task = Task( function=partial( info_getter, org, genes, advance=methodinvoke(self, "advance", ())) ) self.itemsfuture = self.executor.submit(task) task.finished.connect(self._onItemsCompleted) def _onItemsCompleted(self): self.setBlocking(False) self.progressBarFinished() self.setEnabled(True) try: schema, geneinfo = self.itemsfuture.result() finally: self.itemsfuture = None self.geneinfo = geneinfo = list(zip(self.genes, geneinfo)) self.cells = cells = [] self.row2geneinfo = {} links = [] for i, (_, gi) in enumerate(geneinfo): if gi: row = [] for _, item in zip(schema, gi): if isinstance(item, Link): # TODO: This should be handled by delegates row.append(item.text) links.append(item.link) else: row.append(item) cells.append(row) self.row2geneinfo[len(cells) - 1] = i model = TreeModel(cells, [str(col) for col in schema], None) model.setColumnLinks(0, links) proxyModel = QSortFilterProxyModel(self) proxyModel.setSourceModel(model) self.treeWidget.setModel(proxyModel) self.treeWidget.selectionModel().selectionChanged.connect(self.commit) for i in range(7): self.treeWidget.resizeColumnToContents(i) self.treeWidget.setColumnWidth( i, min(self.treeWidget.columnWidth(i), 200) ) self.infoLabel.setText("%i genes\n%i matched NCBI's IDs" % (len(self.genes), len(cells))) self.matchedInfo = len(self.genes), len(cells) def clear(self): self.infoLabel.setText("No data on input\n") self.treeWidget.setModel( TreeModel([], ["NCBI ID", "Symbol", "Locus Tag", "Chromosome", "Description", "Synonyms", "Nomenclature"], self.treeWidget)) self.geneAttrComboBox.clear() self.send("Data Subset", None) def commit(self): if self.data is None: self.send("Data Subset", None) return model = self.treeWidget.model() selection = self.treeWidget.selectionModel().selection() selection = model.mapSelectionToSource(selection) selectedRows = list( chain(*(range(r.top(), r.bottom() + 1) for r in selection)) ) model = model.sourceModel() selectedGeneids = [self.row2geneinfo[row] for row in selectedRows] selectedIds = [self.geneinfo[i][0] for i in selectedGeneids] selectedIds = set(selectedIds) gene2row = dict((self.geneinfo[self.row2geneinfo[row]][0], row) for row in selectedRows) isselected = selectedIds.__contains__ if self.useAttr: def is_selected(attr): return attr.name in selectedIds attrs = [attr for attr in self.data.domain.attributes if isselected(attr.name)] domain = Orange.data.Domain( attrs, self.data.domain.class_vars, self.data.domain.metas) newdata = self.data.from_table(domain, self.data) self.send("Data Subset", newdata) elif self.attributes: attr = self.attributes[self.gene_attr] gene_col = [attr.str_val(v) for v in self.data.get_column_view(attr)[0]] gene_col = [(i, name) for i, name in enumerate(gene_col) if isselected(name)] indices = [i for i, _ in gene_col] # Add a gene info columns to the output headers = [str(model.headerData(i, Qt.Horizontal, Qt.DisplayRole)) for i in range(model.columnCount())] metas = [Orange.data.StringVariable(name) for name in headers] domain = Orange.data.Domain( self.data.domain.attributes, self.data.domain.class_vars, self.data.domain.metas + tuple(metas)) newdata = self.data.from_table(domain, self.data)[indices] model_rows = [gene2row[gene] for _, gene in gene_col] for col, meta in zip(range(model.columnCount()), metas): col_data = [str(model.index(row, col).data(Qt.DisplayRole)) for row in model_rows] col_data = np.array(col_data, dtype=object, ndmin=2).T newdata[:, meta] = col_data if not len(newdata): newdata = None self.send("Data Subset", newdata) else: self.send("Data Subset", None) def rowFiltered(self, row): searchStrings = self.search_string.lower().split() row = " ".join(self.cells[row]).lower() return not all([s in row for s in searchStrings]) def searchUpdate(self): if not self.data: return searchStrings = self.search_string.lower().split() index = self.treeWidget.model().sourceModel().index mapFromSource = self.treeWidget.model().mapFromSource for i, row in enumerate(self.cells): row = " ".join(row).lower() self.treeWidget.setRowHidden( mapFromSource(index(i, 0)).row(), QModelIndex(), not all([s in row for s in searchStrings])) def selectFiltered(self): if not self.data: return itemSelection = QItemSelection() index = self.treeWidget.model().sourceModel().index mapFromSource = self.treeWidget.model().mapFromSource for i, row in enumerate(self.cells): if not self.rowFiltered(i): itemSelection.select(mapFromSource(index(i, 0)), mapFromSource(index(i, 0))) self.treeWidget.selectionModel().select( itemSelection, QItemSelectionModel.Select | QItemSelectionModel.Rows) def updateDictyExpressLink(self, genes, show=False): def fix(ddb): if ddb.startswith("DDB"): if not ddb.startswith("DDB_G"): ddb = ddb.replace("DDB", "DDB_G") return ddb return None if show: genes = [fix(gene) for gene in genes if fix(gene)] link1 = '<a href="http://dictyexpress.biolab.si/run/index.php?gene=%s">Microarray profile</a>' link2 = '<a href="http://dictyexpress.biolab.si/run/index.php?gene=%s&db=rnaseq">RNA-Seq profile</a>' self.linkLabel.setText(link1 + "<br/>" + link2) show = any(genes) if show: self.dictyExpressBox.show() else: self.dictyExpressBox.hide() def onDictyExpressLink(self, link): if not self.data: return selectedIndexes = self.treeWidget.selectedIndexes() if not len(selectedIndexes): QMessageBox.information( self, "No gene ids selected", "Please select some genes and try again." ) return model = self.treeWidget.model() mapToSource = model.mapToSource selectedRows = self.treeWidget.selectedIndexes() selectedRows = [mapToSource(index).row() for index in selectedRows] model = model.sourceModel() selectedGeneids = [self.row2geneinfo[row] for row in selectedRows] selectedIds = [self.geneinfo[i][0] for i in selectedGeneids] selectedIds = set(selectedIds) def fix(ddb): if ddb.startswith("DDB"): if not ddb.startswith("DDB_G"): ddb = ddb.replace("DDB", "DDB_G") return ddb return None genes = [fix(gene) for gene in selectedIds if fix(gene)] url = str(link) % " ".join(genes) QDesktopServices.openUrl(QUrl(url)) def onAltSourceChange(self): self.updateInfoItems() def onDeleteWidget(self): # try to cancel pending tasks if self.initfuture: self.initfuture.cancel() if self.itemsfuture: self.itemsfuture.cancel() self.executor.shutdown(wait=False) super().onDeleteWidget()
class OWSetEnrichment(widget.OWWidget): name = "Set Enrichment" description = "" icon = "../widgets/icons/GeneSetEnrichment.svg" priority = 5000 inputs = [("Data", Orange.data.Table, "setData", widget.Default), ("Reference", Orange.data.Table, "setReference")] outputs = [("Data subset", Orange.data.Table)] settingsHandler = settings.DomainContextHandler() taxid = settings.ContextSetting(None) speciesIndex = settings.ContextSetting(0) genesinrows = settings.ContextSetting(False) geneattr = settings.ContextSetting(0) categoriesCheckState = settings.ContextSetting({}) useReferenceData = settings.Setting(False) useMinCountFilter = settings.Setting(True) useMaxPValFilter = settings.Setting(True) useMaxFDRFilter = settings.Setting(True) minClusterCount = settings.Setting(3) maxPValue = settings.Setting(0.01) maxFDR = settings.Setting(0.01) autocommit = settings.Setting(False) headerState = settings.Setting(b'') # type: bytes Ready, Initializing, Loading, RunningEnrichment = 0, 1, 2, 4 def __init__(self, parent=None): super().__init__(parent) self.geneMatcherSettings = [False, False, True, False] self.data = None self.referenceData = None self.taxid_list = [] self.__genematcher = (None, fulfill(gene.matcher([]))) self.__invalidated = False self.currentAnnotatedCategories = [] self.state = None self.__state = OWSetEnrichment.Initializing box = gui.widgetBox(self.controlArea, "Info") self.infoBox = gui.widgetLabel(box, "Info") self.infoBox.setText("No data on input.\n") self.speciesComboBox = gui.comboBox( self.controlArea, self, "speciesIndex", "Species", callback=self.__on_speciesIndexChanged) box = gui.widgetBox(self.controlArea, "Entity names") self.geneAttrComboBox = gui.comboBox( box, self, "geneattr", "Entity feature", sendSelectedValue=0, callback=self.updateAnnotations) cb = gui.checkBox( box, self, "genesinrows", "Use feature names", callback=self.updateAnnotations, disables=[(-1, self.geneAttrComboBox)]) cb.makeConsistent() # gui.button(box, self, "Gene matcher settings", # callback=self.updateGeneMatcherSettings, # tooltip="Open gene matching settings dialog") self.referenceRadioBox = gui.radioButtonsInBox( self.controlArea, self, "useReferenceData", ["All entities", "Reference set (input)"], tooltips=["Use entire genome (for gene set enrichment) or all " + "available entities for reference", "Use entities from Reference Examples input signal " + "as reference"], box="Reference", callback=self.updateAnnotations) self.referenceRadioBox.setEnabled(False) box = gui.widgetBox(self.controlArea, "Entity Sets") self.groupsWidget = QTreeWidget(self) self.groupsWidget.setHeaderLabels(["Category"]) box.layout().addWidget(self.groupsWidget) hLayout = QHBoxLayout() hLayout.setSpacing(10) hWidget = gui.widgetBox(self.mainArea, orientation=hLayout) gui.spin(hWidget, self, "minClusterCount", 0, 100, label="Entities", tooltip="Minimum entity count", callback=self.filterAnnotationsChartView, callbackOnReturn=True, checked="useMinCountFilter", checkCallback=self.filterAnnotationsChartView) pvalfilterbox = gui.widgetBox(hWidget, orientation="horizontal") cb = gui.checkBox( pvalfilterbox, self, "useMaxPValFilter", "p-value", callback=self.filterAnnotationsChartView) sp = gui.doubleSpin( pvalfilterbox, self, "maxPValue", 0.0, 1.0, 0.0001, tooltip="Maximum p-value", callback=self.filterAnnotationsChartView, callbackOnReturn=True, ) sp.setEnabled(self.useMaxPValFilter) cb.toggled[bool].connect(sp.setEnabled) pvalfilterbox.layout().setAlignment(cb, Qt.AlignRight) pvalfilterbox.layout().setAlignment(sp, Qt.AlignLeft) fdrfilterbox = gui.widgetBox(hWidget, orientation="horizontal") cb = gui.checkBox( fdrfilterbox, self, "useMaxFDRFilter", "FDR", callback=self.filterAnnotationsChartView) sp = gui.doubleSpin( fdrfilterbox, self, "maxFDR", 0.0, 1.0, 0.0001, tooltip="Maximum False discovery rate", callback=self.filterAnnotationsChartView, callbackOnReturn=True, ) sp.setEnabled(self.useMaxFDRFilter) cb.toggled[bool].connect(sp.setEnabled) fdrfilterbox.layout().setAlignment(cb, Qt.AlignRight) fdrfilterbox.layout().setAlignment(sp, Qt.AlignLeft) self.filterLineEdit = QLineEdit( self, placeholderText="Filter ..." ) self.filterCompleter = QCompleter(self.filterLineEdit) self.filterCompleter.setCaseSensitivity(Qt.CaseInsensitive) self.filterLineEdit.setCompleter(self.filterCompleter) hLayout.addWidget(self.filterLineEdit) self.mainArea.layout().addWidget(hWidget) self.filterLineEdit.textChanged.connect( self.filterAnnotationsChartView) self.annotationsChartView = QTreeView( alternatingRowColors=True, sortingEnabled=True, selectionMode=QTreeView.ExtendedSelection, rootIsDecorated=False, editTriggers=QTreeView.NoEditTriggers, uniformRowHeights=True, ) self.annotationsChartView.setItemDelegateForColumn( ResultsModel.Pval, RealDelegate(self, precision=4) ) self.annotationsChartView.setItemDelegateForColumn( ResultsModel.FDR, RealDelegate(self, precision=4) ) self.annotationsChartView.viewport().setMouseTracking(True) self.proxy = FilterProxyModel(parent=self, dynamicSortFilter=True) model = ResultsModel(parent=self) self.proxy.setSourceModel(model) self.annotationsChartView.setModel(self.proxy) self.annotationsChartView.selectionModel().selectionChanged.connect( lambda: self.commit() ) header = self.annotationsChartView.header() contextEventFilter = gui.VisibleHeaderSectionContextEventFilter( self.annotationsChartView ) header.installEventFilter(contextEventFilter) header.setSortIndicator(ResultsModel.Pval, Qt.AscendingOrder) if self.headerState: header.restoreState(self.headerState) self.mainArea.layout().addWidget(self.annotationsChartView) self.groupsWidget.itemClicked.connect(self.subsetSelectionChanged) gui.auto_commit(self.controlArea, self, "autocommit", "Commit") self.setBlocking(True) task = EnsureDownloaded( [(taxonomy.Taxonomy.DOMAIN, taxonomy.Taxonomy.FILENAME), (geneset.sfdomain, "index.pck")] ) task.finished.connect(self.__initialize_finish) self.setStatusMessage("Initializing") self._executor = ThreadExecutor( parent=self, threadPool=QThreadPool(self)) self._executor.submit(task) def sizeHint(self): return QSize(1024, 600) def closeEvent(self, event): self.headerState = bytes( self.annotationsChartView.header().saveState() ) super().closeEvent(event) def __initialize_finish(self): # Finalize the the widget's initialization (preferably after # ensuring all required databases have been downloaded. sets = geneset.list_all() taxids = set(taxonomy.common_taxids() + list(filter(None, [tid for _, tid, _ in sets]))) organisms = [(tid, name_or_none(tid)) for tid in taxids] organisms = [(tid, name) for tid, name in organisms if name is not None] organisms = [(None, "None")] + sorted(organisms) taxids = [tid for tid, _ in organisms] names = [name for _, name in organisms] self.taxid_list = taxids self.speciesComboBox.clear() self.speciesComboBox.addItems(names) self.genesets = sets if self.taxid in self.taxid_list: taxid = self.taxid else: taxid = self.taxid_list[0] self.taxid = None self.setCurrentOrganism(taxid) self.setBlocking(False) self.__state = OWSetEnrichment.Ready self.setStatusMessage("") def setCurrentOrganism(self, taxid): """Set the current organism `taxid`.""" if taxid not in self.taxid_list: taxid = self.taxid_list[min(self.speciesIndex, len(self.taxid_list) - 1)] if self.taxid != taxid: self.taxid = taxid self.speciesIndex = self.taxid_list.index(taxid) self.refreshHierarchy() self._invalidateGeneMatcher() self._invalidate() def currentOrganism(self): """Return the current organism taxid""" return self.taxid def __on_speciesIndexChanged(self): taxid = self.taxid_list[self.speciesIndex] self.taxid = "< Do not look >" self.setCurrentOrganism(taxid) if self.__invalidated and self.data is not None: self.updateAnnotations() def clear(self): """Clear/reset the widget state.""" self._cancelPending() self.state = None self.__state = self.__state & ~OWSetEnrichment.RunningEnrichment self._clearView() self.geneAttrComboBox.clear() self.geneAttrs = [] self._updatesummary() def _cancelPending(self): """Cancel pending tasks.""" if self.state is not None: self.state.results.cancel() self.state.namematcher.cancel() self.state.cancelled = True def _clearView(self): """Clear the enrichment report view (main area).""" model = self.proxy.sourceModel() # type: ResultsModel model.setRowCount(0) self.information(0) self.warning(0) def setData(self, data=None): """Set the input dataset with query gene names""" if self.__state & OWSetEnrichment.Initializing: self.__initialize_finish() self.error(0) self.closeContext() self.clear() self.groupsWidget.clear() self.data = data if data is not None: varlist = [var for var in data.domain.variables + data.domain.metas if isinstance(var, Orange.data.StringVariable)] self.geneAttrs = varlist for var in varlist: self.geneAttrComboBox.addItem(*gui.attributeItem(var)) oldtaxid = self.taxid self.geneattr = min(self.geneattr, len(self.geneAttrs) - 1) taxid = data_hints.get_hint(data, "taxid", "") if taxid in self.taxid_list: self.speciesIndex = self.taxid_list.index(taxid) self.taxid = taxid self.genesinrows = data_hints.get_hint( data, "genesinrows", self.genesinrows) self.openContext(data) if oldtaxid != self.taxid: self.taxid = "< Do not look >" self.setCurrentOrganism(taxid) self.refreshHierarchy() self._invalidate() def setReference(self, data=None): """Set the (optional) input dataset with reference gene names.""" self.referenceData = data self.referenceRadioBox.setEnabled(bool(data)) if self.useReferenceData: self._invalidate() def handleNewSignals(self): if self.__invalidated: self.updateAnnotations() def _invalidateGeneMatcher(self): _, f = self.__genematcher f.cancel() self.__genematcher = (None, fulfill(gene.matcher([]))) def _invalidate(self): self.__invalidated = True def genesFromTable(self, table): if self.genesinrows: genes = [attr.name for attr in table.domain.attributes] else: geneattr = self.geneAttrs[self.geneattr] genes = [str(ex[geneattr]) for ex in table] return genes def getHierarchy(self, taxid): def recursive_dict(): return defaultdict(recursive_dict) collection = recursive_dict() def collect(col, hier): if hier: collect(col[hier[0]], hier[1:]) for hierarchy, t_id, _ in self.genesets: collect(collection[t_id], hierarchy) return (taxid, collection[taxid]), (None, collection[None]) def setHierarchy(self, hierarchy, hierarchy_noorg): self.groupsWidgetItems = {} def fill(col, parent, full=(), org=""): for key, value in sorted(col.items()): full_cat = full + (key,) item = QTreeWidgetItem(parent, [key]) item.setFlags(item.flags() | Qt.ItemIsUserCheckable | Qt.ItemIsSelectable | Qt.ItemIsEnabled) if value: item.setFlags(item.flags() | Qt.ItemIsTristate) checked = self.categoriesCheckState.get( (full_cat, org), Qt.Checked) item.setData(0, Qt.CheckStateRole, checked) item.setExpanded(True) item.category = full_cat item.organism = org self.groupsWidgetItems[full_cat] = item fill(value, item, full_cat, org=org) self.groupsWidget.clear() fill(hierarchy[1], self.groupsWidget, org=hierarchy[0]) fill(hierarchy_noorg[1], self.groupsWidget, org=hierarchy_noorg[0]) def refreshHierarchy(self): self.setHierarchy(*self.getHierarchy(taxid=self.taxid_list[self.speciesIndex])) def selectedCategories(self): """ Return a list of currently selected hierarchy keys. A key is a tuple of identifiers from the root to the leaf of the hierarchy tree. """ return [key for key, check in self.getHierarchyCheckState().items() if check == Qt.Checked] def getHierarchyCheckState(self): def collect(item, full=()): checked = item.checkState(0) name = str(item.data(0, Qt.DisplayRole)) full_cat = full + (name,) result = [((full_cat, item.organism), checked)] for i in range(item.childCount()): result.extend(collect(item.child(i), full_cat)) return result items = [self.groupsWidget.topLevelItem(i) for i in range(self.groupsWidget.topLevelItemCount())] states = itertools.chain(*(collect(item) for item in items)) return dict(states) def subsetSelectionChanged(self, item, column): # The selected geneset (hierarchy) subset has been changed by the # user. Update the displayed results. # Update the stored state (persistent settings) self.categoriesCheckState = self.getHierarchyCheckState() categories = self.selectedCategories() if self.data is not None: if self._nogenematching() or \ not set(categories) <= set(self.currentAnnotatedCategories): self.updateAnnotations() else: self._updateFDR() self.filterAnnotationsChartView() def updateGeneMatcherSettings(self): raise NotImplementedError from .OWGOEnrichmentAnalysis import GeneMatcherDialog dialog = GeneMatcherDialog(self, defaults=self.geneMatcherSettings, enabled=[True] * 4, modal=True) if dialog.exec_(): self.geneMatcherSettings = [getattr(dialog, item[0]) for item in dialog.items] self._invalidateGeneMatcher() if self.data is not None: self.updateAnnotations() def _genematcher(self): """ Return a Future[gene.SequenceMatcher] """ taxid = self.taxid_list[self.speciesIndex] current, matcher_f = self.__genematcher if taxid == current and \ not matcher_f.cancelled(): return matcher_f self._invalidateGeneMatcher() if taxid is None: self.__genematcher = (None, fulfill(gene.matcher([]))) return self.__genematcher[1] matchers = [gene.GMGO, gene.GMKEGG, gene.GMNCBI, gene.GMAffy] matchers = [m for m, use in zip(matchers, self.geneMatcherSettings) if use] def create(): return gene.matcher([m(taxid) for m in matchers]) matcher_f = self._executor.submit(create) self.__genematcher = (taxid, matcher_f) return self.__genematcher[1] def _nogenematching(self): return self.taxid is None or not any(self.geneMatcherSettings) def updateAnnotations(self): if self.data is None: return assert not self.__state & OWSetEnrichment.Initializing self._cancelPending() self._clearView() self.information(0) self.warning(0) self.error(0) if not self.genesinrows and len(self.geneAttrs) == 0: self.error(0, "Input data contains no columns with gene names") return self.__state = OWSetEnrichment.RunningEnrichment taxid = self.taxid_list[self.speciesIndex] self.taxid = taxid categories = self.selectedCategories() clusterGenes = self.genesFromTable(self.data) if self.referenceData is not None and self.useReferenceData: referenceGenes = self.genesFromTable(self.referenceData) else: referenceGenes = None self.currentAnnotatedCategories = categories genematcher = self._genematcher() self.progressBarInit() ## Load collections in a worker thread # TODO: Use cached collections if already loaded and # use ensure_genesetsdownloaded with progress report (OWSelectGenes) collections = self._executor.submit(geneset.collections, *categories) def refset_null(): """Return the default background reference set""" col = collections.result() return reduce(operator.ior, (set(g.genes) for g in col), set()) def refset_ncbi(): """Return all NCBI gene names""" geneinfo = gene.NCBIGeneInfo(taxid) return set(geneinfo.keys()) def namematcher(): matcher = genematcher.result() match = matcher.set_targets(ref_set.result()) match.umatch = memoize(match.umatch) return match def map_unames(): matcher = namematcher.result() query = list(filter(None, map(matcher.umatch, querynames))) reference = list(filter(None, map(matcher.umatch, ref_set.result()))) return query, reference if self._nogenematching(): if referenceGenes is None: ref_set = self._executor.submit(refset_null) else: ref_set = fulfill(referenceGenes) else: if referenceGenes == None: ref_set = self._executor.submit(refset_ncbi) else: ref_set = fulfill(referenceGenes) namematcher = self._executor.submit(namematcher) querynames = clusterGenes state = types.SimpleNamespace() state.query_set = clusterGenes state.reference_set = referenceGenes state.namematcher = namematcher state.query_count = len(set(clusterGenes)) state.reference_count = (len(set(referenceGenes)) if referenceGenes is not None else None) state.cancelled = False progress = methodinvoke(self, "_setProgress", (float,)) info = methodinvoke(self, "_setRunInfo", (str,)) @withtraceback def run(): info("Loading data") match = namematcher.result() query, reference = map_unames() gscollections = collections.result() results = [] info("Running enrichment") p = 0 for i, gset in enumerate(gscollections): genes = set(filter(None, map(match.umatch, gset.genes))) enr = set_enrichment(genes, reference, query) results.append((gset, enr)) if state.cancelled: raise UserInteruptException pnew = int(100 * i / len(gscollections)) if pnew != p: progress(pnew) p = pnew progress(100) info("") return query, reference, results task = Task(function=run) task.resultReady.connect(self.__on_enrichment_finished) task.exceptionReady.connect(self.__on_enrichment_failed) result = self._executor.submit(task) state.results = result self.state = state self._updatesummary() def __on_enrichment_failed(self, exception): if not isinstance(exception, UserInteruptException): print("ERROR:", exception, file=sys.stderr) print(exception._traceback, file=sys.stderr) self.progressBarFinished() self.setStatusMessage("") self.__state &= ~OWSetEnrichment.RunningEnrichment def __on_enrichment_finished(self, results): assert QThread.currentThread() is self.thread() self.__state &= ~OWSetEnrichment.RunningEnrichment query, reference, results = results results = results # type: List[Tuple[geneset.GeneSet, enrichment_res]] nquery = len(query) nref = len(reference) maxcount = max((len(e.query_mapped) for _, e in results), default=1) maxrefcount = max((len(e.reference_mapped) for _, e in results), default=1) nspaces = int(math.ceil(math.log10(maxcount or 1))) refspaces = int(math.ceil(math.log(maxrefcount or 1))) query_fmt = "%" + str(nspaces) + "s (%.2f%%)" ref_fmt = "%" + str(refspaces) + "s (%.2f%%)" def fmt_count(fmt, count, total): return fmt % (count, 100.0 * count / (total or 1)) fmt_query_count = partial(fmt_count, query_fmt, total=nquery) fmt_ref_count = partial(fmt_count, ref_fmt, total=nref) view = self.annotationsChartView delegate = view.itemDelegateForColumn(ResultsModel.Count) if delegate is not None: delegate.deleteLater() delegate = view.itemDelegateForColumn(ResultsModel.Reference) if delegate is not None: delegate.deleteLater() view.setItemDelegateForColumn( ResultsModel.Count, FormatItemDelegate(fmt_query_count, self) ) view.setItemDelegateForColumn( ResultsModel.Reference, FormatItemDelegate(fmt_ref_count, self) ) linkFont = QFont(self.annotationsChartView.viewOptions().font) linkFont.setUnderline(True) def item(value=None, tooltip=None, user=None): si = QStandardItem() if value is not None: si.setData(value, Qt.DisplayRole) if tooltip is not None: si.setData(tooltip, Qt.ToolTipRole) if user is not None: si.setData(user, Qt.UserRole) else: si.setData(value, Qt.UserRole) return si model = ResultsModel(parent=self) for i, (gset, enrich) in enumerate(results): if len(enrich.query_mapped) == 0: continue nquery_mapped = len(enrich.query_mapped) nref_mapped = len(enrich.reference_mapped) row = [ item(", ".join(gset.hierarchy)), item(gsname(gset), tooltip=gset.link), item(nquery_mapped, tooltip=nquery_mapped, user=nquery_mapped), item(nref_mapped, tooltip=nref_mapped, user=nref_mapped), item(enrich.p_value, user=enrich.p_value), item(np.nan), # column 5, FDR, is computed in _updateFDR item(enrich.enrichment_score, tooltip="%.3f" % enrich.enrichment_score, user=enrich.enrichment_score) ] row[0].geneset = gset row[0].enrichment = enrich if gset.link: row[1].setData(gset.link, gui.LinkRole) row[1].setFont(linkFont) row[1].setForeground(QColor(Qt.blue)) model.appendRow(row) currmodel = self.proxy.sourceModel() currmodel.deleteLater() self.proxy.setSourceModel(model) if not model.rowCount(): self.warning(0, "No enriched sets found.") else: self.warning(0) allnames = set(gsname(geneset) for geneset, (count, _, _, _) in results if count) allnames |= reduce(operator.ior, (set(word_split(name)) for name in allnames), set()) self.filterCompleter.setModel(None) self.completerModel = QStringListModel(sorted(allnames)) self.filterCompleter.setModel(self.completerModel) if results: max_score = max((e.enrichment_score for _, e in results if np.isfinite(e.enrichment_score)), default=1) self.annotationsChartView.setItemDelegateForColumn( ResultsModel.Enrichment, BarItemDelegate(self, scale=(0.0, max_score)) ) self.annotationsChartView.setItemDelegateForColumn( ResultsModel.TermID, gui.LinkStyledItemDelegate(self.annotationsChartView) ) self._updateFDR() self.filterAnnotationsChartView() header = self.annotationsChartView.header() for i in range(model.columnCount()): sh = self.annotationsChartView.sizeHintForColumn(i) sh = max(header.sectionSizeHint(i), sh) width = self.annotationsChartView.columnWidth(i) if min(sh, 300) > width: self.annotationsChartView.setColumnWidth(i, max(min(sh, 300), 30)) self.progressBarFinished() self.setStatusMessage("") def _updatesummary(self): state = self.state if state is None: self.information(0) self.error(0) self.warning(0) self.infoBox.setText("No data on input.\n") return text = "{.query_count} unique names on input\n".format(state) if state.results.done() and not state.results.exception(): mapped, _, _ = state.results.result() ratio_mapped = (len(mapped) / state.query_count if state.query_count else 0) text += ("%i (%.1f%%) gene names matched" % (len(mapped), 100.0 * ratio_mapped)) elif not state.results.done(): text += "..." else: text += "<Error {}>".format(str(state.results.exception())) self.infoBox.setText(text) def _updateFDR(self): # Update the FDR in place due to a changed selected categories set and # results for all of these categories are already available. filter = self.proxy model = filter.sourceModel() assert isinstance(model, ResultsModel) categories = set(", ".join(cat) for cat, _ in self.selectedCategories()) def ishidden(index): # Is item at index (row) omitted from the results entirely # due to belonging to a non-selected category item = model.item(index) item_cat = item.data(Qt.DisplayRole) return item_cat not in categories hidemask = [ishidden(i) for i in range(model.rowCount())] # compute FDR according the selected categories pvals = [model.item(i, ResultsModel.Pval).data(Qt.UserRole) for i, hidden in enumerate(hidemask) if not hidden] fdrs = utils.stats.FDR(pvals) fdriter = iter(fdrs) old = model.blockSignals(True) try: for index, hidden in enumerate(hidemask): item = model.item(index, ResultsModel.FDR) if not hidden: fdr = next(fdriter) else: fdr = np.nan item.setData(fdr, Qt.DisplayRole) item.setData(fdr, Qt.UserRole) finally: model.blockSignals(old) model.dataChanged.emit( model.index(0, ResultsModel.FDR), model.index(model.rowCount() - 1, ResultsModel.FDR) ) def filterAnnotationsChartView(self): if self.__state & OWSetEnrichment.RunningEnrichment: return categories = set(", ".join(cat) for cat, _ in self.selectedCategories()) filterStrings = self.filterLineEdit.text().lower().strip().split() filter = self.proxy # type: FilterProxyModel model = filter.sourceModel() assert isinstance(model, ResultsModel) # apply filtering rules filters = [] # Filter selected collections. # NOTE: MUST match the condition in _updateFDR filters.append( FilterProxyModel.Filter( ResultsModel.Category, Qt.DisplayRole, lambda value: value in categories ) ) if filterStrings: filters.append( FilterProxyModel.Filter( ResultsModel.TermID, Qt.DisplayRole, lambda value: all(fs in value.lower() for fs in filterStrings) ) ) if self.useMinCountFilter: mincount = self.minClusterCount filters.append( FilterProxyModel.Filter( ResultsModel.Count, Qt.DisplayRole, lambda value: value >= mincount, ) ) if self.useMaxPValFilter: maxpval = self.maxPValue filters.append( FilterProxyModel.Filter( ResultsModel.Pval, Qt.DisplayRole, lambda value: value < maxpval ) ) if self.useMaxFDRFilter: maxfdr = self.maxFDR filters.append( FilterProxyModel.Filter( ResultsModel.FDR, Qt.DisplayRole, lambda value: value < maxfdr ) ) filter.setFilters(filters) if model.rowCount() and not filter.rowCount(): self.information(0, "All sets were filtered out.") else: self.information(0) self._updatesummary() @Slot(float) def _setProgress(self, value): assert QThread.currentThread() is self.thread() self.progressBarSet(value, processEvents=None) @Slot(str) def _setRunInfo(self, text): self.setStatusMessage(text) def commit(self): if self.data is None or \ self.__state & OWSetEnrichment.RunningEnrichment: return proxy = self.proxy model = proxy.sourceModel() assert isinstance(model, ResultsModel) rows = self.annotationsChartView.selectionModel().selectedRows(0) rows = [proxy.mapToSource(index) for index in rows] selected = [model.item(index.row(), 0) for index in rows] mapped = reduce(operator.ior, (set(item.enrichment.query_mapped) for item in selected), set()) assert self.state.namematcher.done() matcher = self.state.namematcher.result() axis = 1 if self.genesinrows else 0 if axis == 1: mapped = [attr for attr in self.data.domain.attributes if matcher.umatch(attr.name) in mapped] newdomain = Orange.data.Domain( mapped, self.data.domain.class_vars, self.data.domain.metas) data = self.data.from_table(newdomain, self.data) else: geneattr = self.geneAttrs[self.geneattr] selected = [i for i, ex in enumerate(self.data) if matcher.umatch(str(ex[geneattr])) in mapped] data = self.data[selected] self.send("Data subset", data) def onDeleteWidget(self): if self.state is not None: self._cancelPending() self.state = None self._executor.shutdown(wait=False)