class OWGEODatasets(OWWidget):
    """Widget for browsing GEO data sets (GDS) and sending one as a table.

    The list of data sets is loaded in a background task on construction.
    The selected data set is downloaded and converted to an
    ``Orange.data.Table`` in a second background task started by
    :meth:`commit`, and the result is sent on the "Expression Data" output.
    """

    name = "GEO Data Sets"
    description = DESCRIPTION
    icon = "../widgets/icons/GEODataSets.svg"
    priority = PRIORITY

    inputs = []
    outputs = [("Expression Data", Orange.data.Table)]

    settingsList = [
        "outputRows", "mergeSpots", "gdsSelectionStates",
        "splitterSettings", "currentGds", "autoCommit", "datasetNames"
    ]

    outputRows = Setting(True)
    mergeSpots = Setting(True)
    gdsSelectionStates = Setting({})
    currentGds = Setting(None)
    datasetNames = Setting({})
    # Saved QSplitter states for the (vertical, horizontal) splitters.
    splitterSettings = Setting((
        b'\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x01\xea\x00\x00\x00\xd7\x01\x00\x00\x00\x07\x01\x00\x00\x00\x02',
        b'\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x01\xb5\x00\x00\x02\x10\x01\x00\x00\x00\x07\x01\x00\x00\x00\x01'
    ))

    autoCommit = Setting(False)

    def __init__(self, parent=None, signalManager=None, name=" GEO Data Sets"):
        """Build the GUI and start loading the data set list in background."""
        OWWidget.__init__(self, parent, signalManager, name)

        self.selectionChanged = False
        self.filterString = ""
        self.datasetName = ""

        ## GUI
        box = gui.widgetBox(self.controlArea, "Info", addSpace=True)
        self.infoBox = gui.widgetLabel(box, "Initializing\n\n")

        box = gui.widgetBox(self.controlArea, "Output", addSpace=True)
        gui.radioButtonsInBox(box, self, "outputRows",
                              ["Genes in rows", "Samples in rows"], "Rows",
                              callback=self.commitIf)

        gui.checkBox(box, self, "mergeSpots", "Merge spots of same gene",
                     callback=self.commitIf)

        gui.separator(box)
        self.nameEdit = gui.lineEdit(
            box, self, "datasetName", "Data set name",
            tooltip="Override the default output data set name",
            callback=self.onNameEdited
        )
        self.nameEdit.setPlaceholderText("")

        if sys.version_info < (3, ):
            box = gui.widgetBox(self.controlArea, "Commit", addSpace=True)
            self.commitButton = gui.button(
                box, self, "Commit", callback=self.commit)
            cb = gui.checkBox(box, self, "autoCommit", "Commit on any change")
            gui.setStopper(self, self.commitButton, cb, "selectionChanged",
                           self.commit)
        else:
            gui.auto_commit(self.controlArea, self, "autoCommit", "Commit",
                            box="Commit")
            self.commitIf = self.commit

        gui.rubber(self.controlArea)

        gui.widgetLabel(self.mainArea, "Filter")
        self.filterLineEdit = QLineEdit(textChanged=self.filter)
        self.completer = TokenListCompleter(
            self, caseSensitivity=Qt.CaseInsensitive)
        self.filterLineEdit.setCompleter(self.completer)

        self.mainArea.layout().addWidget(self.filterLineEdit)

        splitter = QSplitter(Qt.Vertical, self.mainArea)
        self.mainArea.layout().addWidget(splitter)
        self.treeWidget = QTreeView(splitter)

        self.treeWidget.setSelectionMode(QTreeView.SingleSelection)
        self.treeWidget.setRootIsDecorated(False)
        self.treeWidget.setSortingEnabled(True)
        self.treeWidget.setAlternatingRowColors(True)
        self.treeWidget.setUniformRowHeights(True)
        self.treeWidget.setEditTriggers(QTreeView.NoEditTriggers)

        linkdelegate = LinkStyledItemDelegate(self.treeWidget)
        self.treeWidget.setItemDelegateForColumn(1, linkdelegate)
        self.treeWidget.setItemDelegateForColumn(8, linkdelegate)
        self.treeWidget.setItemDelegateForColumn(
            0, gui.IndicatorItemDelegate(self.treeWidget,
                                         role=Qt.DisplayRole))

        proxyModel = MySortFilterProxyModel(self.treeWidget)
        self.treeWidget.setModel(proxyModel)
        self.treeWidget.selectionModel().selectionChanged.connect(
            self.updateSelection
        )
        self.treeWidget.viewport().setMouseTracking(True)

        splitterH = QSplitter(Qt.Horizontal, splitter)

        box = gui.widgetBox(splitterH, "Description")
        self.infoGDS = gui.widgetLabel(box, "")
        self.infoGDS.setWordWrap(True)
        gui.rubber(box)

        box = gui.widgetBox(splitterH, "Sample Annotations")
        self.annotationsTree = QTreeWidget(box)
        self.annotationsTree.setHeaderLabels(
            ["Type (Sample annotations)", "Sample count"]
        )
        self.annotationsTree.setRootIsDecorated(True)
        box.layout().addWidget(self.annotationsTree)
        self.annotationsTree.itemChanged.connect(
            self.annotationSelectionChanged
        )
        # Set while setAnnotations rebuilds the tree so that the itemChanged
        # signals it triggers are ignored.
        self._annotationsUpdating = False
        self.splitters = splitter, splitterH

        for sp, setting in zip(self.splitters, self.splitterSettings):
            sp.splitterMoved.connect(self.splitterMoved)
            sp.restoreState(setting)

        self.searchKeys = ["dataset_id", "title", "platform_organism",
                           "description"]

        self.gds = []
        self.gds_info = None

        self.resize(1000, 600)

        self.setBlocking(True)
        self.setEnabled(False)
        self.progressBarInit()

        self._executor = ThreadExecutor()

        func = partial(get_gds_model,
                       methodinvoke(self, "_setProgress", (float,)))
        self._inittask = Task(function=func)
        self._inittask.finished.connect(self._initializemodel)
        self._executor.submit(self._inittask)

        self._datatask = None

    @Slot(float)
    def _setProgress(self, value):
        """Set the progress bar value (invoked from the init task)."""
        self.progressBarValue = value

    def _initializemodel(self):
        """Install the loaded model and restore the previous selection.

        Connected to the ``finished`` signal of the initialization task.
        """
        assert self.thread() is QThread.currentThread()
        model, self.gds_info, self.gds = self._inittask.result()
        model.setParent(self)

        proxy = self.treeWidget.model()
        proxy.setFilterKeyColumn(0)
        proxy.setFilterRole(TextFilterRole)
        proxy.setFilterCaseSensitivity(False)
        proxy.setFilterFixedString(self.filterString)

        proxy.setSourceModel(model)
        proxy.sort(0, Qt.DescendingOrder)

        self.progressBarFinished()
        self.setBlocking(False)
        self.setEnabled(True)

        # Build the completer token list from the searchable text fields:
        # punctuation is replaced by spaces and short tokens are dropped.
        filter_items = " ".join(
            gds[key] for gds in self.gds for key in self.searchKeys
        )
        tr_chars = ",.:;!?(){}[]_-+\\|/%#@$^&*<>~`"
        tr_table = str.maketrans(tr_chars, " " * len(tr_chars))
        filter_items = filter_items.translate(tr_table)

        filter_items = sorted(set(filter_items.split(" ")))
        filter_items = [item for item in filter_items if len(item) > 3]

        self.completer.setTokenList(filter_items)

        if self.currentGds:
            current_id = self.currentGds["dataset_id"]
            gdss = [(i, qunpack(proxy.data(proxy.index(i, 1),
                                           Qt.DisplayRole)))
                    for i in range(proxy.rowCount())]
            current = [i for i, data in gdss if data and data == current_id]
            if current:
                current_index = proxy.index(current[0], 0)
                self.treeWidget.selectionModel().select(
                    current_index,
                    QItemSelectionModel.Select | QItemSelectionModel.Rows
                )
                self.treeWidget.scrollTo(
                    current_index, QTreeView.PositionAtCenter)

        for i in range(8):
            self.treeWidget.resizeColumnToContents(i)

        self.treeWidget.setColumnWidth(
            1, min(self.treeWidget.columnWidth(1), 300))
        self.treeWidget.setColumnWidth(
            2, min(self.treeWidget.columnWidth(2), 200))

        self.updateInfo()

    def updateInfo(self):
        """Refresh the "Info" label with total/cached/filtered counts."""
        gds_info = self.gds_info
        if gds_info is None:
            # The data set list has not been loaded yet; keep the
            # current label instead of failing on len(None).
            return

        text = ("%i datasets\n%i datasets cached\n" %
                (len(gds_info),
                 len(glob.glob(serverfiles.localpath("GEO") + "/GDS*"))))
        filtered = self.treeWidget.model().rowCount()
        if len(self.gds) != filtered:
            text += ("%i after filtering") % filtered
        self.infoBox.setText(text)

    def updateSelection(self, *args):
        """Update the current data set from the tree view selection."""
        current = self.treeWidget.selectedIndexes()
        mapToSource = self.treeWidget.model().mapToSource
        current = [mapToSource(index).row() for index in current]
        if current:
            self.currentGds = self.gds[current[0]]
            self.setAnnotations(self.currentGds)
            self.infoGDS.setText(self.currentGds.get("description", ""))
            self.nameEdit.setPlaceholderText(self.currentGds["title"])
            self.datasetName = \
                self.datasetNames.get(self.currentGds["dataset_id"], "")
        else:
            self.currentGds = None
            self.nameEdit.setPlaceholderText("")
            self.datasetName = ""

        self.commitIf()

    def setAnnotations(self, gds):
        """Rebuild the sample annotations tree for the data set `gds`.

        Top level items are the subset types, their children the subset
        descriptions with the number of samples. Check states are restored
        from `gdsSelectionStates` (checked by default).
        """
        self._annotationsUpdating = True
        try:
            self.annotationsTree.clear()

            annotations = defaultdict(set)
            subsetscount = {}
            for desc in gds["subsets"]:
                annotations[desc["type"]].add(desc["description"])
                subsetscount[desc["description"]] = \
                    str(len(desc["sample_id"]))

            for stype, subsets in annotations.items():
                key = (gds["dataset_id"], stype)
                subsetItem = QTreeWidgetItem(self.annotationsTree, [stype])
                subsetItem.setFlags(subsetItem.flags() |
                                    Qt.ItemIsUserCheckable |
                                    Qt.ItemIsTristate)
                subsetItem.setCheckState(
                    0, self.gdsSelectionStates.get(key, Qt.Checked)
                )
                subsetItem.key = key
                for subset in subsets:
                    key = (gds["dataset_id"], stype, subset)
                    item = QTreeWidgetItem(
                        subsetItem, [subset, subsetscount.get(subset, "")]
                    )
                    item.setFlags(item.flags() | Qt.ItemIsUserCheckable)
                    item.setCheckState(
                        0, self.gdsSelectionStates.get(key, Qt.Checked)
                    )
                    item.key = key
        finally:
            # Always re-enable itemChanged handling, even if building
            # the tree failed part way through.
            self._annotationsUpdating = False

        self.annotationsTree.expandAll()
        for i in range(self.annotationsTree.columnCount()):
            self.annotationsTree.resizeColumnToContents(i)

    def annotationSelectionChanged(self, item, column):
        """Store the check states of all annotation items.

        Ignored while `setAnnotations` is rebuilding the tree.
        """
        if self._annotationsUpdating:
            return
        for i in range(self.annotationsTree.topLevelItemCount()):
            item = self.annotationsTree.topLevelItem(i)
            self.gdsSelectionStates[item.key] = item.checkState(0)
            for j in range(item.childCount()):
                child = item.child(j)
                self.gdsSelectionStates[child.key] = child.checkState(0)

    def filter(self):
        """Apply the filter line edit contents to the proxy model."""
        filter_string = unicode(self.filterLineEdit.text())
        proxyModel = self.treeWidget.model()
        if proxyModel:
            strings = filter_string.lower().strip().split()
            proxyModel.setFilterFixedStrings(strings)
            self.updateInfo()

    def selectedSamples(self):
        """
        Return the currently selected sample annotations.

        The return value is a ``(samples, used_types)`` tuple where
        `samples` is a list of selected (sample type, sample value) tuples
        and `used_types` is a list of the sample types that have at least
        one selected value.

        .. note:: if some Sample annotation type has no selected values.
                  this method will return all values for it.

        """
        samples = []
        used_types = []

        for stype in childiter(self.annotationsTree.invisibleRootItem()):
            type_name = str(stype.text(0))
            selected_values = []
            all_values = []
            for sval in childiter(stype):
                value = (type_name, str(sval.text(0)))
                if self.gdsSelectionStates.get(sval.key, True):
                    selected_values.append(value)
                all_values.append(value)
            if selected_values:
                samples.extend(selected_values)
                used_types.append(type_name)
            else:
                # If no sample of sample type is selected we don't filter
                # on it.
                samples.extend(all_values)

        return samples, used_types

    def commitIf(self):
        """Commit if auto commit is on, else mark the selection as changed."""
        if self.autoCommit:
            self.commit()
        else:
            self.selectionChanged = True

    @Slot(int, int)
    def progressCompleted(self, value, total):
        """Update the progress bar from a (value, total) download report."""
        if total > 0:
            self.progressBarSet(100. * value / total, processEvents=False)
        else:
            pass
            # TODO: report 'indeterminate progress'

    def commit(self):
        """Start a background task retrieving the current data set.

        Does nothing when no data set is selected. The widget is disabled
        and blocked until `_on_dataready` is called.
        """
        if self.currentGds:
            self.error(0)
            sample_type = None
            self.progressBarInit(processEvents=None)

            _, groups = self.selectedSamples()
            if len(groups) == 1 and self.outputRows:
                sample_type = groups[0]

            self.setEnabled(False)
            self.setBlocking(True)

            progress = methodinvoke(self, "progressCompleted", (int, int))

            def get_data(gds_id, report_genes, transpose, sample_type, title):
                gds_ensure_downloaded(gds_id, progress)
                gds = geo.GDS(gds_id)
                data = gds.getdata(
                    report_genes=report_genes, transpose=transpose,
                    sample_type=sample_type
                )
                data.name = title
                return data

            get_data = partial(
                get_data, self.currentGds["dataset_id"],
                report_genes=self.mergeSpots,
                transpose=self.outputRows,
                sample_type=sample_type,
                title=self.datasetName or self.currentGds["title"]
            )
            self._datatask = Task(function=get_data)
            self._datatask.finished.connect(self._on_dataready)
            self._executor.submit(self._datatask)

    def _on_dataready(self):
        """Filter the retrieved data by selected annotations and send it.

        Connected to the ``finished`` signal of the data task.
        """
        self.setEnabled(True)
        self.setBlocking(False)
        self.progressBarFinished(processEvents=False)

        try:
            data = self._datatask.result()
        except urlrequest.URLError as error:
            self.error(0, ("Error while connecting to the NCBI ftp server! "
                           "'%s'" % error))
            # Use a default so the lookup itself can never raise when the
            # exception carries no __traceback__ attribute.
            sys.excepthook(type(error), error,
                           getattr(error, "__traceback__", None))
            return
        finally:
            self._datatask = None

        data_name = data.name
        samples, _ = self.selectedSamples()

        self.warning(0)
        message = None
        if self.outputRows:
            def samplesinst(ex):
                out = []
                for meta in data.domain.metas:
                    out.append((meta.name, ex[meta].value))

                if data.domain.class_var.name != 'class':
                    out.append((data.domain.class_var.name,
                                ex[data.domain.class_var].value))

                return out
            samples = set(samples)
            mask = [samples.issuperset(samplesinst(ex)) for ex in data]
            data = data[numpy.array(mask, dtype=bool)]
            if len(data) == 0:
                message = "No samples with selected sample annotations."
        else:
            samples = set(samples)
            domain = Orange.data.Domain(
                [attr for attr in data.domain.attributes
                 if samples.issuperset(attr.attributes.items())],
                data.domain.class_var,
                data.domain.metas
            )
            # domain.addmetas(data.domain.getmetas())

            if len(domain.attributes) == 0:
                message = "No samples with selected sample annotations."
            stypes = set(s[0] for s in samples)
            for attr in domain.attributes:
                attr.attributes = dict(
                    (key, value) for key, value in attr.attributes.items()
                    if key in stypes
                )
            data = Orange.data.Table(domain, data)

        if message is not None:
            self.warning(0, message)

        data_hints.set_hint(data, "taxid", self.currentGds.get("taxid", ""),
                            10.0)
        data_hints.set_hint(data, "genesinrows", self.outputRows, 10.0)

        data.name = data_name
        self.send("Expression Data", data)

        model = self.treeWidget.model().sourceModel()
        row = self.gds.index(self.currentGds)

        model.setData(model.index(row, 0), " ", Qt.DisplayRole)

        self.updateInfo()
        self.selectionChanged = False

    def splitterMoved(self, *args):
        """Save the state of both splitters into the settings."""
        self.splitterSettings = [bytes(sp.saveState())
                                 for sp in self.splitters]

    def send_report(self):
        """Add a description of the current data set to the report.

        Nothing is reported when no data set is selected.
        """
        if not self.currentGds:
            # No selection: subscripting None below would raise.
            return

        self.report_items("GEO Dataset",
                          [("ID", self.currentGds['dataset_id']),
                           ("Title", self.currentGds['title']),
                           ("Organism", self.currentGds['sample_organism'])])
        self.report_items("Data",
                          [("Samples", self.currentGds['sample_count']),
                           ("Features", self.currentGds['feature_count']),
                           ("Genes", self.currentGds['gene_count'])])
        self.report_name("Sample annotations")
        subsets = defaultdict(list)
        for subset in self.currentGds['subsets']:
            subsets[subset['type']].append(
                (subset['description'], len(subset['sample_id'])))
        self.report_html += "<ul>"
        for stype in subsets:
            self.report_html += "<b>" + stype + ":</b></br>"
            for desc, count in subsets[stype]:
                self.report_html += \
                    9 * " " + "<b>{}:</b> {}</br>".format(desc, count)
        self.report_html += "</ul>"

    def onDeleteWidget(self):
        """Cancel pending tasks and shut down the executor."""
        if self._inittask:
            self._inittask.future().cancel()
            self._inittask.finished.disconnect(self._initializemodel)
        if self._datatask:
            self._datatask.future().cancel()
            self._datatask.finished.disconnect(self._on_dataready)
        self._executor.shutdown(wait=False)

        super(OWGEODatasets, self).onDeleteWidget()

    def onNameEdited(self):
        """Remember the user supplied name for the current data set."""
        if self.currentGds:
            gds_id = self.currentGds["dataset_id"]
            self.datasetNames[gds_id] = unicode(self.nameEdit.text())
            self.commitIf()
class OWGEODatasets(OWWidget):
    """Widget for browsing GEO data sets (GDS) and sending one as a table.

    The list of data sets is loaded in a background task on construction.
    The selected data set is downloaded and converted to an
    ``Orange.data.Table`` in a second background task started by
    :meth:`commit`, and the result is sent on the "Expression Data" output.
    """

    name = "GEO Data Sets"
    description = DESCRIPTION
    icon = "../widgets/icons/GEODataSets.svg"
    priority = PRIORITY

    inputs = []
    outputs = [("Expression Data", Orange.data.Table)]

    settingsList = ["outputRows", "mergeSpots", "gdsSelectionStates",
                    "splitterSettings", "currentGds", "autoCommit",
                    "datasetNames"]

    outputRows = Setting(True)
    mergeSpots = Setting(True)
    gdsSelectionStates = Setting({})
    currentGds = Setting(None)
    datasetNames = Setting({})
    # Saved QSplitter states for the (vertical, horizontal) splitters.
    splitterSettings = Setting(
        (b'\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x01\xea\x00\x00\x00\xd7\x01\x00\x00\x00\x07\x01\x00\x00\x00\x02',
         b'\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x01\xb5\x00\x00\x02\x10\x01\x00\x00\x00\x07\x01\x00\x00\x00\x01')
    )

    autoCommit = Setting(False)

    def __init__(self, parent=None, signalManager=None, name=" GEO Data Sets"):
        """Build the GUI and start loading the data set list in background."""
        OWWidget.__init__(self, parent, signalManager, name)

        self.selectionChanged = False
        self.filterString = ""
        self.datasetName = ""

        ## GUI
        box = gui.widgetBox(self.controlArea, "Info", addSpace=True)
        self.infoBox = gui.widgetLabel(box, "Initializing\n\n")

        box = gui.widgetBox(self.controlArea, "Output", addSpace=True)
        gui.radioButtonsInBox(box, self, "outputRows",
                              ["Genes in rows", "Samples in rows"], "Rows",
                              callback=self.commitIf)

        gui.checkBox(box, self, "mergeSpots", "Merge spots of same gene",
                     callback=self.commitIf)

        gui.separator(box)
        self.nameEdit = gui.lineEdit(
            box, self, "datasetName", "Data set name",
            tooltip="Override the default output data set name",
            callback=self.onNameEdited
        )
        self.nameEdit.setPlaceholderText("")

        if sys.version_info < (3, ):
            box = gui.widgetBox(self.controlArea, "Commit", addSpace=True)
            self.commitButton = gui.button(
                box, self, "Commit", callback=self.commit)
            cb = gui.checkBox(box, self, "autoCommit", "Commit on any change")
            gui.setStopper(self, self.commitButton, cb, "selectionChanged",
                           self.commit)
        else:
            gui.auto_commit(self.controlArea, self, "autoCommit", "Commit",
                            box="Commit")
            self.commitIf = self.commit

        gui.rubber(self.controlArea)

        gui.widgetLabel(self.mainArea, "Filter")
        self.filterLineEdit = QLineEdit(
            textChanged=self.filter
        )
        self.completer = TokenListCompleter(
            self, caseSensitivity=Qt.CaseInsensitive
        )
        self.filterLineEdit.setCompleter(self.completer)

        self.mainArea.layout().addWidget(self.filterLineEdit)

        splitter = QSplitter(Qt.Vertical, self.mainArea)
        self.mainArea.layout().addWidget(splitter)
        self.treeWidget = QTreeView(splitter)

        self.treeWidget.setSelectionMode(QTreeView.SingleSelection)
        self.treeWidget.setRootIsDecorated(False)
        self.treeWidget.setSortingEnabled(True)
        self.treeWidget.setAlternatingRowColors(True)
        self.treeWidget.setUniformRowHeights(True)
        self.treeWidget.setEditTriggers(QTreeView.NoEditTriggers)

        linkdelegate = LinkStyledItemDelegate(self.treeWidget)
        self.treeWidget.setItemDelegateForColumn(1, linkdelegate)
        self.treeWidget.setItemDelegateForColumn(8, linkdelegate)
        self.treeWidget.setItemDelegateForColumn(
            0, gui.IndicatorItemDelegate(self.treeWidget,
                                         role=Qt.DisplayRole))

        proxyModel = MySortFilterProxyModel(self.treeWidget)
        self.treeWidget.setModel(proxyModel)
        self.treeWidget.selectionModel().selectionChanged.connect(
            self.updateSelection
        )
        self.treeWidget.viewport().setMouseTracking(True)

        splitterH = QSplitter(Qt.Horizontal, splitter)

        box = gui.widgetBox(splitterH, "Description")
        self.infoGDS = gui.widgetLabel(box, "")
        self.infoGDS.setWordWrap(True)
        gui.rubber(box)

        box = gui.widgetBox(splitterH, "Sample Annotations")
        self.annotationsTree = QTreeWidget(box)
        self.annotationsTree.setHeaderLabels(
            ["Type (Sample annotations)", "Sample count"]
        )
        self.annotationsTree.setRootIsDecorated(True)
        box.layout().addWidget(self.annotationsTree)
        self.annotationsTree.itemChanged.connect(
            self.annotationSelectionChanged
        )
        # Set while setAnnotations rebuilds the tree so that the itemChanged
        # signals it triggers are ignored.
        self._annotationsUpdating = False
        self.splitters = splitter, splitterH

        for sp, setting in zip(self.splitters, self.splitterSettings):
            sp.splitterMoved.connect(self.splitterMoved)
            sp.restoreState(setting)

        self.searchKeys = ["dataset_id", "title", "platform_organism",
                           "description"]

        self.gds = []
        self.gds_info = None

        self.resize(1000, 600)

        self.setBlocking(True)
        self.setEnabled(False)
        self.progressBarInit()

        self._executor = ThreadExecutor()

        func = partial(get_gds_model,
                       methodinvoke(self, "_setProgress", (float,)))
        self._inittask = Task(function=func)
        self._inittask.finished.connect(self._initializemodel)
        self._executor.submit(self._inittask)

        self._datatask = None

    @Slot(float)
    def _setProgress(self, value):
        """Set the progress bar value (invoked from the init task)."""
        self.progressBarValue = value

    def _initializemodel(self):
        """Install the loaded model and restore the previous selection.

        Connected to the ``finished`` signal of the initialization task.
        """
        assert self.thread() is QThread.currentThread()
        model, self.gds_info, self.gds = self._inittask.result()
        model.setParent(self)

        proxy = self.treeWidget.model()
        proxy.setFilterKeyColumn(0)
        proxy.setFilterRole(TextFilterRole)
        proxy.setFilterCaseSensitivity(False)
        proxy.setFilterFixedString(self.filterString)

        proxy.setSourceModel(model)
        proxy.sort(0, Qt.DescendingOrder)

        self.progressBarFinished()
        self.setBlocking(False)
        self.setEnabled(True)

        # Build the completer token list from the searchable text fields:
        # punctuation is replaced by spaces and short tokens are dropped.
        filter_items = " ".join(
            gds[key] for gds in self.gds for key in self.searchKeys
        )
        tr_chars = ",.:;!?(){}[]_-+\\|/%#@$^&*<>~`"
        tr_table = str.maketrans(tr_chars, " " * len(tr_chars))
        filter_items = filter_items.translate(tr_table)

        filter_items = sorted(set(filter_items.split(" ")))
        filter_items = [item for item in filter_items if len(item) > 3]

        self.completer.setTokenList(filter_items)

        if self.currentGds:
            current_id = self.currentGds["dataset_id"]
            gdss = [(i, qunpack(proxy.data(proxy.index(i, 1),
                                           Qt.DisplayRole)))
                    for i in range(proxy.rowCount())]
            current = [i for i, data in gdss if data and data == current_id]
            if current:
                current_index = proxy.index(current[0], 0)
                self.treeWidget.selectionModel().select(
                    current_index,
                    QItemSelectionModel.Select | QItemSelectionModel.Rows
                )
                self.treeWidget.scrollTo(
                    current_index, QTreeView.PositionAtCenter)

        for i in range(8):
            self.treeWidget.resizeColumnToContents(i)

        self.treeWidget.setColumnWidth(
            1, min(self.treeWidget.columnWidth(1), 300))
        self.treeWidget.setColumnWidth(
            2, min(self.treeWidget.columnWidth(2), 200))

        self.updateInfo()

    def updateInfo(self):
        """Refresh the "Info" label with total/cached/filtered counts."""
        gds_info = self.gds_info
        if gds_info is None:
            # The data set list has not been loaded yet; keep the
            # current label instead of failing on len(None).
            return

        text = ("%i datasets\n%i datasets cached\n" %
                (len(gds_info),
                 len(glob.glob(serverfiles.localpath("GEO") + "/GDS*"))))
        filtered = self.treeWidget.model().rowCount()
        if len(self.gds) != filtered:
            text += ("%i after filtering") % filtered
        self.infoBox.setText(text)

    def updateSelection(self, *args):
        """Update the current data set from the tree view selection."""
        current = self.treeWidget.selectedIndexes()
        mapToSource = self.treeWidget.model().mapToSource
        current = [mapToSource(index).row() for index in current]
        if current:
            self.currentGds = self.gds[current[0]]
            self.setAnnotations(self.currentGds)
            self.infoGDS.setText(self.currentGds.get("description", ""))
            self.nameEdit.setPlaceholderText(self.currentGds["title"])
            self.datasetName = \
                self.datasetNames.get(self.currentGds["dataset_id"], "")
        else:
            self.currentGds = None
            self.nameEdit.setPlaceholderText("")
            self.datasetName = ""

        self.commitIf()

    def setAnnotations(self, gds):
        """Rebuild the sample annotations tree for the data set `gds`.

        Top level items are the subset types, their children the subset
        descriptions with the number of samples. Check states are restored
        from `gdsSelectionStates` (checked by default).
        """
        self._annotationsUpdating = True
        try:
            self.annotationsTree.clear()

            annotations = defaultdict(set)
            subsetscount = {}
            for desc in gds["subsets"]:
                annotations[desc["type"]].add(desc["description"])
                subsetscount[desc["description"]] = \
                    str(len(desc["sample_id"]))

            for stype, subsets in annotations.items():
                key = (gds["dataset_id"], stype)
                subsetItem = QTreeWidgetItem(self.annotationsTree, [stype])
                subsetItem.setFlags(subsetItem.flags() |
                                    Qt.ItemIsUserCheckable |
                                    Qt.ItemIsTristate)
                subsetItem.setCheckState(
                    0, self.gdsSelectionStates.get(key, Qt.Checked)
                )
                subsetItem.key = key
                for subset in subsets:
                    key = (gds["dataset_id"], stype, subset)
                    item = QTreeWidgetItem(
                        subsetItem, [subset, subsetscount.get(subset, "")]
                    )
                    item.setFlags(item.flags() | Qt.ItemIsUserCheckable)
                    item.setCheckState(
                        0, self.gdsSelectionStates.get(key, Qt.Checked)
                    )
                    item.key = key
        finally:
            # Always re-enable itemChanged handling, even if building
            # the tree failed part way through.
            self._annotationsUpdating = False

        self.annotationsTree.expandAll()
        for i in range(self.annotationsTree.columnCount()):
            self.annotationsTree.resizeColumnToContents(i)

    def annotationSelectionChanged(self, item, column):
        """Store the check states of all annotation items.

        Ignored while `setAnnotations` is rebuilding the tree.
        """
        if self._annotationsUpdating:
            return
        for i in range(self.annotationsTree.topLevelItemCount()):
            item = self.annotationsTree.topLevelItem(i)
            self.gdsSelectionStates[item.key] = item.checkState(0)
            for j in range(item.childCount()):
                child = item.child(j)
                self.gdsSelectionStates[child.key] = child.checkState(0)

    def filter(self):
        """Apply the filter line edit contents to the proxy model."""
        filter_string = unicode(self.filterLineEdit.text())
        proxyModel = self.treeWidget.model()
        if proxyModel:
            strings = filter_string.lower().strip().split()
            proxyModel.setFilterFixedStrings(strings)
            self.updateInfo()

    def selectedSamples(self):
        """
        Return the currently selected sample annotations.

        The return value is a ``(samples, used_types)`` tuple where
        `samples` is a list of selected (sample type, sample value) tuples
        and `used_types` is a list of the sample types that have at least
        one selected value.

        .. note:: if some Sample annotation type has no selected values.
                  this method will return all values for it.

        """
        samples = []
        used_types = []

        for stype in childiter(self.annotationsTree.invisibleRootItem()):
            type_name = str(stype.text(0))
            selected_values = []
            all_values = []
            for sval in childiter(stype):
                value = (type_name, str(sval.text(0)))
                if self.gdsSelectionStates.get(sval.key, True):
                    selected_values.append(value)
                all_values.append(value)
            if selected_values:
                samples.extend(selected_values)
                used_types.append(type_name)
            else:
                # If no sample of sample type is selected we don't filter
                # on it.
                samples.extend(all_values)

        return samples, used_types

    def commitIf(self):
        """Commit if auto commit is on, else mark the selection as changed."""
        if self.autoCommit:
            self.commit()
        else:
            self.selectionChanged = True

    @Slot(int, int)
    def progressCompleted(self, value, total):
        """Update the progress bar from a (value, total) download report."""
        if total > 0:
            self.progressBarSet(100. * value / total, processEvents=False)
        else:
            pass
            # TODO: report 'indeterminate progress'

    def commit(self):
        """Start a background task retrieving the current data set.

        Does nothing when no data set is selected. The widget is disabled
        and blocked until `_on_dataready` is called.
        """
        if self.currentGds:
            self.error(0)
            sample_type = None
            self.progressBarInit(processEvents=None)

            _, groups = self.selectedSamples()
            if len(groups) == 1 and self.outputRows:
                sample_type = groups[0]

            self.setEnabled(False)
            self.setBlocking(True)

            progress = methodinvoke(self, "progressCompleted", (int, int))

            def get_data(gds_id, report_genes, transpose, sample_type, title):
                gds_ensure_downloaded(gds_id, progress)
                gds = geo.GDS(gds_id)
                data = gds.getdata(
                    report_genes=report_genes, transpose=transpose,
                    sample_type=sample_type
                )
                data.name = title
                return data

            get_data = partial(
                get_data, self.currentGds["dataset_id"],
                report_genes=self.mergeSpots,
                transpose=self.outputRows,
                sample_type=sample_type,
                title=self.datasetName or self.currentGds["title"]
            )
            self._datatask = Task(function=get_data)
            self._datatask.finished.connect(self._on_dataready)
            self._executor.submit(self._datatask)

    def _on_dataready(self):
        """Filter the retrieved data by selected annotations and send it.

        Connected to the ``finished`` signal of the data task.
        """
        self.setEnabled(True)
        self.setBlocking(False)
        self.progressBarFinished(processEvents=False)

        try:
            data = self._datatask.result()
        except urlrequest.URLError as error:
            self.error(0, ("Error while connecting to the NCBI ftp server! "
                           "'%s'" % error))
            # Use a default so the lookup itself can never raise when the
            # exception carries no __traceback__ attribute.
            sys.excepthook(type(error), error,
                           getattr(error, "__traceback__", None))
            return
        finally:
            self._datatask = None

        data_name = data.name
        samples, _ = self.selectedSamples()

        self.warning(0)
        message = None
        if self.outputRows:
            def samplesinst(ex):
                out = []
                for meta in data.domain.metas:
                    out.append((meta.name, ex[meta].value))

                if data.domain.class_var.name != 'class':
                    out.append((data.domain.class_var.name,
                                ex[data.domain.class_var].value))

                return out
            samples = set(samples)
            mask = [samples.issuperset(samplesinst(ex)) for ex in data]
            data = data[numpy.array(mask, dtype=bool)]
            if len(data) == 0:
                message = "No samples with selected sample annotations."
        else:
            samples = set(samples)
            domain = Orange.data.Domain(
                [attr for attr in data.domain.attributes
                 if samples.issuperset(attr.attributes.items())],
                data.domain.class_var,
                data.domain.metas
            )
            # domain.addmetas(data.domain.getmetas())

            if len(domain.attributes) == 0:
                message = "No samples with selected sample annotations."
            stypes = set(s[0] for s in samples)
            for attr in domain.attributes:
                attr.attributes = dict(
                    (key, value) for key, value in attr.attributes.items()
                    if key in stypes
                )
            data = Orange.data.Table(domain, data)

        if message is not None:
            self.warning(0, message)

        data_hints.set_hint(data, "taxid", self.currentGds.get("taxid", ""),
                            10.0)
        data_hints.set_hint(data, "genesinrows", self.outputRows, 10.0)

        data.name = data_name
        self.send("Expression Data", data)

        model = self.treeWidget.model().sourceModel()
        row = self.gds.index(self.currentGds)

        model.setData(model.index(row, 0), " ", Qt.DisplayRole)

        self.updateInfo()
        self.selectionChanged = False

    def splitterMoved(self, *args):
        """Save the state of both splitters into the settings."""
        self.splitterSettings = [bytes(sp.saveState())
                                 for sp in self.splitters]

    def send_report(self):
        """Add a description of the current data set to the report.

        Nothing is reported when no data set is selected.
        """
        if not self.currentGds:
            # No selection: subscripting None below would raise.
            return

        self.report_items("GEO Dataset",
                          [("ID", self.currentGds['dataset_id']),
                           ("Title", self.currentGds['title']),
                           ("Organism", self.currentGds['sample_organism'])])
        self.report_items("Data",
                          [("Samples", self.currentGds['sample_count']),
                           ("Features", self.currentGds['feature_count']),
                           ("Genes", self.currentGds['gene_count'])])
        self.report_name("Sample annotations")
        subsets = defaultdict(list)
        for subset in self.currentGds['subsets']:
            subsets[subset['type']].append(
                (subset['description'], len(subset['sample_id'])))
        self.report_html += "<ul>"
        for stype in subsets:
            self.report_html += "<b>" + stype + ":</b></br>"
            for desc, count in subsets[stype]:
                self.report_html += \
                    9 * " " + "<b>{}:</b> {}</br>".format(desc, count)
        self.report_html += "</ul>"

    def onDeleteWidget(self):
        """Cancel pending tasks and shut down the executor."""
        if self._inittask:
            self._inittask.future().cancel()
            self._inittask.finished.disconnect(self._initializemodel)
        if self._datatask:
            self._datatask.future().cancel()
            self._datatask.finished.disconnect(self._on_dataready)
        self._executor.shutdown(wait=False)

        super(OWGEODatasets, self).onDeleteWidget()

    def onNameEdited(self):
        """Remember the user supplied name for the current data set."""
        if self.currentGds:
            gds_id = self.currentGds["dataset_id"]
            self.datasetNames[gds_id] = unicode(self.nameEdit.text())
            self.commitIf()
class OWGeneInfo(widget.OWWidget):
    """Widget that looks up information records for the genes of a table.

    The input table's genes (taken either from the attribute names or from
    the values of a string/discrete column) are passed to an info source
    selected from ``INFO_SOURCES`` for the chosen organism.  The returned
    records are shown in a filterable tree view and the rows/columns of the
    input that correspond to the selected records are sent on the
    "Data Subset" output.
    """

    name = "Gene Info"
    description = "Displays gene information from NCBI and other sources."
    icon = "../widgets/icons/GeneInfo.svg"
    priority = 2010

    inputs = [("Data", Orange.data.Table, "setData")]
    outputs = [("Data Subset", Orange.data.Table)]

    settingsHandler = settings.DomainContextHandler()

    # Index into ``self.organisms`` and the matching taxonomy id.
    organism_index = settings.ContextSetting(0)
    taxid = settings.ContextSetting("9606")

    # Index into ``self.attributes`` of the column holding gene names.
    gene_attr = settings.ContextSetting(0)

    auto_commit = settings.Setting(False)
    search_string = settings.Setting("")

    # Take gene names from attribute names instead of a column.
    useAttr = settings.ContextSetting(False)
    # Use the alternative (second) info source for the organism.
    useAltSource = settings.ContextSetting(False)

    def __init__(self, parent=None):
        """Build the GUI and start the background taxonomy download."""
        # ``self`` is already bound by ``super()``; passing it again would
        # hand the widget to the base class as its own first argument.
        super().__init__(parent)

        self.selectionChangedFlag = False
        self.__initialized = False
        self.initfuture = None
        self.itemsfuture = None

        self.infoLabel = gui.widgetLabel(
            gui.widgetBox(self.controlArea, "Info", addSpace=True),
            "Initializing\n"
        )

        self.organisms = None
        self.organismBox = gui.widgetBox(
            self.controlArea, "Organism", addSpace=True)

        self.organismComboBox = gui.comboBox(
            self.organismBox, self, "organism_index",
            callback=self._onSelectedOrganismChanged)

        # For now only support one alt source, with a checkbox
        # In the future this can be extended to multiple selections
        self.altSourceCheck = gui.checkBox(
            self.organismBox, self, "useAltSource",
            "Show information from dictyBase",
            callback=self.onAltSourceChange)
        self.altSourceCheck.hide()

        box = gui.widgetBox(self.controlArea, "Gene names", addSpace=True)
        self.geneAttrComboBox = gui.comboBox(
            box, self, "gene_attr",
            "Gene attribute", callback=self.updateInfoItems
        )
        self.geneAttrComboBox.setEnabled(not self.useAttr)
        cb = gui.checkBox(box, self, "useAttr", "Use attribute names",
                          callback=self.updateInfoItems)
        cb.toggled[bool].connect(self.geneAttrComboBox.setDisabled)

        gui.auto_commit(self.controlArea, self, "auto_commit", "Commit")

        # A label for dictyExpress link (Why oh god why???)
        self.dictyExpressBox = gui.widgetBox(
            self.controlArea, "Dicty Express")
        self.linkLabel = gui.widgetLabel(self.dictyExpressBox, "")
        self.linkLabel.setOpenExternalLinks(False)
        self.linkLabel.linkActivated.connect(self.onDictyExpressLink)
        self.dictyExpressBox.hide()

        gui.rubber(self.controlArea)

        gui.lineEdit(self.mainArea, self, "search_string", "Filter",
                     callbackOnType=True, callback=self.searchUpdate)

        self.treeWidget = QTreeView(
            self.mainArea,
            selectionMode=QTreeView.ExtendedSelection,
            rootIsDecorated=False,
            uniformRowHeights=True,
            sortingEnabled=True)

        self.treeWidget.setItemDelegate(
            gui.LinkStyledItemDelegate(self.treeWidget))
        self.treeWidget.viewport().setMouseTracking(True)
        self.mainArea.layout().addWidget(self.treeWidget)

        box = gui.widgetBox(self.mainArea, "", orientation="horizontal")
        gui.button(box, self, "Select Filtered",
                   callback=self.selectFiltered)
        gui.button(box, self, "Clear Selection",
                   callback=self.treeWidget.clearSelection)

        self.geneinfo = []
        self.cells = []
        self.row2geneinfo = {}
        self.data = None

        # : (# input genes, # matches genes)
        self.matchedInfo = 0, 0

        # Block the widget until the taxonomy files are available.
        self.setBlocking(True)
        self.executor = ThreadExecutor(self)

        self.progressBarInit()

        task = Task(
            function=partial(
                taxonomy.ensure_downloaded,
                callback=methodinvoke(self, "advance", ())
            )
        )

        task.resultReady.connect(self.initialize)
        task.exceptionReady.connect(self._onInitializeError)

        self.initfuture = self.executor.submit(task)

    def sizeHint(self):
        """Return the preferred initial size of the widget."""
        return QSize(1024, 720)

    @Slot()
    def advance(self):
        """Advance the progress bar by one step (GUI thread only)."""
        assert self.thread() is QThread.currentThread()
        self.progressBarSet(self.progressBarValue + 1,
                            processEvents=None)

    def initialize(self):
        """Populate the organism list once the initial download finished.

        Safe to call more than once; only the first call has an effect.
        """
        if self.__initialized:
            # Already initialized
            return
        self.__initialized = True

        self.organisms = sorted(
            set([name.split(".")[-2] for name in
                 serverfiles.listfiles("NCBI_geneinfo")] +
                gene.NCBIGeneInfo.common_taxids())
        )

        self.organismComboBox.addItems(
            [taxonomy.name(tax_id) for tax_id in self.organisms]
        )
        self._syncOrganismIndex()

        self.altSourceCheck.setVisible(self.taxid == DICTY_TAXID)
        self.dictyExpressBox.setVisible(self.taxid == DICTY_TAXID)

        self.infoLabel.setText("No data on input\n")
        self.initfuture = None

        self.setBlocking(False)
        self.progressBarFinished(processEvents=None)

    def _syncOrganismIndex(self):
        """Point ``organism_index`` at ``taxid``, or fall back to entry 0."""
        if self.taxid in self.organisms:
            self.organism_index = self.organisms.index(self.taxid)
        else:
            self.organism_index = 0
            self.taxid = self.organisms[self.organism_index]

    def _onInitializeError(self, exc):
        """Report a failure of the initial download task."""
        sys.excepthook(type(exc), exc, None)
        self.error(0, "Could not download the necessary files.")

    def _onSelectedOrganismChanged(self):
        """React to a new selection in the organism combo box."""
        # Strict upper bound: an index equal to len() is out of range.
        assert 0 <= self.organism_index < len(self.organisms)
        self.taxid = self.organisms[self.organism_index]
        self.altSourceCheck.setVisible(self.taxid == DICTY_TAXID)
        self.dictyExpressBox.setVisible(self.taxid == DICTY_TAXID)

        if self.data is not None:
            self.updateInfoItems()

    def setData(self, data=None):
        """Input signal handler for the "Data" channel.

        Raises ``Exception`` if a previous info request is still running.
        """
        if not self.__initialized:
            # Wait for the download task and finish initialization now.
            self.initfuture.result()
            self.initialize()

        if self.itemsfuture is not None:
            raise Exception("Already processing")

        self.closeContext()
        self.data = data

        if data is None:
            self.clear()
            return

        self.geneAttrComboBox.clear()
        # Only string and discrete variables can hold gene names.
        self.attributes = [
            attr for attr in data.domain.variables + data.domain.metas
            if isinstance(attr, (Orange.data.StringVariable,
                                 Orange.data.DiscreteVariable))
        ]

        for var in self.attributes:
            self.geneAttrComboBox.addItem(*gui.attributeItem(var))

        self.taxid = data_hints.get_hint(self.data, "taxid", self.taxid)
        self.useAttr = data_hints.get_hint(
            self.data, "genesinrows", self.useAttr)

        self.openContext(data)
        self.gene_attr = min(self.gene_attr, len(self.attributes) - 1)

        self._syncOrganismIndex()
        self.updateInfoItems()

    def infoSource(self):
        """ Return the current selected info source getter function from
        INFO_SOURCES
        """
        org = self.organisms[min(self.organism_index, len(self.organisms) - 1)]
        if org not in INFO_SOURCES:
            org = "default"
        sources = INFO_SOURCES[org]
        name, func = sources[min(self.useAltSource, len(sources) - 1)]
        return name, func

    def inputGenes(self):
        """Return the list of gene names extracted from the input data.

        Names come from the attribute names when ``useAttr`` is set,
        otherwise from the non-missing values of the selected gene column.
        """
        if self.useAttr:
            genes = [attr.name for attr in self.data.domain.attributes]
        elif self.attributes:
            attr = self.attributes[self.gene_attr]
            genes = [str(ex[attr]) for ex in self.data
                     if not math.isnan(ex[attr])]
        else:
            genes = []
        return genes

    def updateInfoItems(self):
        """Start a background task retrieving info for the input genes."""
        self.warning(0)
        if self.data is None:
            return

        # Single source of truth for gene extraction (see inputGenes).
        genes = self.inputGenes()
        if not genes:
            self.warning(0, "Could not extract genes from input dataset.")

        self.warning(1)
        org = self.organisms[min(self.organism_index, len(self.organisms) - 1)]
        _, info_getter = self.infoSource()

        self.error(0)

        self.updateDictyExpressLink(genes, show=org == DICTY_TAXID)
        self.altSourceCheck.setVisible(org == DICTY_TAXID)

        self.progressBarInit()
        self.setBlocking(True)
        self.setEnabled(False)
        self.infoLabel.setText("Retrieving info records.\n")

        self.genes = genes

        task = Task(
            function=partial(
                info_getter, org, genes,
                advance=methodinvoke(self, "advance", ()))
        )
        self.itemsfuture = self.executor.submit(task)
        task.finished.connect(self._onItemsCompleted)

    def _onItemsCompleted(self):
        """Show the retrieved info records in the tree view."""
        self.setBlocking(False)
        self.progressBarFinished()
        self.setEnabled(True)

        try:
            schema, geneinfo = self.itemsfuture.result()
        finally:
            # Always release the future so new requests are accepted.
            self.itemsfuture = None

        self.geneinfo = geneinfo = list(zip(self.genes, geneinfo))
        self.cells = cells = []
        # Maps a row of ``cells`` to the index of its gene in ``geneinfo``.
        self.row2geneinfo = {}
        links = []
        for i, (_, gi) in enumerate(geneinfo):
            if not gi:
                continue
            row = []
            for _, item in zip(schema, gi):
                if isinstance(item, Link):
                    # TODO: This should be handled by delegates
                    row.append(item.text)
                    links.append(item.link)
                else:
                    row.append(item)
            cells.append(row)
            self.row2geneinfo[len(cells) - 1] = i

        model = TreeModel(cells, [str(col) for col in schema], None)

        model.setColumnLinks(0, links)
        proxyModel = QSortFilterProxyModel(self)
        proxyModel.setSourceModel(model)
        self.treeWidget.setModel(proxyModel)
        self.treeWidget.selectionModel().selectionChanged.connect(self.commit)

        # Seven is the number of columns in the default header (see clear).
        for i in range(7):
            self.treeWidget.resizeColumnToContents(i)
            self.treeWidget.setColumnWidth(
                i, min(self.treeWidget.columnWidth(i), 200)
            )

        self.infoLabel.setText("%i genes\n%i matched NCBI's IDs" %
                               (len(self.genes), len(cells)))
        self.matchedInfo = len(self.genes), len(cells)

    def clear(self):
        """Reset the view to an empty model and send ``None`` on output."""
        self.infoLabel.setText("No data on input\n")
        self.treeWidget.setModel(
            TreeModel([], ["NCBI ID", "Symbol", "Locus Tag",
                           "Chromosome", "Description", "Synonyms",
                           "Nomenclature"], self.treeWidget))

        self.geneAttrComboBox.clear()
        self.send("Data Subset", None)

    def commit(self):
        """Send the subset of the input matching the selected records."""
        if self.data is None:
            self.send("Data Subset", None)
            return

        model = self.treeWidget.model()
        selection = self.treeWidget.selectionModel().selection()
        selection = model.mapSelectionToSource(selection)
        selectedRows = list(
            chain.from_iterable(
                range(r.top(), r.bottom() + 1) for r in selection)
        )

        model = model.sourceModel()

        # Gene name -> source model row, for the selected rows only.
        gene2row = {self.geneinfo[self.row2geneinfo[row]][0]: row
                    for row in selectedRows}
        selectedIds = set(gene2row)

        if self.useAttr:
            attrs = [attr for attr in self.data.domain.attributes
                     if attr.name in selectedIds]
            domain = Orange.data.Domain(
                attrs, self.data.domain.class_vars, self.data.domain.metas)
            newdata = self.data.from_table(domain, self.data)
            self.send("Data Subset", newdata)

        elif self.attributes:
            attr = self.attributes[self.gene_attr]
            gene_col = [attr.str_val(v)
                        for v in self.data.get_column_view(attr)[0]]
            gene_col = [(i, name) for i, name in enumerate(gene_col)
                        if name in selectedIds]
            indices = [i for i, _ in gene_col]

            # Add a gene info columns to the output
            headers = [str(model.headerData(i, Qt.Horizontal, Qt.DisplayRole))
                       for i in range(model.columnCount())]
            metas = [Orange.data.StringVariable(name) for name in headers]
            domain = Orange.data.Domain(
                self.data.domain.attributes, self.data.domain.class_vars,
                self.data.domain.metas + tuple(metas))

            newdata = self.data.from_table(domain, self.data)[indices]

            model_rows = [gene2row[gene] for _, gene in gene_col]
            for col, meta in zip(range(model.columnCount()), metas):
                col_data = [str(model.index(row, col).data(Qt.DisplayRole))
                            for row in model_rows]
                col_data = np.array(col_data, dtype=object, ndmin=2).T
                newdata[:, meta] = col_data

            if not len(newdata):
                newdata = None

            self.send("Data Subset", newdata)
        else:
            self.send("Data Subset", None)

    def rowFiltered(self, row):
        """Return True if ``row`` of ``cells`` fails the search filter."""
        searchStrings = self.search_string.lower().split()
        row = " ".join(self.cells[row]).lower()
        return not all(s in row for s in searchStrings)

    def searchUpdate(self):
        """Hide the rows that do not match the current search string."""
        if not self.data:
            return
        searchStrings = self.search_string.lower().split()
        index = self.treeWidget.model().sourceModel().index
        mapFromSource = self.treeWidget.model().mapFromSource
        for i, row in enumerate(self.cells):
            row = " ".join(row).lower()
            self.treeWidget.setRowHidden(
                mapFromSource(index(i, 0)).row(),
                QModelIndex(),
                not all(s in row for s in searchStrings))

    def selectFiltered(self):
        """Add every row matching the search string to the selection."""
        if not self.data:
            return
        itemSelection = QItemSelection()

        index = self.treeWidget.model().sourceModel().index
        mapFromSource = self.treeWidget.model().mapFromSource
        for i, row in enumerate(self.cells):
            if not self.rowFiltered(i):
                itemSelection.select(mapFromSource(index(i, 0)),
                                     mapFromSource(index(i, 0)))
        self.treeWidget.selectionModel().select(
            itemSelection,
            QItemSelectionModel.Select | QItemSelectionModel.Rows)

    @staticmethod
    def _dictyGeneId(ddb):
        """Return ``ddb`` in ``DDB_G`` form, or None if not a DDB id."""
        if not ddb.startswith("DDB"):
            return None
        if not ddb.startswith("DDB_G"):
            ddb = ddb.replace("DDB", "DDB_G")
        return ddb

    def updateDictyExpressLink(self, genes, show=False):
        """Update the dictyExpress link label and the box visibility.

        The box is shown only when ``show`` is true and at least one of
        ``genes`` is a DDB identifier.
        """
        if show:
            genes = [gid for gid in map(self._dictyGeneId, genes) if gid]
            link1 = '<a href="http://dictyexpress.biolab.si/run/index.php?gene=%s">Microarray profile</a>'
            link2 = '<a href="http://dictyexpress.biolab.si/run/index.php?gene=%s&db=rnaseq">RNA-Seq profile</a>'
            self.linkLabel.setText(link1 + "<br/>" + link2)

            show = any(genes)

        if show:
            self.dictyExpressBox.show()
        else:
            self.dictyExpressBox.hide()

    def onDictyExpressLink(self, link):
        """Open the activated dictyExpress link for the selected genes.

        ``link`` is a template containing a single ``%s`` placeholder that
        is filled with the space separated DDB ids of the selection.
        """
        if not self.data:
            return

        selectedIndexes = self.treeWidget.selectedIndexes()
        if not len(selectedIndexes):
            QMessageBox.information(
                self, "No gene ids selected",
                "Please select some genes and try again."
            )
            return
        mapToSource = self.treeWidget.model().mapToSource
        selectedRows = [mapToSource(index).row() for index in selectedIndexes]

        # A set removes the duplicates produced by multi-column selection.
        selectedIds = set(self.geneinfo[self.row2geneinfo[row]][0]
                          for row in selectedRows)

        genes = [gid for gid in map(self._dictyGeneId, selectedIds) if gid]
        url = str(link) % " ".join(genes)
        QDesktopServices.openUrl(QUrl(url))

    def onAltSourceChange(self):
        """Re-query the gene info after the alt source box was toggled."""
        self.updateInfoItems()

    def onDeleteWidget(self):
        """Cancel pending tasks and shut down the executor."""
        # try to cancel pending tasks
        if self.initfuture:
            self.initfuture.cancel()
        if self.itemsfuture:
            self.itemsfuture.cancel()

        self.executor.shutdown(wait=False)
        super().onDeleteWidget()
class OWGeneInfo(widget.OWWidget):
    """Widget that looks up information records for the genes of a table.

    The input table's genes (taken either from the attribute names or from
    the values of a string/discrete column) are passed to an info source
    selected from ``INFO_SOURCES`` for the chosen organism.  The returned
    records are shown in a filterable tree view and the rows/columns of the
    input that correspond to the selected records are sent on the
    "Data Subset" output.
    """

    name = "Gene Info"
    description = "Displays gene information from NCBI and other sources."
    icon = "../widgets/icons/GeneInfo.svg"
    priority = 2010

    inputs = [("Data", Orange.data.Table, "setData")]
    outputs = [("Data Subset", Orange.data.Table)]

    settingsHandler = settings.DomainContextHandler()

    # Index into ``self.organisms`` and the matching taxonomy id.
    organism_index = settings.ContextSetting(0)
    taxid = settings.ContextSetting("9606")

    # Index into ``self.attributes`` of the column holding gene names.
    gene_attr = settings.ContextSetting(0)

    auto_commit = settings.Setting(False)
    search_string = settings.Setting("")

    # Take gene names from attribute names instead of a column.
    useAttr = settings.ContextSetting(False)
    # Use the alternative (second) info source for the organism.
    useAltSource = settings.ContextSetting(False)

    def __init__(self, parent=None):
        """Build the GUI and start the background taxonomy download."""
        # ``self`` is already bound by ``super()``; passing it again would
        # hand the widget to the base class as its own first argument.
        super().__init__(parent)

        self.selectionChangedFlag = False
        self.__initialized = False
        self.initfuture = None
        self.itemsfuture = None

        self.infoLabel = gui.widgetLabel(
            gui.widgetBox(self.controlArea, "Info", addSpace=True),
            "Initializing\n"
        )

        self.organisms = None
        self.organismBox = gui.widgetBox(
            self.controlArea, "Organism", addSpace=True)

        self.organismComboBox = gui.comboBox(
            self.organismBox, self, "organism_index",
            callback=self._onSelectedOrganismChanged)

        # For now only support one alt source, with a checkbox
        # In the future this can be extended to multiple selections
        self.altSourceCheck = gui.checkBox(
            self.organismBox, self, "useAltSource",
            "Show information from dictyBase",
            callback=self.onAltSourceChange)
        self.altSourceCheck.hide()

        box = gui.widgetBox(self.controlArea, "Gene names", addSpace=True)
        self.geneAttrComboBox = gui.comboBox(
            box, self, "gene_attr",
            "Gene attribute", callback=self.updateInfoItems
        )
        self.geneAttrComboBox.setEnabled(not self.useAttr)
        cb = gui.checkBox(box, self, "useAttr", "Use attribute names",
                          callback=self.updateInfoItems)
        cb.toggled[bool].connect(self.geneAttrComboBox.setDisabled)

        gui.auto_commit(self.controlArea, self, "auto_commit", "Commit")

        # A label for dictyExpress link (Why oh god why???)
        self.dictyExpressBox = gui.widgetBox(
            self.controlArea, "Dicty Express")
        self.linkLabel = gui.widgetLabel(self.dictyExpressBox, "")
        self.linkLabel.setOpenExternalLinks(False)
        self.linkLabel.linkActivated.connect(self.onDictyExpressLink)
        self.dictyExpressBox.hide()

        gui.rubber(self.controlArea)

        gui.lineEdit(self.mainArea, self, "search_string", "Filter",
                     callbackOnType=True, callback=self.searchUpdate)

        self.treeWidget = QTreeView(
            self.mainArea,
            selectionMode=QTreeView.ExtendedSelection,
            rootIsDecorated=False,
            uniformRowHeights=True,
            sortingEnabled=True)

        self.treeWidget.setItemDelegate(
            gui.LinkStyledItemDelegate(self.treeWidget))
        self.treeWidget.viewport().setMouseTracking(True)
        self.mainArea.layout().addWidget(self.treeWidget)

        box = gui.widgetBox(self.mainArea, "", orientation="horizontal")
        gui.button(box, self, "Select Filtered",
                   callback=self.selectFiltered)
        gui.button(box, self, "Clear Selection",
                   callback=self.treeWidget.clearSelection)

        self.geneinfo = []
        self.cells = []
        self.row2geneinfo = {}
        self.data = None

        # : (# input genes, # matches genes)
        self.matchedInfo = 0, 0

        # Block the widget until the taxonomy files are available.
        self.setBlocking(True)
        self.executor = ThreadExecutor(self)

        self.progressBarInit()

        task = Task(
            function=partial(
                taxonomy.ensure_downloaded,
                callback=methodinvoke(self, "advance", ())
            )
        )

        task.resultReady.connect(self.initialize)
        task.exceptionReady.connect(self._onInitializeError)

        self.initfuture = self.executor.submit(task)

    def sizeHint(self):
        """Return the preferred initial size of the widget."""
        return QSize(1024, 720)

    @Slot()
    def advance(self):
        """Advance the progress bar by one step (GUI thread only)."""
        assert self.thread() is QThread.currentThread()
        self.progressBarSet(self.progressBarValue + 1,
                            processEvents=None)

    def initialize(self):
        """Populate the organism list once the initial download finished.

        Safe to call more than once; only the first call has an effect.
        """
        if self.__initialized:
            # Already initialized
            return
        self.__initialized = True

        self.organisms = sorted(
            set([name.split(".")[-2] for name in
                 serverfiles.listfiles("NCBI_geneinfo")] +
                gene.NCBIGeneInfo.common_taxids())
        )

        self.organismComboBox.addItems(
            [taxonomy.name(tax_id) for tax_id in self.organisms]
        )
        self._syncOrganismIndex()

        self.altSourceCheck.setVisible(self.taxid == DICTY_TAXID)
        self.dictyExpressBox.setVisible(self.taxid == DICTY_TAXID)

        self.infoLabel.setText("No data on input\n")
        self.initfuture = None

        self.setBlocking(False)
        self.progressBarFinished(processEvents=None)

    def _syncOrganismIndex(self):
        """Point ``organism_index`` at ``taxid``, or fall back to entry 0."""
        if self.taxid in self.organisms:
            self.organism_index = self.organisms.index(self.taxid)
        else:
            self.organism_index = 0
            self.taxid = self.organisms[self.organism_index]

    def _onInitializeError(self, exc):
        """Report a failure of the initial download task."""
        sys.excepthook(type(exc), exc, None)
        self.error(0, "Could not download the necessary files.")

    def _onSelectedOrganismChanged(self):
        """React to a new selection in the organism combo box."""
        # Strict upper bound: an index equal to len() is out of range.
        assert 0 <= self.organism_index < len(self.organisms)
        self.taxid = self.organisms[self.organism_index]
        self.altSourceCheck.setVisible(self.taxid == DICTY_TAXID)
        self.dictyExpressBox.setVisible(self.taxid == DICTY_TAXID)

        if self.data is not None:
            self.updateInfoItems()

    def setData(self, data=None):
        """Input signal handler for the "Data" channel.

        Raises ``Exception`` if a previous info request is still running.
        """
        if not self.__initialized:
            # Wait for the download task and finish initialization now.
            self.initfuture.result()
            self.initialize()

        if self.itemsfuture is not None:
            raise Exception("Already processing")

        self.closeContext()
        self.data = data

        if data is None:
            self.clear()
            return

        self.geneAttrComboBox.clear()
        # Only string and discrete variables can hold gene names.
        self.attributes = [
            attr for attr in data.domain.variables + data.domain.metas
            if isinstance(attr, (Orange.data.StringVariable,
                                 Orange.data.DiscreteVariable))
        ]

        for var in self.attributes:
            self.geneAttrComboBox.addItem(*gui.attributeItem(var))

        self.taxid = data_hints.get_hint(self.data, "taxid", self.taxid)
        self.useAttr = data_hints.get_hint(
            self.data, "genesinrows", self.useAttr)

        self.openContext(data)
        self.gene_attr = min(self.gene_attr, len(self.attributes) - 1)

        self._syncOrganismIndex()
        self.updateInfoItems()

    def infoSource(self):
        """ Return the current selected info source getter function from
        INFO_SOURCES
        """
        org = self.organisms[min(self.organism_index, len(self.organisms) - 1)]
        if org not in INFO_SOURCES:
            org = "default"
        sources = INFO_SOURCES[org]
        name, func = sources[min(self.useAltSource, len(sources) - 1)]
        return name, func

    def inputGenes(self):
        """Return the list of gene names extracted from the input data.

        Names come from the attribute names when ``useAttr`` is set,
        otherwise from the non-missing values of the selected gene column.
        """
        if self.useAttr:
            genes = [attr.name for attr in self.data.domain.attributes]
        elif self.attributes:
            attr = self.attributes[self.gene_attr]
            genes = [str(ex[attr]) for ex in self.data
                     if not math.isnan(ex[attr])]
        else:
            genes = []
        return genes

    def updateInfoItems(self):
        """Start a background task retrieving info for the input genes."""
        self.warning(0)
        if self.data is None:
            return

        # Single source of truth for gene extraction (see inputGenes).
        genes = self.inputGenes()
        if not genes:
            self.warning(0, "Could not extract genes from input dataset.")

        self.warning(1)
        org = self.organisms[min(self.organism_index, len(self.organisms) - 1)]
        _, info_getter = self.infoSource()

        self.error(0)

        self.updateDictyExpressLink(genes, show=org == DICTY_TAXID)
        self.altSourceCheck.setVisible(org == DICTY_TAXID)

        self.progressBarInit()
        self.setBlocking(True)
        self.setEnabled(False)
        self.infoLabel.setText("Retrieving info records.\n")

        self.genes = genes

        task = Task(
            function=partial(
                info_getter, org, genes,
                advance=methodinvoke(self, "advance", ()))
        )
        self.itemsfuture = self.executor.submit(task)
        task.finished.connect(self._onItemsCompleted)

    def _onItemsCompleted(self):
        """Show the retrieved info records in the tree view."""
        self.setBlocking(False)
        self.progressBarFinished()
        self.setEnabled(True)

        try:
            schema, geneinfo = self.itemsfuture.result()
        finally:
            # Always release the future so new requests are accepted.
            self.itemsfuture = None

        self.geneinfo = geneinfo = list(zip(self.genes, geneinfo))
        self.cells = cells = []
        # Maps a row of ``cells`` to the index of its gene in ``geneinfo``.
        self.row2geneinfo = {}
        links = []
        for i, (_, gi) in enumerate(geneinfo):
            if not gi:
                continue
            row = []
            for _, item in zip(schema, gi):
                if isinstance(item, Link):
                    # TODO: This should be handled by delegates
                    row.append(item.text)
                    links.append(item.link)
                else:
                    row.append(item)
            cells.append(row)
            self.row2geneinfo[len(cells) - 1] = i

        model = TreeModel(cells, [str(col) for col in schema], None)

        model.setColumnLinks(0, links)
        proxyModel = QSortFilterProxyModel(self)
        proxyModel.setSourceModel(model)
        self.treeWidget.setModel(proxyModel)
        self.treeWidget.selectionModel().selectionChanged.connect(self.commit)

        # Seven is the number of columns in the default header (see clear).
        for i in range(7):
            self.treeWidget.resizeColumnToContents(i)
            self.treeWidget.setColumnWidth(
                i, min(self.treeWidget.columnWidth(i), 200)
            )

        self.infoLabel.setText("%i genes\n%i matched NCBI's IDs" %
                               (len(self.genes), len(cells)))
        self.matchedInfo = len(self.genes), len(cells)

    def clear(self):
        """Reset the view to an empty model and send ``None`` on output."""
        self.infoLabel.setText("No data on input\n")
        self.treeWidget.setModel(
            TreeModel([], ["NCBI ID", "Symbol", "Locus Tag",
                           "Chromosome", "Description", "Synonyms",
                           "Nomenclature"], self.treeWidget))

        self.geneAttrComboBox.clear()
        self.send("Data Subset", None)

    def commit(self):
        """Send the subset of the input matching the selected records."""
        if self.data is None:
            self.send("Data Subset", None)
            return

        model = self.treeWidget.model()
        selection = self.treeWidget.selectionModel().selection()
        selection = model.mapSelectionToSource(selection)
        selectedRows = list(
            chain.from_iterable(
                range(r.top(), r.bottom() + 1) for r in selection)
        )

        model = model.sourceModel()

        # Gene name -> source model row, for the selected rows only.
        gene2row = {self.geneinfo[self.row2geneinfo[row]][0]: row
                    for row in selectedRows}
        selectedIds = set(gene2row)

        if self.useAttr:
            attrs = [attr for attr in self.data.domain.attributes
                     if attr.name in selectedIds]
            domain = Orange.data.Domain(
                attrs, self.data.domain.class_vars, self.data.domain.metas)
            newdata = self.data.from_table(domain, self.data)
            self.send("Data Subset", newdata)

        elif self.attributes:
            attr = self.attributes[self.gene_attr]
            gene_col = [attr.str_val(v)
                        for v in self.data.get_column_view(attr)[0]]
            gene_col = [(i, name) for i, name in enumerate(gene_col)
                        if name in selectedIds]
            indices = [i for i, _ in gene_col]

            # Add a gene info columns to the output
            headers = [str(model.headerData(i, Qt.Horizontal, Qt.DisplayRole))
                       for i in range(model.columnCount())]
            metas = [Orange.data.StringVariable(name) for name in headers]
            domain = Orange.data.Domain(
                self.data.domain.attributes, self.data.domain.class_vars,
                self.data.domain.metas + tuple(metas))

            newdata = self.data.from_table(domain, self.data)[indices]

            model_rows = [gene2row[gene] for _, gene in gene_col]
            for col, meta in zip(range(model.columnCount()), metas):
                col_data = [str(model.index(row, col).data(Qt.DisplayRole))
                            for row in model_rows]
                col_data = np.array(col_data, dtype=object, ndmin=2).T
                newdata[:, meta] = col_data

            if not len(newdata):
                newdata = None

            self.send("Data Subset", newdata)
        else:
            self.send("Data Subset", None)

    def rowFiltered(self, row):
        """Return True if ``row`` of ``cells`` fails the search filter."""
        searchStrings = self.search_string.lower().split()
        row = " ".join(self.cells[row]).lower()
        return not all(s in row for s in searchStrings)

    def searchUpdate(self):
        """Hide the rows that do not match the current search string."""
        if not self.data:
            return
        searchStrings = self.search_string.lower().split()
        index = self.treeWidget.model().sourceModel().index
        mapFromSource = self.treeWidget.model().mapFromSource
        for i, row in enumerate(self.cells):
            row = " ".join(row).lower()
            self.treeWidget.setRowHidden(
                mapFromSource(index(i, 0)).row(),
                QModelIndex(),
                not all(s in row for s in searchStrings))

    def selectFiltered(self):
        """Add every row matching the search string to the selection."""
        if not self.data:
            return
        itemSelection = QItemSelection()

        index = self.treeWidget.model().sourceModel().index
        mapFromSource = self.treeWidget.model().mapFromSource
        for i, row in enumerate(self.cells):
            if not self.rowFiltered(i):
                itemSelection.select(mapFromSource(index(i, 0)),
                                     mapFromSource(index(i, 0)))
        self.treeWidget.selectionModel().select(
            itemSelection,
            QItemSelectionModel.Select | QItemSelectionModel.Rows)

    @staticmethod
    def _dictyGeneId(ddb):
        """Return ``ddb`` in ``DDB_G`` form, or None if not a DDB id."""
        if not ddb.startswith("DDB"):
            return None
        if not ddb.startswith("DDB_G"):
            ddb = ddb.replace("DDB", "DDB_G")
        return ddb

    def updateDictyExpressLink(self, genes, show=False):
        """Update the dictyExpress link label and the box visibility.

        The box is shown only when ``show`` is true and at least one of
        ``genes`` is a DDB identifier.
        """
        if show:
            genes = [gid for gid in map(self._dictyGeneId, genes) if gid]
            link1 = '<a href="http://dictyexpress.biolab.si/run/index.php?gene=%s">Microarray profile</a>'
            link2 = '<a href="http://dictyexpress.biolab.si/run/index.php?gene=%s&db=rnaseq">RNA-Seq profile</a>'
            self.linkLabel.setText(link1 + "<br/>" + link2)

            show = any(genes)

        if show:
            self.dictyExpressBox.show()
        else:
            self.dictyExpressBox.hide()

    def onDictyExpressLink(self, link):
        """Open the activated dictyExpress link for the selected genes.

        ``link`` is a template containing a single ``%s`` placeholder that
        is filled with the space separated DDB ids of the selection.
        """
        if not self.data:
            return

        selectedIndexes = self.treeWidget.selectedIndexes()
        if not len(selectedIndexes):
            QMessageBox.information(
                self, "No gene ids selected",
                "Please select some genes and try again."
            )
            return
        mapToSource = self.treeWidget.model().mapToSource
        selectedRows = [mapToSource(index).row() for index in selectedIndexes]

        # A set removes the duplicates produced by multi-column selection.
        selectedIds = set(self.geneinfo[self.row2geneinfo[row]][0]
                          for row in selectedRows)

        genes = [gid for gid in map(self._dictyGeneId, selectedIds) if gid]
        url = str(link) % " ".join(genes)
        QDesktopServices.openUrl(QUrl(url))

    def onAltSourceChange(self):
        """Re-query the gene info after the alt source box was toggled."""
        self.updateInfoItems()

    def onDeleteWidget(self):
        """Cancel pending tasks and shut down the executor."""
        # try to cancel pending tasks
        if self.initfuture:
            self.initfuture.cancel()
        if self.itemsfuture:
            self.itemsfuture.cancel()

        self.executor.shutdown(wait=False)
        super().onDeleteWidget()