class OWGEODatasets(OWWidget):
    """Browse the list of GEO data sets and output the selected one.

    The data set list is loaded on a background executor when the widget
    is created (``get_gds_model``); the selected data set is downloaded
    and converted on the same executor by :meth:`commit` and delivered
    on the "Expression Data" output by :meth:`_on_dataready`.
    """
    name = "GEO Data Sets"
    description = DESCRIPTION
    icon = "../widgets/icons/GEODataSets.svg"
    priority = PRIORITY

    inputs = []
    outputs = [("Expression Data", Orange.data.Table)]

    settingsList = [
        "outputRows", "mergeSpots", "gdsSelectionStates",
        "splitterSettings", "currentGds", "autoCommit", "datasetNames"
    ]

    # Index of the selected "Rows" radio button
    # (0: "Genes in rows", 1: "Samples in rows").
    outputRows = Setting(True)
    # Passed to ``GDS.getdata`` as ``report_genes``.
    mergeSpots = Setting(True)
    # Check states of the sample annotation items, keyed by
    # (dataset_id, type) and (dataset_id, type, subset) tuples.
    gdsSelectionStates = Setting({})
    # The info record (from ``self.gds``) of the selected data set.
    currentGds = Setting(None)
    # User supplied output names, keyed by dataset_id.
    datasetNames = Setting({})
    # Saved QSplitter states (vertical splitter, horizontal splitter).
    splitterSettings = Setting((
        b'\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x01\xea\x00\x00\x00\xd7\x01\x00\x00\x00\x07\x01\x00\x00\x00\x02',
        b'\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x01\xb5\x00\x00\x02\x10\x01\x00\x00\x00\x07\x01\x00\x00\x00\x01'
    ))
    autoCommit = Setting(False)

    def __init__(self, parent=None, signalManager=None,
                 name=" GEO Data Sets"):
        """Build the GUI and start loading the data set list."""
        OWWidget.__init__(self, parent, signalManager, name)

        self.selectionChanged = False
        self.filterString = ""
        self.datasetName = ""

        # GUI
        box = gui.widgetBox(self.controlArea, "Info", addSpace=True)
        self.infoBox = gui.widgetLabel(box, "Initializing\n\n")

        box = gui.widgetBox(self.controlArea, "Output", addSpace=True)
        gui.radioButtonsInBox(box, self, "outputRows",
                              ["Genes in rows", "Samples in rows"], "Rows",
                              callback=self.commitIf)

        gui.checkBox(box, self, "mergeSpots", "Merge spots of same gene",
                     callback=self.commitIf)

        gui.separator(box)
        self.nameEdit = gui.lineEdit(
            box, self, "datasetName", "Data set name",
            tooltip="Override the default output data set name",
            callback=self.onNameEdited
        )
        self.nameEdit.setPlaceholderText("")

        if sys.version_info < (3, ):
            box = gui.widgetBox(self.controlArea, "Commit", addSpace=True)
            self.commitButton = gui.button(
                box, self, "Commit", callback=self.commit)
            cb = gui.checkBox(box, self, "autoCommit",
                              "Commit on any change")
            gui.setStopper(self, self.commitButton, cb, "selectionChanged",
                           self.commit)
        else:
            gui.auto_commit(self.controlArea, self, "autoCommit", "Commit",
                            box="Commit")
            # From here on `commitIf` is whatever `self.commit` resolves to
            # after `gui.auto_commit` has been set up.
            self.commitIf = self.commit

        gui.rubber(self.controlArea)

        gui.widgetLabel(self.mainArea, "Filter")
        self.filterLineEdit = QLineEdit(textChanged=self.filter)
        self.completer = TokenListCompleter(
            self, caseSensitivity=Qt.CaseInsensitive)
        self.filterLineEdit.setCompleter(self.completer)

        self.mainArea.layout().addWidget(self.filterLineEdit)

        splitter = QSplitter(Qt.Vertical, self.mainArea)
        self.mainArea.layout().addWidget(splitter)
        self.treeWidget = QTreeView(splitter)

        self.treeWidget.setSelectionMode(QTreeView.SingleSelection)
        self.treeWidget.setRootIsDecorated(False)
        self.treeWidget.setSortingEnabled(True)
        self.treeWidget.setAlternatingRowColors(True)
        self.treeWidget.setUniformRowHeights(True)
        self.treeWidget.setEditTriggers(QTreeView.NoEditTriggers)

        linkdelegate = LinkStyledItemDelegate(self.treeWidget)
        self.treeWidget.setItemDelegateForColumn(1, linkdelegate)
        self.treeWidget.setItemDelegateForColumn(8, linkdelegate)
        self.treeWidget.setItemDelegateForColumn(
            0, gui.IndicatorItemDelegate(self.treeWidget,
                                         role=Qt.DisplayRole))

        proxyModel = MySortFilterProxyModel(self.treeWidget)
        self.treeWidget.setModel(proxyModel)
        self.treeWidget.selectionModel().selectionChanged.connect(
            self.updateSelection)
        self.treeWidget.viewport().setMouseTracking(True)

        splitterH = QSplitter(Qt.Horizontal, splitter)

        box = gui.widgetBox(splitterH, "Description")
        self.infoGDS = gui.widgetLabel(box, "")
        self.infoGDS.setWordWrap(True)
        gui.rubber(box)

        box = gui.widgetBox(splitterH, "Sample Annotations")
        self.annotationsTree = QTreeWidget(box)
        self.annotationsTree.setHeaderLabels(
            ["Type (Sample annotations)", "Sample count"])
        self.annotationsTree.setRootIsDecorated(True)
        box.layout().addWidget(self.annotationsTree)
        self.annotationsTree.itemChanged.connect(
            self.annotationSelectionChanged)
        # Set while `setAnnotations` repopulates the tree so that the
        # resulting itemChanged signals are ignored.
        self._annotationsUpdating = False
        self.splitters = splitter, splitterH

        for sp, setting in zip(self.splitters, self.splitterSettings):
            sp.splitterMoved.connect(self.splitterMoved)
            sp.restoreState(setting)

        # Info record fields used to build the filter completion tokens.
        self.searchKeys = [
            "dataset_id", "title", "platform_organism", "description"
        ]

        self.gds = []
        self.gds_info = None

        self.resize(1000, 600)

        # Keep the widget blocked/disabled until the model is loaded.
        self.setBlocking(True)
        self.setEnabled(False)
        self.progressBarInit()

        self._executor = ThreadExecutor()

        func = partial(get_gds_model,
                       methodinvoke(self, "_setProgress", (float, )))
        self._inittask = Task(function=func)
        self._inittask.finished.connect(self._initializemodel)
        self._executor.submit(self._inittask)

        self._datatask = None

    @Slot(float)
    def _setProgress(self, value):
        """Progress callback for the model loading task."""
        self.progressBarValue = value

    def _initializemodel(self):
        """Install the loaded model and restore the previous selection.

        Connected to the ``finished`` signal of the init task.
        """
        assert self.thread() is QThread.currentThread()
        model, self.gds_info, self.gds = self._inittask.result()
        model.setParent(self)

        proxy = self.treeWidget.model()
        proxy.setFilterKeyColumn(0)
        proxy.setFilterRole(TextFilterRole)
        proxy.setFilterCaseSensitivity(False)
        proxy.setFilterFixedString(self.filterString)

        proxy.setSourceModel(model)
        proxy.sort(0, Qt.DescendingOrder)

        self.progressBarFinished()
        self.setBlocking(False)
        self.setEnabled(True)

        # Build the completer token list: all words longer than three
        # characters that appear in the searchable fields.
        filter_items = " ".join(
            gds[key] for gds in self.gds for key in self.searchKeys)
        tr_chars = ",.:;!?(){}[]_-+\\|/%#@$^&*<>~`"
        tr_table = str.maketrans(tr_chars, " " * len(tr_chars))
        filter_items = filter_items.translate(tr_table)

        filter_items = sorted(set(filter_items.split(" ")))
        filter_items = [item for item in filter_items if len(item) > 3]

        self.completer.setTokenList(filter_items)

        if self.currentGds:
            # Re-select the row whose column 1 value equals the stored
            # data set id.
            current_id = self.currentGds["dataset_id"]
            gdss = [(i, qunpack(proxy.data(proxy.index(i, 1),
                                           Qt.DisplayRole)))
                    for i in range(proxy.rowCount())]
            current = [i for i, data in gdss
                       if data and data == current_id]
            if current:
                current_index = proxy.index(current[0], 0)
                self.treeWidget.selectionModel().select(
                    current_index,
                    QItemSelectionModel.Select | QItemSelectionModel.Rows)
                self.treeWidget.scrollTo(current_index,
                                         QTreeView.PositionAtCenter)

        for i in range(8):
            self.treeWidget.resizeColumnToContents(i)

        self.treeWidget.setColumnWidth(
            1, min(self.treeWidget.columnWidth(1), 300))
        self.treeWidget.setColumnWidth(
            2, min(self.treeWidget.columnWidth(2), 200))

        self.updateInfo()

    def updateInfo(self):
        """Refresh the "Info" box (total, cached and filtered counts)."""
        gds_info = self.gds_info
        text = ("%i datasets\n%i datasets cached\n" %
                (len(gds_info),
                 len(glob.glob(serverfiles.localpath("GEO") + "/GDS*"))))
        filtered = self.treeWidget.model().rowCount()
        if len(self.gds) != filtered:
            text += ("%i after filtering") % filtered
        self.infoBox.setText(text)

    def updateSelection(self, *args):
        """Update the widget state after the data set selection changed."""
        current = self.treeWidget.selectedIndexes()
        mapToSource = self.treeWidget.model().mapToSource
        current = [mapToSource(index).row() for index in current]
        if current:
            self.currentGds = self.gds[current[0]]
            self.setAnnotations(self.currentGds)
            self.infoGDS.setText(self.currentGds.get("description", ""))
            self.nameEdit.setPlaceholderText(self.currentGds["title"])
            self.datasetName = \
                self.datasetNames.get(self.currentGds["dataset_id"], "")
        else:
            self.currentGds = None
            self.nameEdit.setPlaceholderText("")
            self.datasetName = ""

        self.commitIf()

    def setAnnotations(self, gds):
        """Populate the sample annotations tree from ``gds["subsets"]``.

        Check states are restored from ``gdsSelectionStates`` and default
        to checked.
        """
        self._annotationsUpdating = True
        self.annotationsTree.clear()

        annotations = defaultdict(set)
        subsetscount = {}
        for desc in gds["subsets"]:
            annotations[desc["type"]].add(desc["description"])
            subsetscount[desc["description"]] = str(len(desc["sample_id"]))

        for subset_type, subsets in annotations.items():
            key = (gds["dataset_id"], subset_type)
            subsetItem = QTreeWidgetItem(self.annotationsTree,
                                         [subset_type])
            subsetItem.setFlags(subsetItem.flags() |
                                Qt.ItemIsUserCheckable |
                                Qt.ItemIsTristate)
            subsetItem.setCheckState(
                0, self.gdsSelectionStates.get(key, Qt.Checked))
            subsetItem.key = key

            for subset in subsets:
                key = (gds["dataset_id"], subset_type, subset)
                item = QTreeWidgetItem(
                    subsetItem, [subset, subsetscount.get(subset, "")])
                item.setFlags(item.flags() | Qt.ItemIsUserCheckable)
                item.setCheckState(
                    0, self.gdsSelectionStates.get(key, Qt.Checked))
                item.key = key

        self._annotationsUpdating = False
        self.annotationsTree.expandAll()
        for i in range(self.annotationsTree.columnCount()):
            self.annotationsTree.resizeColumnToContents(i)

    def annotationSelectionChanged(self, item, column):
        """Store the check states of all annotation items.

        The `item` and `column` arguments of the itemChanged signal are
        not used; the whole tree is saved every time.
        """
        if self._annotationsUpdating:
            return
        for i in range(self.annotationsTree.topLevelItemCount()):
            type_item = self.annotationsTree.topLevelItem(i)
            self.gdsSelectionStates[type_item.key] = type_item.checkState(0)
            for j in range(type_item.childCount()):
                child = type_item.child(j)
                self.gdsSelectionStates[child.key] = child.checkState(0)

    def filter(self):
        """Apply the text of the filter line edit to the proxy model."""
        # `str` instead of the Python 2 only `unicode`: the class already
        # requires Python 3 (`str.maketrans` in `_initializemodel`).
        filter_string = str(self.filterLineEdit.text())
        proxyModel = self.treeWidget.model()
        if proxyModel:
            strings = filter_string.lower().strip().split()
            proxyModel.setFilterFixedStrings(strings)
            self.updateInfo()

    def selectedSamples(self):
        """
        Return the currently selected sample annotations.

        The return value is a ``(samples, used_types)`` tuple: `samples`
        is a list of selected (sample type, sample value) tuples and
        `used_types` is a list of the sample types that have at least one
        selected value.

        .. note:: if some Sample annotation type has no selected values,
                  this method will return all values for it (and the type
                  is not included in `used_types`).

        """
        samples = []
        used_types = []

        for stype in childiter(self.annotationsTree.invisibleRootItem()):
            type_name = str(stype.text(0))
            selected_values = []
            all_values = []
            for sval in childiter(stype):
                value = (type_name, str(sval.text(0)))
                if self.gdsSelectionStates.get(sval.key, True):
                    selected_values.append(value)
                all_values.append(value)
            if selected_values:
                samples.extend(selected_values)
                used_types.append(type_name)
            else:
                # If no sample of sample type is selected we don't filter
                # on it.
                samples.extend(all_values)

        return samples, used_types

    def commitIf(self):
        """Commit if auto commit is on, else mark the state as changed."""
        if self.autoCommit:
            self.commit()
        else:
            self.selectionChanged = True

    @Slot(int, int)
    def progressCompleted(self, value, total):
        """Progress callback for the download task."""
        if total > 0:
            self.progressBarSet(100. * value / total, processEvents=False)
        else:
            pass  # TODO: report 'indeterminate progress'

    def commit(self):
        """Start retrieving the selected data set on the executor.

        Does nothing when no data set is selected. The result is handled
        in :meth:`_on_dataready`.
        """
        if not self.currentGds:
            return

        self.error(0)
        sample_type = None
        self.progressBarInit(processEvents=None)

        _, groups = self.selectedSamples()
        if len(groups) == 1 and self.outputRows:
            sample_type = groups[0]

        # Re-enabled in `_on_dataready`.
        self.setEnabled(False)
        self.setBlocking(True)

        progress = methodinvoke(self, "progressCompleted", (int, int))

        def get_data(gds_id, report_genes, transpose, sample_type, title):
            gds_ensure_downloaded(gds_id, progress)
            gds = geo.GDS(gds_id)
            data = gds.getdata(report_genes=report_genes,
                               transpose=transpose,
                               sample_type=sample_type)
            data.name = title
            return data

        get_data = partial(
            get_data, self.currentGds["dataset_id"],
            report_genes=self.mergeSpots,
            transpose=self.outputRows,
            sample_type=sample_type,
            title=self.datasetName or self.currentGds["title"]
        )
        self._datatask = Task(function=get_data)
        self._datatask.finished.connect(self._on_dataready)
        self._executor.submit(self._datatask)

    def _on_dataready(self):
        """Filter the retrieved data by the selected annotations and send it.

        Connected to the ``finished`` signal of the data task.
        """
        self.setEnabled(True)
        self.setBlocking(False)
        self.progressBarFinished(processEvents=False)

        try:
            data = self._datatask.result()
        except urlrequest.URLError as error:
            self.error(0, ("Error while connecting to the NCBI ftp server! "
                           "'%s'" % error))
            sys.excepthook(type(error), error,
                           getattr(error, "__traceback__"))
            return
        finally:
            self._datatask = None

        data_name = data.name
        samples, _ = self.selectedSamples()

        self.warning(0)
        message = None
        if self.outputRows:
            def samplesinst(ex):
                # (annotation name, value) pairs of a single instance.
                out = []
                for meta in data.domain.metas:
                    out.append((meta.name, ex[meta].value))

                if data.domain.class_var.name != 'class':
                    out.append((data.domain.class_var.name,
                                ex[data.domain.class_var].value))

                return out
            samples = set(samples)
            # Keep the instances whose annotations are all selected.
            mask = [samples.issuperset(samplesinst(ex)) for ex in data]
            data = data[numpy.array(mask, dtype=bool)]
            if len(data) == 0:
                message = "No samples with selected sample annotations."
        else:
            samples = set(samples)
            # Keep the columns whose annotations are all selected.
            domain = Orange.data.Domain(
                [attr for attr in data.domain.attributes
                 if samples.issuperset(attr.attributes.items())],
                data.domain.class_var,
                data.domain.metas
            )
            # domain.addmetas(data.domain.getmetas())
            if len(domain.attributes) == 0:
                message = "No samples with selected sample annotations."
            stypes = set(s[0] for s in samples)
            for attr in domain.attributes:
                attr.attributes = dict(
                    (key, value) for key, value in attr.attributes.items()
                    if key in stypes
                )
            data = Orange.data.Table(domain, data)

        if message is not None:
            self.warning(0, message)

        data_hints.set_hint(data, "taxid",
                            self.currentGds.get("taxid", ""), 10.0)
        data_hints.set_hint(data, "genesinrows", self.outputRows, 10.0)

        data.name = data_name
        self.send("Expression Data", data)

        # Update column 0 of the data set's row in the source model.
        model = self.treeWidget.model().sourceModel()
        row = self.gds.index(self.currentGds)
        model.setData(model.index(row, 0), " ", Qt.DisplayRole)

        self.updateInfo()
        self.selectionChanged = False

    def splitterMoved(self, *args):
        """Store the current splitter states in the widget settings."""
        self.splitterSettings = [
            bytes(sp.saveState()) for sp in self.splitters
        ]

    def send_report(self):
        """Add a description of the selected data set to the report.

        Nothing is reported when no data set is selected.
        """
        gds = self.currentGds
        if not gds:
            # Subscripting `None` below would raise a TypeError.
            return

        self.report_items("GEO Dataset",
                          [("ID", gds['dataset_id']),
                           ("Title", gds['title']),
                           ("Organism", gds['sample_organism'])])
        self.report_items("Data",
                          [("Samples", gds['sample_count']),
                           ("Features", gds['feature_count']),
                           ("Genes", gds['gene_count'])])
        self.report_name("Sample annotations")
        subsets = defaultdict(list)
        for subset in gds['subsets']:
            subsets[subset['type']].append(
                (subset['description'], len(subset['sample_id'])))
        self.report_html += "<ul>"
        for subset_type in subsets:
            self.report_html += "<b>" + subset_type + ":</b></br>"
            for desc, count in subsets[subset_type]:
                self.report_html += 9 * " " + "<b>{}:</b> {}</br>".format(
                    desc, count)
        self.report_html += "</ul>"

    def onDeleteWidget(self):
        """Cancel pending tasks and shut down the executor."""
        if self._inittask:
            self._inittask.future().cancel()
            self._inittask.finished.disconnect(self._initializemodel)
        if self._datatask:
            self._datatask.future().cancel()
            self._datatask.finished.disconnect(self._on_dataready)
        self._executor.shutdown(wait=False)

        super(OWGEODatasets, self).onDeleteWidget()

    def onNameEdited(self):
        """Remember the edited output name for the selected data set."""
        if self.currentGds:
            gds_id = self.currentGds["dataset_id"]
            # `str` instead of the Python 2 only `unicode` (see `filter`).
            self.datasetNames[gds_id] = str(self.nameEdit.text())
            self.commitIf()
class OWItemsets(widget.OWWidget):
    """Find frequent itemsets in the input data and show them in a tree.

    Instances of the input data that contain the selected itemsets are
    sent on the "Matching Data" output.
    """
    name = 'Frequent Itemsets'
    description = 'Explore sets of items that frequently appear together.'
    icon = 'icons/FrequentItemsets.svg'
    priority = 10

    class Inputs:
        data = Input("Data", Table)

    class Outputs:
        matching_data = Output("Matching Data", Table)

    class Error(widget.OWWidget.Error):
        need_discrete_data = widget.Msg(
            "Need some discrete data to work with.")
        no_disc_features = widget.Msg(
            "Discrete features required but data has none.")

    class Warning(widget.OWWidget.Warning):
        cont_attrs = widget.Msg(
            "Data has continuous attributes which will be skipped.")
        err_reg_expression = widget.Msg("Error in regular expression: {}")

    minSupport = settings.Setting(30)
    maxItemsets = settings.Setting(10000)
    filterSearch = settings.Setting(True)
    autoFind = settings.Setting(False)
    autoSend = settings.Setting(True)
    filterKeywords = settings.Setting('')
    filterMinItems = settings.Setting(1)
    filterMaxItems = settings.Setting(10000)

    UserAdviceMessages = [
        widget.Message(
            'Itemset are listed in item-sorted order, i.e. '
            'an itemset containing A and B is only listed once, as '
            'A > B (and not also B > A).',
            'itemsets-order',
            widget.Message.Warning),
        # This message used to reuse the 'itemsets-order' id of the message
        # above; it now has an id of its own.
        widget.Message(
            'To select all the itemsets that are descendants of '
            '(include) some item X (i.e. the whole subtree), you '
            'can fold the subtree at that item and then select it.',
            'itemsets-subtree-selection',
            widget.Message.Information)
    ]

    def __init__(self):
        """Build the itemset tree and the control area."""
        self.data = None
        self.output = None
        self._is_running = False
        # Predicate applied to item names; replaced by `filter_change`.
        self.isRegexMatch = lambda x: True
        self.tree = QTreeWidget(self.mainArea,
                                columnCount=2,
                                allColumnsShowFocus=True,
                                alternatingRowColors=True,
                                selectionMode=QTreeWidget.ExtendedSelection,
                                uniformRowHeights=True)
        self.tree.setHeaderLabels(["Itemsets", "Support", "%"])
        self.tree.header().setStretchLastSection(True)
        self.tree.itemSelectionChanged.connect(self.selectionChanged)
        self.mainArea.layout().addWidget(self.tree)

        box = gui.widgetBox(self.controlArea, "Info")
        self.nItemsets = self.nSelectedExamples = self.nSelectedItemsets = ''
        gui.label(box, self, "Number of itemsets: %(nItemsets)s")
        gui.label(box, self, "Selected itemsets: %(nSelectedItemsets)s")
        gui.label(box, self, "Selected examples: %(nSelectedExamples)s")
        hbox = gui.widgetBox(box, orientation='horizontal')
        gui.button(hbox, self, "Expand all", callback=self.tree.expandAll)
        gui.button(hbox, self, "Collapse all",
                   callback=self.tree.collapseAll)

        box = gui.widgetBox(self.controlArea, 'Find itemsets')
        gui.valueSlider(box, self, 'minSupport',
                        values=[.0001, .0005, .001, .005, .01, .05, .1, .5]
                        + list(range(1, 101)),
                        label='Minimal support:', labelFormat="%g%%",
                        callback=lambda: self.find_itemsets())
        gui.hSlider(box, self, 'maxItemsets', minValue=10000,
                    maxValue=100000, step=10000,
                    label='Max. number of itemsets:', labelFormat="%d",
                    callback=lambda: self.find_itemsets())
        self.button = gui.auto_commit(
            box, self, 'autoFind', 'Find Itemsets',
            commit=self.find_itemsets,
            callback=lambda: self.autoFind and self.find_itemsets())

        box = gui.widgetBox(self.controlArea, 'Filter itemsets')
        gui.lineEdit(box, self, 'filterKeywords', 'Contains:',
                     callback=self.filter_change, orientation='horizontal',
                     tooltip='A comma or space-separated list of regular '
                             'expressions.')
        hbox = gui.widgetBox(box, orientation='horizontal')
        gui.spin(hbox, self, 'filterMinItems', 1, 998,
                 label='Min. items:', callback=self.filter_change)
        gui.spin(hbox, self, 'filterMaxItems', 2, 999,
                 label='Max. items:', callback=self.filter_change)
        gui.checkBox(box, self, 'filterSearch',
                     label='Apply these filters in search',
                     tooltip='If checked, the itemsets are filtered according '
                             'to these filter conditions already in the search '
                             'phase. \nIf unchecked, the only filters applied '
                             'during search are the ones above, '
                             'and the itemsets are \nfiltered afterwards only for '
                             'display, i.e. only the matching itemsets are shown.')
        gui.rubber(hbox)

        gui.rubber(self.controlArea)
        gui.auto_commit(self.controlArea, self, 'autoSend', 'Send selection')

        self.filter_change()

    # Item data role under which the encoded item is stored on tree nodes.
    ITEM_DATA_ROLE = Qt.UserRole + 1

    def selectionChanged(self):
        """Collect the instances matching the selected itemsets and commit.

        Selecting a collapsed node also selects its whole subtree.
        """
        if self.data is None:
            # The input was removed; there is nothing to match against and
            # `self.data[...]` below would raise.
            return

        X = self.X
        mapping = self.onehot_mapping
        instances = set()
        where = np.where

        def whole_subtree(node):
            # The node followed by all of its descendants.
            yield node
            for i in range(node.childCount()):
                yield from whole_subtree(node.child(i))

        def itemset(node):
            # Items stored on the node and on all of its ancestors.
            while node:
                yield node.data(0, self.ITEM_DATA_ROLE)
                node = node.parent()

        def selection_ranges(node):
            # (first child index, last child index) pairs for the node and
            # all of its descendants.
            n_children = node.childCount()
            if n_children:
                yield (self.tree.indexFromItem(node.child(0)),
                       self.tree.indexFromItem(node.child(n_children - 1)))
            for i in range(n_children):
                yield from selection_ranges(node.child(i))

        nSelectedItemsets = 0
        item_selection = QItemSelection()
        for selected in self.tree.selectedItems():
            if selected.isExpanded():
                nodes = (selected, )
            else:
                nodes = whole_subtree(selected)
                for srange in selection_ranges(selected):
                    item_selection.select(*srange)
            for node in nodes:
                nSelectedItemsets += 1
                cols, vals = zip(*(mapping[i] for i in itemset(node)))
                if issparse(X):
                    # Rows that have a nonzero entry in every column.
                    rows = (len(cols) == np.bincount(
                        (X[:, cols] != 0).indices,
                        minlength=X.shape[0])).nonzero()[0]
                else:
                    rows = where((X[:, cols] == vals).all(axis=1))[0]
                instances.update(rows)

        # Extend the selection without re-entering this handler.
        self.tree.itemSelectionChanged.disconnect(self.selectionChanged)
        self.tree.selectionModel().select(
            item_selection,
            QItemSelectionModel.Select | QItemSelectionModel.Rows)
        self.tree.itemSelectionChanged.connect(self.selectionChanged)

        self.nSelectedExamples = len(instances)
        self.nSelectedItemsets = nSelectedItemsets
        self.output = self.data[sorted(instances)] or None
        self.commit()

    def commit(self):
        """Send the matching instances to the output."""
        self.Outputs.matching_data.send(self.output)

    def filter_change(self):
        """Recompile the keyword filter and hide non-matching tree nodes.

        The keywords are separated by commas and/or whitespace and joined
        into a single case-insensitive alternation. If the expression does
        not compile, a warning is shown and everything matches.
        """
        self.Warning.err_reg_expression.clear()
        # Split without a capturing group: `re.split('(,|\s)+', ...)` also
        # returned the captured separators, so "a,b" used to compile to
        # "a|,|b" and matched every name containing a comma.
        keywords = [
            keyword
            for keyword in re.split(r'[,\s]+', self.filterKeywords.strip())
            if keyword
        ]
        try:
            isRegexMatch = self.isRegexMatch = re.compile(
                '|'.join(keywords), re.IGNORECASE).search
        except re.error as e:
            self.Warning.err_reg_expression(e.args[0])
            isRegexMatch = self.isRegexMatch = lambda x: True

        def hide(node, depth, has_kw):
            # A node with children is hidden when all of its children are;
            # a leaf is hidden when no node on its path matches or its
            # depth is outside the [filterMinItems, filterMaxItems] range.
            if not has_kw:
                has_kw = isRegexMatch(node.text(0))
            n_children = node.childCount()
            if n_children:
                n_hidden = sum(hide(node.child(i), depth + 1, has_kw)
                               for i in range(n_children))
                hidden = n_hidden == n_children
            else:
                hidden = (
                    not has_kw or
                    not self.filterMinItems <= depth <= self.filterMaxItems)
            node.setHidden(hidden)
            return hidden

        hide(self.tree.invisibleRootItem(), 0, False)

    class TreeWidgetItem(QTreeWidgetItem):
        def data(self, column, role):
            """Construct lazy tooltips"""
            if role != Qt.ToolTipRole:
                return super().data(column, role)
            # The tooltip lists the item texts from the top-level ancestor
            # down to this item, one per line.
            tooltip = []
            node = self
            while node:
                tooltip.append(node.text(0))
                node = node.parent()
            return '\n'.join(reversed(tooltip))

    def find_itemsets(self):
        """Find the frequent itemsets and populate the tree with them.

        Calling this while a search is running requests cancellation of
        the running search instead of starting a new one.
        """
        if self.data is None or not len(self.data):
            return
        if self._is_running:
            self._is_running = False
            return
        self._is_running = True

        self.button.button.setText('Cancel')

        data = self.data
        self.tree.clear()
        self.tree.setUpdatesEnabled(False)
        self.tree.blockSignals(True)

        class ItemDict(dict):
            # Maps child item names to ItemDict; `item` is the tree node.
            def __init__(self, item):
                self.item = item

        # The tree/button state changed above is restored in `finally` so
        # that the early exits below no longer leave the tree with updates
        # disabled and signals blocked, and the widget stuck in the
        # "running" state.
        try:
            top = ItemDict(self.tree.invisibleRootItem())
            X, mapping = OneHot.encode(data)
            self.Error.need_discrete_data.clear()
            if X is None:
                self.Error.need_discrete_data()
                # Nothing to search in; the (cleared) tree stays empty.
                self.nItemsets = 0
                self.nSelectedItemsets = 0
                self.nSelectedExamples = 0
                return

            self.onehot_mapping = mapping
            ITEM_FMT = '{}' if issparse(data.X) else '{}={}'
            names = {
                item: ITEM_FMT.format(var.name, val)
                for item, var, val in OneHot.decode(mapping.keys(), data,
                                                    mapping)
            }
            nItemsets = 0

            filterSearch = self.filterSearch
            filterMinItems = self.filterMinItems
            filterMaxItems = self.filterMaxItems
            isRegexMatch = self.isRegexMatch

            # Find itemsets and populate the TreeView
            with self.progressBar(self.maxItemsets + 1) as progress:
                for itemset, support in frequent_itemsets(
                        X, self.minSupport / 100):

                    if filterSearch and not (
                            filterMinItems <= len(itemset) <= filterMaxItems):
                        continue

                    parent = top
                    first_new_item = None
                    itemset_matches_filter = False

                    for item in sorted(itemset):
                        name = names[item]

                        if filterSearch and not itemset_matches_filter:
                            itemset_matches_filter = isRegexMatch(name)

                        child = parent.get(name)
                        if child is None:
                            try:
                                wi = self.TreeWidgetItem(parent.item, [
                                    name,
                                    str(support),
                                    '{:.4g}'.format(
                                        100 * support / len(data))
                                ])
                            except RuntimeError:
                                # FIXME: When autoFind was in effect and the
                                # support slider was moved, this line
                                # excepted with:
                                #     RuntimeError: wrapped C/C++ object of
                                #     type TreeWidgetItem has been deleted
                                return
                            wi.setData(0, self.ITEM_DATA_ROLE, item)
                            child = parent[name] = ItemDict(wi)

                            if first_new_item is None:
                                first_new_item = (parent, name)
                        parent = child

                    if filterSearch and not itemset_matches_filter:
                        # Remove the nodes added for this itemset. No node
                        # was added if the whole path already existed.
                        if first_new_item is not None:
                            parent, name = first_new_item
                            parent.item.removeChild(parent[name].item)
                            del parent[name].item
                            del parent[name]
                    else:
                        nItemsets += 1
                        progress.advance()

                    if not self._is_running or \
                            nItemsets >= self.maxItemsets:
                        break

                    # Keep the GUI responsive (this is also how the
                    # 'Cancel' click gets delivered).
                    qApp.processEvents()

            if not filterSearch:
                self.filter_change()
            self.nItemsets = nItemsets
            self.nSelectedItemsets = 0
            self.nSelectedExamples = 0
            self.tree.expandAll()
            for i in range(self.tree.columnCount()):
                self.tree.resizeColumnToContents(i)
        finally:
            self.tree.setUpdatesEnabled(True)
            self.tree.blockSignals(False)
            self._is_running = False
            self.button.button.setText('Find Itemsets')

    @Inputs.data
    def set_data(self, data):
        """Handle new input data (``None`` when the input is removed)."""
        self.data = data
        is_error = False
        # Messages describe the current input only; clear them also when
        # the input is removed.
        self.Warning.cont_attrs.clear()
        self.Error.no_disc_features.clear()
        self.Error.need_discrete_data.clear()
        if data is not None:
            self.button.setDisabled(False)

            self.X = data.X
            if issparse(data.X):
                self.X = data.X.tocsc()
            else:
                if not data.domain.has_discrete_attributes():
                    self.Error.no_disc_features()
                    is_error = True
                    self.button.setDisabled(True)
                elif data.domain.has_continuous_attributes():
                    self.Warning.cont_attrs()
        else:
            self.output = None
            self.commit()
        if self.autoFind and not is_error:
            self.find_itemsets()
class OWGEODatasets(OWWidget): name = "GEO Data Sets" description = DESCRIPTION icon = "../widgets/icons/GEODataSets.svg" priority = PRIORITY inputs = [] outputs = [("Expression Data", Orange.data.Table)] settingsList = ["outputRows", "mergeSpots", "gdsSelectionStates", "splitterSettings", "currentGds", "autoCommit", "datasetNames"] outputRows = Setting(True) mergeSpots = Setting(True) gdsSelectionStates = Setting({}) currentGds = Setting(None) datasetNames = Setting({}) splitterSettings = Setting( (b'\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x01\xea\x00\x00\x00\xd7\x01\x00\x00\x00\x07\x01\x00\x00\x00\x02', b'\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x01\xb5\x00\x00\x02\x10\x01\x00\x00\x00\x07\x01\x00\x00\x00\x01') ) autoCommit = Setting(False) def __init__(self, parent=None, signalManager=None, name=" GEO Data Sets"): OWWidget.__init__(self, parent, signalManager, name) self.selectionChanged = False self.filterString = "" self.datasetName = "" ## GUI box = gui.widgetBox(self.controlArea, "Info", addSpace=True) self.infoBox = gui.widgetLabel(box, "Initializing\n\n") box = gui.widgetBox(self.controlArea, "Output", addSpace=True) gui.radioButtonsInBox(box, self, "outputRows", ["Genes in rows", "Samples in rows"], "Rows", callback=self.commitIf) gui.checkBox(box, self, "mergeSpots", "Merge spots of same gene", callback=self.commitIf) gui.separator(box) self.nameEdit = gui.lineEdit( box, self, "datasetName", "Data set name", tooltip="Override the default output data set name", callback=self.onNameEdited ) self.nameEdit.setPlaceholderText("") if sys.version_info < (3, ): box = gui.widgetBox(self.controlArea, "Commit", addSpace=True) self.commitButton = gui.button( box, self, "Commit", callback=self.commit) cb = gui.checkBox(box, self, "autoCommit", "Commit on any change") gui.setStopper(self, self.commitButton, cb, "selectionChanged", self.commit) else: gui.auto_commit(self.controlArea, self, "autoCommit", "Commit", box="Commit") 
self.commitIf = self.commit gui.rubber(self.controlArea) gui.widgetLabel(self.mainArea, "Filter") self.filterLineEdit = QLineEdit( textChanged=self.filter ) self.completer = TokenListCompleter( self, caseSensitivity=Qt.CaseInsensitive ) self.filterLineEdit.setCompleter(self.completer) self.mainArea.layout().addWidget(self.filterLineEdit) splitter = QSplitter(Qt.Vertical, self.mainArea) self.mainArea.layout().addWidget(splitter) self.treeWidget = QTreeView(splitter) self.treeWidget.setSelectionMode(QTreeView.SingleSelection) self.treeWidget.setRootIsDecorated(False) self.treeWidget.setSortingEnabled(True) self.treeWidget.setAlternatingRowColors(True) self.treeWidget.setUniformRowHeights(True) self.treeWidget.setEditTriggers(QTreeView.NoEditTriggers) linkdelegate = LinkStyledItemDelegate(self.treeWidget) self.treeWidget.setItemDelegateForColumn(1, linkdelegate) self.treeWidget.setItemDelegateForColumn(8, linkdelegate) self.treeWidget.setItemDelegateForColumn( 0, gui.IndicatorItemDelegate(self.treeWidget, role=Qt.DisplayRole)) proxyModel = MySortFilterProxyModel(self.treeWidget) self.treeWidget.setModel(proxyModel) self.treeWidget.selectionModel().selectionChanged.connect( self.updateSelection ) self.treeWidget.viewport().setMouseTracking(True) splitterH = QSplitter(Qt.Horizontal, splitter) box = gui.widgetBox(splitterH, "Description") self.infoGDS = gui.widgetLabel(box, "") self.infoGDS.setWordWrap(True) gui.rubber(box) box = gui.widgetBox(splitterH, "Sample Annotations") self.annotationsTree = QTreeWidget(box) self.annotationsTree.setHeaderLabels( ["Type (Sample annotations)", "Sample count"] ) self.annotationsTree.setRootIsDecorated(True) box.layout().addWidget(self.annotationsTree) self.annotationsTree.itemChanged.connect( self.annotationSelectionChanged ) self._annotationsUpdating = False self.splitters = splitter, splitterH for sp, setting in zip(self.splitters, self.splitterSettings): sp.splitterMoved.connect(self.splitterMoved) sp.restoreState(setting) 
self.searchKeys = ["dataset_id", "title", "platform_organism", "description"] self.gds = [] self.gds_info = None self.resize(1000, 600) self.setBlocking(True) self.setEnabled(False) self.progressBarInit() self._executor = ThreadExecutor() func = partial(get_gds_model, methodinvoke(self, "_setProgress", (float,))) self._inittask = Task(function=func) self._inittask.finished.connect(self._initializemodel) self._executor.submit(self._inittask) self._datatask = None @Slot(float) def _setProgress(self, value): self.progressBarValue = value def _initializemodel(self): assert self.thread() is QThread.currentThread() model, self.gds_info, self.gds = self._inittask.result() model.setParent(self) proxy = self.treeWidget.model() proxy.setFilterKeyColumn(0) proxy.setFilterRole(TextFilterRole) proxy.setFilterCaseSensitivity(False) proxy.setFilterFixedString(self.filterString) proxy.setSourceModel(model) proxy.sort(0, Qt.DescendingOrder) self.progressBarFinished() self.setBlocking(False) self.setEnabled(True) filter_items = " ".join( gds[key] for gds in self.gds for key in self.searchKeys ) tr_chars = ",.:;!?(){}[]_-+\\|/%#@$^&*<>~`" tr_table = str.maketrans(tr_chars, " " * len(tr_chars)) filter_items = filter_items.translate(tr_table) filter_items = sorted(set(filter_items.split(" "))) filter_items = [item for item in filter_items if len(item) > 3] self.completer.setTokenList(filter_items) if self.currentGds: current_id = self.currentGds["dataset_id"] gdss = [(i, qunpack(proxy.data(proxy.index(i, 1), Qt.DisplayRole))) for i in range(proxy.rowCount())] current = [i for i, data in gdss if data and data == current_id] if current: current_index = proxy.index(current[0], 0) self.treeWidget.selectionModel().select( current_index, QItemSelectionModel.Select | QItemSelectionModel.Rows ) self.treeWidget.scrollTo( current_index, QTreeView.PositionAtCenter) for i in range(8): self.treeWidget.resizeColumnToContents(i) self.treeWidget.setColumnWidth( 1, min(self.treeWidget.columnWidth(1), 
300)) self.treeWidget.setColumnWidth( 2, min(self.treeWidget.columnWidth(2), 200)) self.updateInfo() def updateInfo(self): gds_info = self.gds_info text = ("%i datasets\n%i datasets cached\n" % (len(gds_info), len(glob.glob(serverfiles.localpath("GEO") + "/GDS*")))) filtered = self.treeWidget.model().rowCount() if len(self.gds) != filtered: text += ("%i after filtering") % filtered self.infoBox.setText(text) def updateSelection(self, *args): current = self.treeWidget.selectedIndexes() mapToSource = self.treeWidget.model().mapToSource current = [mapToSource(index).row() for index in current] if current: self.currentGds = self.gds[current[0]] self.setAnnotations(self.currentGds) self.infoGDS.setText(self.currentGds.get("description", "")) self.nameEdit.setPlaceholderText(self.currentGds["title"]) self.datasetName = \ self.datasetNames.get(self.currentGds["dataset_id"], "") else: self.currentGds = None self.nameEdit.setPlaceholderText("") self.datasetName = "" self.commitIf() def setAnnotations(self, gds): self._annotationsUpdating = True self.annotationsTree.clear() annotations = defaultdict(set) subsetscount = {} for desc in gds["subsets"]: annotations[desc["type"]].add(desc["description"]) subsetscount[desc["description"]] = str(len(desc["sample_id"])) for type, subsets in annotations.items(): key = (gds["dataset_id"], type) subsetItem = QTreeWidgetItem(self.annotationsTree, [type]) subsetItem.setFlags(subsetItem.flags() | Qt.ItemIsUserCheckable | Qt.ItemIsTristate) subsetItem.setCheckState( 0, self.gdsSelectionStates.get(key, Qt.Checked) ) subsetItem.key = key for subset in subsets: key = (gds["dataset_id"], type, subset) item = QTreeWidgetItem( subsetItem, [subset, subsetscount.get(subset, "")] ) item.setFlags(item.flags() | Qt.ItemIsUserCheckable) item.setCheckState( 0, self.gdsSelectionStates.get(key, Qt.Checked) ) item.key = key self._annotationsUpdating = False self.annotationsTree.expandAll() for i in range(self.annotationsTree.columnCount()): 
self.annotationsTree.resizeColumnToContents(i) def annotationSelectionChanged(self, item, column): if self._annotationsUpdating: return for i in range(self.annotationsTree.topLevelItemCount()): item = self.annotationsTree.topLevelItem(i) self.gdsSelectionStates[item.key] = item.checkState(0) for j in range(item.childCount()): child = item.child(j) self.gdsSelectionStates[child.key] = child.checkState(0) def filter(self): filter_string = unicode(self.filterLineEdit.text()) proxyModel = self.treeWidget.model() if proxyModel: strings = filter_string.lower().strip().split() proxyModel.setFilterFixedStrings(strings) self.updateInfo() def selectedSamples(self): """ Return the currently selected sample annotations. The return value is a list of selected (sample type, sample value) tuples. .. note:: if some Sample annotation type has no selected values. this method will return all values for it. """ samples = [] unused_types = [] used_types = [] for stype in childiter(self.annotationsTree.invisibleRootItem()): selected_values = [] all_values = [] for sval in childiter(stype): value = (str(stype.text(0)), str(sval.text(0))) if self.gdsSelectionStates.get(sval.key, True): selected_values.append(value) all_values.append(value) if selected_values: samples.extend(selected_values) used_types.append(str(stype.text(0))) else: # If no sample of sample type is selected we don't filter # on it. samples.extend(all_values) unused_types.append(str(stype.text(0))) return samples, used_types def commitIf(self): if self.autoCommit: self.commit() else: self.selectionChanged = True @Slot(int, int) def progressCompleted(self, value, total): if total > 0: self.progressBarSet(100. 
* value / total, processEvents=False) else: pass # TODO: report 'indeterminate progress' def commit(self): if self.currentGds: self.error(0) sample_type = None self.progressBarInit(processEvents=None) _, groups = self.selectedSamples() if len(groups) == 1 and self.outputRows: sample_type = groups[0] self.setEnabled(False) self.setBlocking(True) progress = methodinvoke(self, "progressCompleted", (int, int)) def get_data(gds_id, report_genes, transpose, sample_type, title): gds_ensure_downloaded(gds_id, progress) gds = geo.GDS(gds_id) data = gds.getdata( report_genes=report_genes, transpose=transpose, sample_type=sample_type ) data.name = title return data get_data = partial( get_data, self.currentGds["dataset_id"], report_genes=self.mergeSpots, transpose=self.outputRows, sample_type=sample_type, title=self.datasetName or self.currentGds["title"] ) self._datatask = Task(function=get_data) self._datatask.finished.connect(self._on_dataready) self._executor.submit(self._datatask) def _on_dataready(self): self.setEnabled(True) self.setBlocking(False) self.progressBarFinished(processEvents=False) try: data = self._datatask.result() except urlrequest.URLError as error: self.error(0, ("Error while connecting to the NCBI ftp server! " "'%s'" % error)) sys.excepthook(type(error), error, getattr(error, "__traceback__")) return finally: self._datatask = None data_name = data.name samples, _ = self.selectedSamples() self.warning(0) message = None if self.outputRows: def samplesinst(ex): out = [] for meta in data.domain.metas: out.append((meta.name, ex[meta].value)) if data.domain.class_var.name != 'class': out.append((data.domain.class_var.name, ex[data.domain.class_var].value)) return out samples = set(samples) mask = [samples.issuperset(samplesinst(ex)) for ex in data] data = data[numpy.array(mask, dtype=bool)] if len(data) == 0: message = "No samples with selected sample annotations." 
else: samples = set(samples) domain = Orange.data.Domain( [attr for attr in data.domain.attributes if samples.issuperset(attr.attributes.items())], data.domain.class_var, data.domain.metas ) # domain.addmetas(data.domain.getmetas()) if len(domain.attributes) == 0: message = "No samples with selected sample annotations." stypes = set(s[0] for s in samples) for attr in domain.attributes: attr.attributes = dict( (key, value) for key, value in attr.attributes.items() if key in stypes ) data = Orange.data.Table(domain, data) if message is not None: self.warning(0, message) data_hints.set_hint(data, "taxid", self.currentGds.get("taxid", ""), 10.0) data_hints.set_hint(data, "genesinrows", self.outputRows, 10.0) data.name = data_name self.send("Expression Data", data) model = self.treeWidget.model().sourceModel() row = self.gds.index(self.currentGds) model.setData(model.index(row, 0), " ", Qt.DisplayRole) self.updateInfo() self.selectionChanged = False def splitterMoved(self, *args): self.splitterSettings = [bytes(sp.saveState()) for sp in self.splitters] def send_report(self): self.report_items("GEO Dataset", [("ID", self.currentGds['dataset_id']), ("Title", self.currentGds['title']), ("Organism", self.currentGds['sample_organism'])]) self.report_items("Data", [("Samples", self.currentGds['sample_count']), ("Features", self.currentGds['feature_count']), ("Genes", self.currentGds['gene_count'])]) self.report_name("Sample annotations") subsets = defaultdict(list) for subset in self.currentGds['subsets']: subsets[subset['type']].append((subset['description'], len(subset['sample_id']))) self.report_html += "<ul>" for type in subsets: self.report_html += "<b>" + type + ":</b></br>" for desc, count in subsets[type]: self.report_html += 9 * " " + "<b>{}:</b> {}</br>".format(desc, count) self.report_html += "</ul>" def onDeleteWidget(self): if self._inittask: self._inittask.future().cancel() self._inittask.finished.disconnect(self._initializemodel) if self._datatask: 
self._datatask.future().cancel() self._datatask.finished.disconnect(self._on_dataready) self._executor.shutdown(wait=False) super(OWGEODatasets, self).onDeleteWidget() def onNameEdited(self): if self.currentGds: gds_id = self.currentGds["dataset_id"] self.datasetNames[gds_id] = unicode(self.nameEdit.text()) self.commitIf()
class OWGEODatasets(OWWidget, ConcurrentWidgetMixin):
    """Widget giving access to Gene Expression Omnibus (GEO) data sets.

    A filterable table lists the available data sets.  For the selected one
    the description and the sample annotations are shown, and the data is
    fetched through ``run_download_task`` (started via
    ``ConcurrentWidgetMixin.start``) and sent on the "Expression Data"
    output.
    """

    name = "GEO Data Sets"
    description = "Access to Gene Expression Omnibus data sets."
    icon = "icons/OWGEODatasets.svg"
    priority = 2

    class Outputs:
        gds_data = Output("Expression Data", Table)

    search_pattern = Setting('')
    auto_commit = Setting(True)
    genes_as_rows = Setting(False)
    mergeSpots = Setting(True)
    selected_gds = Setting(None)
    gdsSelectionStates = Setting({})
    # Saved QSplitter states (vertical, horizontal) restored in __init__.
    splitter_settings = Setting((
        b'\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x01\xea\x00\x00\x00\xd7\x01\x00\x00\x00\x07\x01\x00\x00\x00\x02',
        b'\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x01\xb5\x00\x00\x02\x10\x01\x00\x00\x00\x07\x01\x00\x00\x00\x01',
    ))

    def __init__(self):
        """Build the control and main areas and start the initial run."""
        OWWidget.__init__(self)
        ConcurrentWidgetMixin.__init__(self)

        self.gds_info: Optional[GDSInfo] = GDSInfo(
        )  # TODO: handle possible exceptions
        self.gds_data: Optional[Table] = None

        # Control area
        box = widgetBox(self.controlArea, 'Info', addSpace=True)
        self.infoBox = widgetLabel(box, 'Initializing\n\n')

        box = widgetBox(self.controlArea, 'Output', addSpace=True)
        radioButtonsInBox(box, self, 'genes_as_rows',
                          ['Samples in rows', 'Genes in rows'],
                          callback=self._run)
        separator(box)

        rubber(self.controlArea)
        auto_commit(self.controlArea, self, 'auto_commit', '&Commit',
                    box=False)

        # Main Area

        # Filter widget
        self.filter = lineEdit(self.mainArea, self, 'search_pattern',
                               'Filter:', callbackOnType=True,
                               callback=self._apply_filter)
        self.mainArea.layout().addWidget(self.filter)

        splitter_vertical = QSplitter(Qt.Vertical, self.mainArea)

        self.mainArea.layout().addWidget(splitter_vertical)

        # set table view
        self.table_view = QTableView(splitter_vertical)
        self.table_view.setShowGrid(False)
        self.table_view.setSortingEnabled(True)
        self.table_view.sortByColumn(1, Qt.AscendingOrder)
        self.table_view.setAlternatingRowColors(True)
        self.table_view.verticalHeader().setVisible(False)
        self.table_view.setSelectionBehavior(QAbstractItemView.SelectRows)
        self.table_view.setSelectionMode(QAbstractItemView.SingleSelection)
        self.table_view.viewport().setMouseTracking(True)
        self.table_view.setSizeAdjustPolicy(
            QAbstractScrollArea.AdjustToContents)

        self.table_model = GEODatasetsModel()
        self.table_model.initialize(self.gds_info)
        self.table_view.setModel(self.table_model)
        self.table_model.show_table()

        self.table_view.horizontalHeader().setStretchLastSection(True)
        self.table_view.resizeColumnsToContents()

        v_header = self.table_view.verticalHeader()
        option = self.table_view.viewOptions()
        size = self.table_view.style().sizeFromContents(
            QStyle.CT_ItemViewItem, option, QSize(20, 20), self.table_view)

        v_header.setDefaultSectionSize(size.height() + 2)
        v_header.setMinimumSectionSize(5)

        # set item delegates
        self.table_view.setItemDelegateForColumn(
            self.table_model.pubmedid_col,
            LinkStyledItemDelegate(self.table_view))
        self.table_view.setItemDelegateForColumn(
            self.table_model.gds_id_col,
            LinkStyledItemDelegate(self.table_view))
        self.table_view.setItemDelegateForColumn(
            self.table_model.indicator_col,
            IndicatorItemDelegate(self.table_view, role=Qt.DisplayRole),
        )

        splitter_horizontal = QSplitter(Qt.Horizontal, splitter_vertical)

        # Description Widget
        box = widgetBox(splitter_horizontal, 'Description')
        self.description_widget = widgetLabel(box, '')
        self.description_widget.setWordWrap(True)
        rubber(box)

        # Sample Annotations Widget
        box = widgetBox(splitter_horizontal, 'Sample Annotations')
        self.annotations_widget = QTreeWidget(box)
        self.annotations_widget.setHeaderLabels(
            ['Type (Sample annotations)', 'Sample count'])
        self.annotations_widget.setRootIsDecorated(True)
        box.layout().addWidget(self.annotations_widget)
        self._annotations_updating = False
        self.annotations_widget.itemChanged.connect(
            self.on_annotation_selection_changed)
        self.splitters = splitter_vertical, splitter_horizontal

        for sp, setting in zip(self.splitters, self.splitter_settings):
            sp.splitterMoved.connect(self._splitter_moved)
            sp.restoreState(setting)

        self.table_view.selectionModel().selectionChanged.connect(
            self.on_gds_selection_changed)
        self._apply_filter()

        self._run()

    def _splitter_moved(self, *args):
        """Save the state of both splitters into ``splitter_settings``."""
        self.splitter_settings = [
            bytes(sp.saveState()) for sp in self.splitters
        ]

    def _set_description_widget(self):
        """Show the selected data set's description (or a placeholder)."""
        self.description_widget.setText(
            self.selected_gds.get('description',
                                  'Description not available.'))

    def _set_annotations_widget(self, gds):
        """Rebuild the sample annotations tree for the data set `gds`.

        `gds` must provide ``'name'`` and ``'subsets'``; every subset entry
        is read for ``'type'``, ``'description'`` and ``'sample_id'``.
        Child check states are restored from ``self.gdsSelectionStates``
        and default to ``Qt.Checked``.
        """
        # Setting check states emits itemChanged; the flag makes
        # on_annotation_selection_changed ignore those programmatic changes.
        self._annotations_updating = True
        try:
            self.annotations_widget.clear()

            annotations = defaultdict(set)
            subsets_count = {}

            for desc in gds['subsets']:
                annotations[desc['type']].add(desc['description'])
                subsets_count[desc['description']] = str(
                    len(desc['sample_id']))

            for _type, subsets in annotations.items():
                key = (gds["name"], _type)
                parent = QTreeWidgetItem(self.annotations_widget, [_type])
                parent.key = key
                for subset in subsets:
                    key = (gds['name'], _type, subset)
                    item = QTreeWidgetItem(
                        parent, [subset, subsets_count.get(subset, '')])
                    item.setFlags(item.flags() | Qt.ItemIsUserCheckable)
                    item.setCheckState(
                        0, self.gdsSelectionStates.get(key, Qt.Checked))
                    item.key = key
        finally:
            # Always release the guard, otherwise a failure above would
            # silence every later user-initiated check state change.
            self._annotations_updating = False

        self.annotations_widget.expandAll()
        for i in range(self.annotations_widget.columnCount()):
            self.annotations_widget.resizeColumnToContents(i)

    def _set_selection(self):
        """Re-select the row of ``selected_gds`` without emitting signals."""
        if self.selected_gds is None:
            return

        index = self.table_model.get_row_index(self.selected_gds.get('name'))
        if index is None:
            return

        selection_model = self.table_view.selectionModel()
        selection_model.blockSignals(True)
        try:
            self.table_view.selectRow(index)
            self._handle_selection_changed()
        finally:
            # Never leave the selection model muted if the handler fails.
            selection_model.blockSignals(False)

    def _handle_selection_changed(self):
        """Sync ``selected_gds`` and the detail widgets with the table view.

        With an empty selection the detail widgets are cleared;
        ``selected_gds`` itself is left untouched in that case.
        """
        if self.table_model.table is None:
            return

        selection = self.table_view.selectionModel().selectedRows(
            self.table_model.gds_id_col)
        selected_gds_name = selection[0].data() if selection else None

        if selected_gds_name:
            self.selected_gds = self.table_model.info.get(selected_gds_name)
            self._set_annotations_widget(self.selected_gds)
            self._set_description_widget()
        else:
            self.annotations_widget.clear()
            self.description_widget.clear()

        self.update_info()

    def _apply_filter(self):
        """Show the table filtered by ``search_pattern``; restore selection."""
        if self.table_model.table is not None:
            self.table_model.show_table(
                filter_pattern=str(self.search_pattern))
            self._set_selection()
            self.update_info()

    def _run(self):
        """Start the download task for the selected data set, if any."""
        if self.selected_gds is not None:
            self.gds_data = None
            self.start(run_download_task, self.selected_gds.get('name'),
                       self.get_selected_samples(), self.genes_as_rows)

    def on_gds_selection_changed(self):
        """Slot for the table selection: update the details and rerun."""
        self._handle_selection_changed()
        self._run()

    def on_annotation_selection_changed(self):
        """Store the check state of every annotation item and rerun.

        Ignored while ``_set_annotations_widget`` is populating the tree.
        """
        if self._annotations_updating:
            return
        for i in range(self.annotations_widget.topLevelItemCount()):
            item = self.annotations_widget.topLevelItem(i)
            if hasattr(item, 'key'):
                self.gdsSelectionStates[item.key] = item.checkState(0)
            for j in range(item.childCount()):
                child = item.child(j)
                if hasattr(child, 'key'):
                    self.gdsSelectionStates[child.key] = child.checkState(0)

        self._run()

    def update_info(self):
        """Refresh the info label with total, cached and filtered counts."""
        all_gds = len(self.table_model.info)
        text = "{} datasets\n{} datasets cached\n".format(
            all_gds, len(local_files.listfiles()))
        filtered = self.table_view.model().rowCount()
        if all_gds != filtered:
            text += "{} after filtering".format(filtered)
        self.infoBox.setText(text)

    def get_selected_samples(self) -> defaultdict:
        """
        Return the currently selected sample annotations.

        The return value is a ``defaultdict(list)`` mapping each sample
        type to the list of its selected sample values.

        .. note:: if some Sample annotation type has no selected values.
                  this method will return all values for it.
        """

        def childiter(item):
            """ Iterate over the children of an QTreeWidgetItem instance.
            """
            for i in range(item.childCount()):
                yield item.child(i)

        samples = defaultdict(list)

        for stype in childiter(self.annotations_widget.invisibleRootItem()):
            type_name = str(stype.text(0))
            selected_values = []
            all_values = []
            for sval in childiter(stype):
                value = str(sval.text(0))
                if self.gdsSelectionStates.get(sval.key, True):
                    selected_values.append(value)
                all_values.append(value)

            # If no sample of sample type is selected we don't filter on it.
            values = selected_values or all_values
            if values:
                # Only touch the mapping when there is something to add so
                # that childless types do not create empty entries.
                samples[type_name].extend(values)

        return samples

    def commit(self):
        """Send the most recently loaded data on the output."""
        self.Outputs.gds_data.send(self.gds_data)

    def on_done(self, result: Result):
        """Store and send the task result, then refresh the table."""
        assert isinstance(result.gds_dataset, Table)
        self.gds_data = result.gds_dataset
        self.commit()

        if self.gds_info:
            self.table_model.initialize(self.gds_info)
            self._apply_filter()

    def on_partial_result(self, result: Any) -> None:
        """Partial results are not used by this widget."""
        pass

    def onDeleteWidget(self):
        """Shut the concurrent machinery down when the widget is deleted."""
        self.shutdown()
        super().onDeleteWidget()

    def send_report(self):
        """Add the selected data set's summary and annotations to the report.

        Nothing is reported when no data set is selected.
        """
        if self.selected_gds is None:
            # Without this guard the subscripts below raise TypeError.
            return

        self.report_items(
            "GEO Dataset",
            [
                ("ID", self.selected_gds['name']),
                ("Title", self.selected_gds['title']),
                ("Organism", self.selected_gds['sample_organism']),
            ],
        )
        self.report_items(
            "Data",
            [
                ("Samples", self.selected_gds['sample_count']),
                ("Features", self.selected_gds['variables']),
                ("Genes", self.selected_gds['genes']),
            ],
        )
        self.report_name("Sample annotations")
        subsets = defaultdict(list)
        for subset in self.selected_gds['subsets']:
            subsets[subset['type']].append(
                (subset['description'], len(subset['sample_id'])))
        self.report_html += "<ul>"
        for _type in subsets:
            self.report_html += "<b>" + _type + ":</b></br>"
            for desc, count in subsets[_type]:
                self.report_html += 9 * " " + "<b>{}:</b> {}</br>".format(
                    desc, count)
        self.report_html += "</ul>"