def test_gds_info(self):
    """Check that GDSInfo is populated and holds the expected test-sample metadata."""
    info = GDSInfo()
    self.assertIsNotNone(info)

    # Every mapping-style accessor must report at least one entry.
    for accessor in (info.keys, info.items, info.values):
        self.assertGreater(len(accessor()), 0)

    # Metadata record of the sample the test case is configured with.
    sample = info[self.test_sample]
    self.assertIsNotNone(sample)
    self.assertEqual(sample['genes'], 9561)
    self.assertEqual(int(sample['sample_count']), 4)
    self.assertEqual(len(sample['subsets']), 2)
def test_gds_data_transposed(self):
    """Check the table produced by ``GDS(..., transpose=True)``.

    With ``transpose=True`` the table is expected to have one row per gene
    and one column per sample, to carry the gene/taxonomy annotations in
    its ``attributes`` and to have a falsy ``gene_as_attr_name`` flag.
    """
    gds_info = GDSInfo()
    gds_table = GDS(self.test_sample, transpose=True)

    # test if data is downloaded
    self.assertIsNotNone(gds_table)
    self.assertIsInstance(gds_table, Table)

    # test data table values
    sample_info = gds_info[self.test_sample]
    rows, columns = gds_table.X.shape
    self.assertEqual(int(sample_info['sample_count']), columns)
    self.assertEqual(int(sample_info['genes']), rows)

    # test data table annotations
    # assertIn reports the missing key and the container on failure,
    # whereas assertTrue(x in y) only says "False is not true".
    self.assertIn(TableAnnotation.gene_as_attr_name, gds_table.attributes)
    self.assertIn(TableAnnotation.gene_id_column, gds_table.attributes)
    self.assertIn(TableAnnotation.tax_id, gds_table.attributes)
    self.assertFalse(gds_table.attributes[TableAnnotation.gene_as_attr_name])
""" Documentation script """ import textwrap from orangecontrib.bioinformatics.geo.dataset import GDSInfo gds_info = GDSInfo() gds = gds_info["GDS10"] print("ID:") print(gds["dataset_id"]) print("Features: ") print(gds["feature_count"]) print("Genes:") print(gds["gene_count"]) print("Organism:") print(gds["platform_organism"]) print("PubMed ID:") print(gds["pubmed_id"]) print("Sample types:") for sample_type in set([sinfo["type"] for sinfo in gds["subsets"]]): ss = [ sinfo["description"] for sinfo in gds["subsets"] if sinfo["type"] == sample_type ] print(" %s (%s)" % (sample_type, ", ".join(ss))) print("") print("Description:") print("\n".join(textwrap.wrap(gds["description"], 70)))
def get_gds_model(progress=lambda val: None):
    """
    Initialize and return a GDS datasets model.

    :param progress: A progress callback; it is called with 1, 20 and 50
        as the individual stages complete.
    :returns: A tuple of (QStandardItemModel, GDSInfo, [GDS])

    .. note::
        If called from another thread, the returned QStandardItemModel is
        moved to the thread of the QCoreApplication instance.

    """
    progress(1)
    info = GDSInfo()
    search_keys = ["dataset_id", "title", "platform_organism", "description"]
    cache_dir = serverfiles.localpath(DOMAIN)
    gds_link = "http://www.ncbi.nlm.nih.gov/sites/GDSbrowser?acc={0}"
    pm_link = "http://www.ncbi.nlm.nih.gov/pubmed/{0}"
    gds_list = []

    def is_cached(gds):
        """Return True if '<dataset_id>.soft.gz' exists in the cache dir."""
        return os.path.exists(
            os.path.join(cache_dir, gds["dataset_id"]) + ".soft.gz")

    def item(displayvalue, item_values=None):
        """Build a QStandardItem showing `displayvalue`.

        `item_values` is an optional {role: value} mapping of extra data.
        It defaults to None rather than a shared mutable `{}`.
        """
        std_item = QStandardItem()
        std_item.setData(displayvalue, Qt.DisplayRole)
        for role, value in (item_values or {}).items():
            std_item.setData(value, role)
        return std_item

    def gds_to_row(gds):
        """Convert one GDS info record into a list of row items."""
        #: Text for easier full search.
        search_text = " | ".join(
            gds.get(key, "").lower() for key in search_keys)
        pubmed_id = gds.get("pubmed_id")
        return [
            # First column: cached marker; also carries the search text.
            item(" " if is_cached(gds) else "",
                 {TextFilterRole: search_text}),
            item(gds["dataset_id"],
                 {gui.LinkRole: gds_link.format(gds["dataset_id"])}),
            item(gds["title"]),
            item(gds["platform_organism"]),
            item(len(gds["samples"])),
            item(gds["feature_count"]),
            item(gds["gene_count"]),
            item(len(gds["subsets"])),
            # The PubMed link is only set when a (truthy) id is present.
            item(gds.get("pubmed_id", ""),
                 {gui.LinkRole:
                  pm_link.format(gds["pubmed_id"]) if pubmed_id else None}),
        ]

    model = QStandardItemModel()
    model.setHorizontalHeaderLabels([
        "", "ID", "Title", "Organism", "Samples", "Features", "Genes",
        "Subsets", "PubMedID"
    ])
    progress(20)
    for gds in info.values():
        model.appendRow(gds_to_row(gds))
        gds_list.append(gds)
    progress(50)

    # Hand the model over to the application's thread when built elsewhere.
    app_thread = QCoreApplication.instance().thread()
    if QThread.currentThread() is not app_thread:
        model.moveToThread(app_thread)
    return model, info, gds_list
def valid(info, n=40):
    """Return the set of subset types in which every subset has at least n samples.

    :param info: A GDS info record with a "subsets" list; each subset has a
        "type" and a "sample_id" collection.
    :param n: Minimum number of samples every subset of a type must contain.
    :returns: Set of subset type names that have no subset smaller than n.
    """
    subsets = info["subsets"]
    all_types = {sinfo["type"] for sinfo in subsets}
    # A single undersized subset disqualifies its whole type.
    too_small = {
        sinfo["type"] for sinfo in subsets if len(sinfo["sample_id"]) < n
    }
    return all_types - too_small


def report(stypes, info):
    """Pretty-print GDS ids and their valid subset types.

    :param stypes: Iterable of (gds_id, subset_types) pairs.
    :param info: Mapping from GDS id to its info record.
    """
    for gds_id, sts in stypes:
        print(gds_id)
        # The record does not depend on the subset type; look it up once.
        gds = info[gds_id]
        for st in sts:
            print(" %s:" % st + ", ".join(
                "%s/%d" % (sinfo["description"], len(sinfo["sample_id"]))
                for sinfo in gds["subsets"] if sinfo["type"] == st
            ))


gdsinfo = GDSInfo()
# Evaluate valid() once per data set, then keep only non-empty results.
subset_types_by_id = (
    (gds_id, valid(gds)) for gds_id, gds in sorted(gdsinfo.items())
)
valid_subset_types = [
    (gds_id, types) for gds_id, types in subset_types_by_id if types
]
report(valid_subset_types, gdsinfo)

print('datasets = ' + str(len(valid_subset_types)))
print('type subsets = ' + str(sum(len(b) for _, b in valid_subset_types)))
def __init__(self):
    """Build the widget: load the GDS info, create the GUI and wire its signals.

    The control area gets an info label, the output-orientation radio
    buttons and the commit control. The main area gets a filter line edit
    and a vertical splitter that holds the data set table and, below it, a
    horizontal splitter with the description and sample-annotation panes.
    The constructor finishes by calling ``_apply_filter()`` and ``_run()``.
    """
    # Both bases are initialised explicitly.
    OWWidget.__init__(self)
    ConcurrentWidgetMixin.__init__(self)

    # GDSInfo is constructed eagerly here; a failure would propagate.
    self.gds_info: Optional[GDSInfo] = GDSInfo()  # TODO: handle possible exceptions
    # No data table yet.
    self.gds_data: Optional[Table] = None

    # Control area
    box = widgetBox(self.controlArea, 'Info', addSpace=True)
    self.infoBox = widgetLabel(box, 'Initializing\n\n')

    box = widgetBox(self.controlArea, 'Output', addSpace=True)
    # Bound to the 'genes_as_rows' attribute; a change calls self._run.
    radioButtonsInBox(box, self, 'genes_as_rows', ['Samples in rows', 'Genes in rows'], callback=self._run)
    separator(box)

    rubber(self.controlArea)
    auto_commit(self.controlArea, self, 'auto_commit', '&Commit', box=False)

    # Main Area

    # Filter widget
    # Bound to 'search_pattern'; callbackOnType=True with self._apply_filter
    # as the callback.
    self.filter = lineEdit(
        self.mainArea, self, 'search_pattern', 'Filter:', callbackOnType=True, callback=self._apply_filter
    )
    self.mainArea.layout().addWidget(self.filter)

    # The vertical splitter parents the table view and the horizontal
    # splitter created further below.
    splitter_vertical = QSplitter(Qt.Vertical, self.mainArea)
    self.mainArea.layout().addWidget(splitter_vertical)

    # set table view
    self.table_view = QTableView(splitter_vertical)
    self.table_view.setShowGrid(False)
    self.table_view.setSortingEnabled(True)
    self.table_view.sortByColumn(1, Qt.AscendingOrder)
    self.table_view.setAlternatingRowColors(True)
    self.table_view.verticalHeader().setVisible(False)
    # Whole-row selection, one row at a time.
    self.table_view.setSelectionBehavior(QAbstractItemView.SelectRows)
    self.table_view.setSelectionMode(QAbstractItemView.SingleSelection)
    self.table_view.viewport().setMouseTracking(True)
    self.table_view.setSizeAdjustPolicy(QAbstractScrollArea.AdjustToContents)

    # The model is initialised from the loaded GDS info and then attached
    # to the view.
    self.table_model = GEODatasetsModel()
    self.table_model.initialize(self.gds_info)
    self.table_view.setModel(self.table_model)
    self.table_model.show_table()

    self.table_view.horizontalHeader().setStretchLastSection(True)
    self.table_view.resizeColumnsToContents()

    # Default row height: what the current style reports for an item-view
    # item with 20x20 contents, plus 2.
    v_header = self.table_view.verticalHeader()
    option = self.table_view.viewOptions()
    size = self.table_view.style().sizeFromContents(QStyle.CT_ItemViewItem, option, QSize(20, 20), self.table_view)

    v_header.setDefaultSectionSize(size.height() + 2)
    v_header.setMinimumSectionSize(5)

    # set item delegates
    # Link delegates for the PubMed id and GDS id columns, an indicator
    # delegate for the indicator column.
    self.table_view.setItemDelegateForColumn(
        self.table_model.pubmedid_col, LinkStyledItemDelegate(self.table_view)
    )
    self.table_view.setItemDelegateForColumn(
        self.table_model.gds_id_col, LinkStyledItemDelegate(self.table_view)
    )
    self.table_view.setItemDelegateForColumn(
        self.table_model.indicator_col,
        IndicatorItemDelegate(self.table_view, role=Qt.DisplayRole),
    )

    splitter_horizontal = QSplitter(Qt.Horizontal, splitter_vertical)

    # Description Widget
    box = widgetBox(splitter_horizontal, 'Description')
    self.description_widget = widgetLabel(box, '')
    self.description_widget.setWordWrap(True)
    rubber(box)

    # Sample Annotations Widget
    box = widgetBox(splitter_horizontal, 'Sample Annotations')
    self.annotations_widget = QTreeWidget(box)
    self.annotations_widget.setHeaderLabels(['Type (Sample annotations)', 'Sample count'])
    self.annotations_widget.setRootIsDecorated(True)
    box.layout().addWidget(self.annotations_widget)
    # NOTE(review): presumably a guard flag read by the itemChanged handler
    # while the tree is being repopulated - confirm against its users.
    self._annotations_updating = False
    self.annotations_widget.itemChanged.connect(self.on_annotation_selection_changed)
    # Pair each splitter with its entry in self.splitter_settings (defined
    # outside this method): connect its move signal and restore its state.
    self.splitters = splitter_vertical, splitter_horizontal

    for sp, setting in zip(self.splitters, self.splitter_settings):
        sp.splitterMoved.connect(self._splitter_moved)
        sp.restoreState(setting)

    # Connected only after setModel() above.
    self.table_view.selectionModel().selectionChanged.connect(self.on_gds_selection_changed)
    self._apply_filter()

    self._run()
def setUp(self):
    """Store a newly constructed GDSInfo on the test instance as ``self.info``."""
    gds_info = GDSInfo()
    self.info = gds_info