def test_gds_info(self):
        gds_info = GDSInfo()
        self.assertIsNotNone(gds_info)
        self.assertGreater(len(gds_info.keys()), 0)
        self.assertGreater(len(gds_info.items()), 0)
        self.assertGreater(len(gds_info.values()), 0)

        self.assertIsNotNone(gds_info[self.test_sample])
        self.assertEqual(gds_info[self.test_sample]['genes'], 9561)
        self.assertEqual(int(gds_info[self.test_sample]['sample_count']), 4)
        self.assertEqual(len(gds_info[self.test_sample]['subsets']), 2)
    def test_gds_data_transposed(self):
        gds_info = GDSInfo()
        gds_table = GDS(self.test_sample, transpose=True)

        # test if data is downloaded
        self.assertIsNotNone(gds_table)
        self.assertIsInstance(gds_table, Table)

        # test data table values
        rows, columns = gds_table.X.shape
        self.assertEqual(int(gds_info[self.test_sample]['sample_count']), columns)
        self.assertEqual(int(gds_info[self.test_sample]['genes']), rows)

        # test data table annotations
        self.assertTrue(TableAnnotation.gene_as_attr_name in gds_table.attributes)
        self.assertTrue(TableAnnotation.gene_id_column in gds_table.attributes)
        self.assertTrue(TableAnnotation.tax_id in gds_table.attributes)

        self.assertFalse(gds_table.attributes[TableAnnotation.gene_as_attr_name])
예제 #3
0
""" Documentation script """
import textwrap

from orangecontrib.bioinformatics.geo.dataset import GDSInfo

gds_info = GDSInfo()
gds = gds_info["GDS10"]

print("ID:")
print(gds["dataset_id"])
print("Features: ")
print(gds["feature_count"])
print("Genes:")
print(gds["gene_count"])
print("Organism:")
print(gds["platform_organism"])
print("PubMed ID:")
print(gds["pubmed_id"])
print("Sample types:")

for sample_type in set([sinfo["type"] for sinfo in gds["subsets"]]):
    ss = [
        sinfo["description"] for sinfo in gds["subsets"]
        if sinfo["type"] == sample_type
    ]
    print("  %s (%s)" % (sample_type, ", ".join(ss)))

print("")
print("Description:")
print("\n".join(textwrap.wrap(gds["description"], 70)))
예제 #4
0
def get_gds_model(progress=lambda val: None):
    """
    Initialize and return a GDS datasets model.

    :param progress: A progress callback.
    :rval tuple:
        A tuple of (QStandardItemModel, GDSInfo, [GDS])

    .. note::
        The returned QStandardItemModel's thread affinity is set to
        the GUI thread.

    """
    progress(1)
    info = GDSInfo()
    search_keys = ["dataset_id", "title", "platform_organism", "description"]
    cache_dir = serverfiles.localpath(DOMAIN)
    gds_link = "http://www.ncbi.nlm.nih.gov/sites/GDSbrowser?acc={0}"
    pm_link = "http://www.ncbi.nlm.nih.gov/pubmed/{0}"
    gds_list = []

    def is_cached(gds):
        return os.path.exists(
            os.path.join(cache_dir, gds["dataset_id"]) + ".soft.gz")

    def item(displayvalue, item_values={}):
        item = QStandardItem()
        item.setData(displayvalue, Qt.DisplayRole)
        for role, value in item_values.items():
            item.setData(value, role)
        return item

    def gds_to_row(gds):
        #: Text for easier full search.
        search_text = " | ".join(
            [gds.get(key, "").lower() for key in search_keys])
        row = [
            item(" " if is_cached(gds) else "", {TextFilterRole: search_text}),
            item(gds["dataset_id"],
                 {gui.LinkRole: gds_link.format(gds["dataset_id"])}),
            item(gds["title"]),
            item(gds["platform_organism"]),
            item(len(gds["samples"])),
            item(gds["feature_count"]),
            item(gds["gene_count"]),
            item(len(gds["subsets"])),
            item(
                gds.get("pubmed_id", ""), {
                    gui.LinkRole:
                    pm_link.format(gds["pubmed_id"])
                    if gds.get("pubmed_id") else None
                })
        ]
        return row

    model = QStandardItemModel()
    model.setHorizontalHeaderLabels([
        "", "ID", "Title", "Organism", "Samples", "Features", "Genes",
        "Subsets", "PubMedID"
    ])
    progress(20)
    for gds in info.values():
        model.appendRow(gds_to_row(gds))

        gds_list.append(gds)

    progress(50)

    if QThread.currentThread() is not QCoreApplication.instance().thread():
        model.moveToThread(QCoreApplication.instance().thread())
    return model, info, gds_list
예제 #5
0
def valid(info, n=40):
    """Return a set of subset types containing more than n samples in every subset"""
    invalid = set()
    subsets = set([sinfo["type"] for sinfo in info["subsets"]])
    for sampleinfo in info["subsets"]:
        if len(sampleinfo["sample_id"]) < n:
            invalid.add(sampleinfo["type"])
    return subsets.difference(invalid)


def report(stypes, info):
    """Pretty-print GDS and valid susbset types"""
    for id, sts in stypes:
        print(id)
        for st in sts:
            gds = info[id]
            print("  %s:" % st + ", ".join([
                "%s/%d" % (sinfo["description"], len(sinfo["sample_id"]))
                for sinfo in gds["subsets"] if sinfo["type"] == st
            ]))


gdsinfo = GDSInfo()
valid_subset_types = [(id, valid(info)) for id, info in sorted(gdsinfo.items())
                      if valid(info)]
report(valid_subset_types, gdsinfo)

print('datasets = ' + str(len(valid_subset_types)))
print('type subsets = ' + str(sum(len(b) for _, b in valid_subset_types)))
    def __init__(self):
        OWWidget.__init__(self)
        ConcurrentWidgetMixin.__init__(self)

        self.gds_info: Optional[GDSInfo] = GDSInfo(
        )  # TODO: handle possible exceptions
        self.gds_data: Optional[Table] = None

        # Control area
        box = widgetBox(self.controlArea, 'Info', addSpace=True)
        self.infoBox = widgetLabel(box, 'Initializing\n\n')

        box = widgetBox(self.controlArea, 'Output', addSpace=True)
        radioButtonsInBox(box,
                          self,
                          'genes_as_rows',
                          ['Samples in rows', 'Genes in rows'],
                          callback=self._run)
        separator(box)

        rubber(self.controlArea)
        auto_commit(self.controlArea,
                    self,
                    'auto_commit',
                    '&Commit',
                    box=False)

        # Main Area

        # Filter widget
        self.filter = lineEdit(self.mainArea,
                               self,
                               'search_pattern',
                               'Filter:',
                               callbackOnType=True,
                               callback=self._apply_filter)
        self.mainArea.layout().addWidget(self.filter)

        splitter_vertical = QSplitter(Qt.Vertical, self.mainArea)

        self.mainArea.layout().addWidget(splitter_vertical)

        # set table view
        self.table_view = QTableView(splitter_vertical)
        self.table_view.setShowGrid(False)
        self.table_view.setSortingEnabled(True)
        self.table_view.sortByColumn(1, Qt.AscendingOrder)
        self.table_view.setAlternatingRowColors(True)
        self.table_view.verticalHeader().setVisible(False)
        self.table_view.setSelectionBehavior(QAbstractItemView.SelectRows)
        self.table_view.setSelectionMode(QAbstractItemView.SingleSelection)
        self.table_view.viewport().setMouseTracking(True)
        self.table_view.setSizeAdjustPolicy(
            QAbstractScrollArea.AdjustToContents)

        self.table_model = GEODatasetsModel()
        self.table_model.initialize(self.gds_info)
        self.table_view.setModel(self.table_model)
        self.table_model.show_table()

        self.table_view.horizontalHeader().setStretchLastSection(True)
        self.table_view.resizeColumnsToContents()

        v_header = self.table_view.verticalHeader()
        option = self.table_view.viewOptions()
        size = self.table_view.style().sizeFromContents(
            QStyle.CT_ItemViewItem, option, QSize(20, 20), self.table_view)

        v_header.setDefaultSectionSize(size.height() + 2)
        v_header.setMinimumSectionSize(5)

        # set item delegates
        self.table_view.setItemDelegateForColumn(
            self.table_model.pubmedid_col,
            LinkStyledItemDelegate(self.table_view))
        self.table_view.setItemDelegateForColumn(
            self.table_model.gds_id_col,
            LinkStyledItemDelegate(self.table_view))
        self.table_view.setItemDelegateForColumn(
            self.table_model.indicator_col,
            IndicatorItemDelegate(self.table_view, role=Qt.DisplayRole),
        )

        splitter_horizontal = QSplitter(Qt.Horizontal, splitter_vertical)

        # Description Widget
        box = widgetBox(splitter_horizontal, 'Description')
        self.description_widget = widgetLabel(box, '')
        self.description_widget.setWordWrap(True)
        rubber(box)

        # Sample Annotations Widget
        box = widgetBox(splitter_horizontal, 'Sample Annotations')
        self.annotations_widget = QTreeWidget(box)
        self.annotations_widget.setHeaderLabels(
            ['Type (Sample annotations)', 'Sample count'])
        self.annotations_widget.setRootIsDecorated(True)
        box.layout().addWidget(self.annotations_widget)
        self._annotations_updating = False
        self.annotations_widget.itemChanged.connect(
            self.on_annotation_selection_changed)
        self.splitters = splitter_vertical, splitter_horizontal

        for sp, setting in zip(self.splitters, self.splitter_settings):
            sp.splitterMoved.connect(self._splitter_moved)
            sp.restoreState(setting)

        self.table_view.selectionModel().selectionChanged.connect(
            self.on_gds_selection_changed)
        self._apply_filter()

        self._run()
예제 #7
0
 def setUp(self):
     self.info = GDSInfo()