Ejemplo n.º 1
0
class OWBioMart(widget.OWWidget):
    """Widget that queries a BioMart service and outputs the result.

    The user picks a mart service, a database (mart) and a dataset.  The
    dataset configuration is then rendered as "Attributes" / "Filters"
    pages, and "Get Results" builds a query from the current pages and
    sends the retrieved table on the "Data" output.

    Registry, dataset list, configuration and query retrieval are all
    submitted as tasks to ``self._executor``; their results are delivered
    to the ``setBioMart*`` / ``dataReady`` slots and failures to
    ``_handleException``.
    """
    name = "BioMart"
    description = "Query BioMart service"
    icon = "../widgets/icons/BioMart.svg"
    priority = 2010

    outputs = [("Data", Orange.data.Table)]

    # When true a "Filters" tab is shown next to the "Attributes" tab.
    SHOW_FILTERS = True

    # Persisted settings: URL of the chosen service and dataset index.
    selectedService = settings.Setting(MartServices[0][1])
    selectedDataset = settings.Setting(0)

    def __init__(self, parent=None):
        """Build the GUI and start fetching the registry of the saved service."""
        super().__init__(parent)

        self.selectedDatabase = 0
        self.uniqueRows = True

        # Defined up front so callbacks that can run before the first
        # registry arrives (e.g. "Clear cache" after a failed fetch) see a
        # well defined state instead of a missing attribute.
        self.registry = None
        # Calls queued by `registerDelayedCall` and run by `afterInit`.
        self._afterInitQueue = []
        self._task = None

        gui.button(gui.widgetBox(self.controlArea, "Cache", addSpace=True),
                   self, "Clear cache",
                   tooltip="Clear saved query results",
                   callback=self.clearCache)
        self.serviceindex = 0
        self.serviceCombo = gui.comboBox(
            self.controlArea, self, "serviceindex", "Mart Service",
            callback=self._setServiceUrl
        )
        for name, url in MartServices:
            self.serviceCombo.addItem(name, userData=url)
        # Show the saved service in the combo box (index is -1, i.e. no
        # selection, when the saved URL is not one of `MartServices`).
        idx = self.serviceCombo.findData(self.selectedService, Qt.UserRole)
        self.serviceCombo.setCurrentIndex(idx)

        self.martsCombo = gui.comboBox(
            self.controlArea, self, "selectedDatabase", "Database",
            callback=self.setSelectedMart,
            addSpace=True)
        self.martsCombo.setMaximumWidth(250)

        self.datasetsCombo = gui.comboBox(
            self.controlArea, self, "selectedDataset", "Dataset",
            callback=self.setSelectedDataset,
            addSpace=True)

        self.datasetsCombo.setMaximumWidth(250)

        gui.rubber(self.controlArea)

        box = gui.widgetBox(self.controlArea, "Results")
        gui.checkBox(
            box, self, "uniqueRows", "Unique results only",
            tooltip="Return unique results only.",)

        self.commitButton = gui.button(
            box, self, "Get Results", callback=self.commit,
            tooltip="Query the BioMart server and output the results",
            autoDefault=True)

        # Enabled once a dataset configuration has been loaded.
        self.commitButton.setEnabled(False)

        self.mainWidget = gui.widgetBox(
            self.mainArea, orientation=QStackedLayout())

        self.mainTab = QTabWidget()

        self.mainWidget.layout().addWidget(self.mainTab)

        self.attributesConfigurationBox = gui.createTabPage(
            self.mainTab, "Attributes")

        if self.SHOW_FILTERS:
            self.filtersConfigurationBox = gui.createTabPage(
                self.mainTab, "Filters")

        self._executor = concurrent.ThreadExecutor(
            threadPool=QThreadPool(maxThreadCount=2)
        )
        # Fetch the registry exactly once.  (Previously a second, identical
        # task was submitted right after the first one.)
        self._fetchRegistry(self.selectedService)

        try:
            from Bio import SeqIO  # noqa: F401 - availability check only
            self.hasBiopython = True
        except ImportError:
            self.warning(
                100,
                "Biopython package not found.\n"
                "To retrieve FASTA sequence data from BioMart install "
                "Biopython.")
            self.hasBiopython = False

    def sizeHint(self):
        """Return the preferred initial size of the widget."""
        return QSize(800, 600)

    def _fetchRegistry(self, url):
        """Disable the widget and submit a task retrieving the registry.

        The result is delivered to `setBioMartRegistry`, a failure to
        `_handleException`; both re-enable the widget.
        """
        self.error(0)
        self.setEnabled(False)
        self._task = task = concurrent.Task(
            function=partial(self._get_registry, url=url))
        task.resultReady.connect(self.setBioMartRegistry)
        task.exceptionReady.connect(self._handleException)
        self._executor.submit(task)

    def _setServiceUrl(self):
        """Store the service selected in the combo box and reload its registry.

        Does nothing when the combo box has no current item.
        """
        service = self.serviceCombo.itemData(self.serviceCombo.currentIndex())
        if service is None:
            return
        self.selectedService = service
        self._fetchRegistry(service)

    @staticmethod
    def _get_registry(url=None, precache=True):
        """Return a `biomart.BioMartRegistry` for the service at `url`.

        `url` defaults to the first entry of `MartServices`.  With
        `precache` set, `marts()` is called once here so that the call is
        made by whoever runs this function (the worker task) rather than
        later by the caller of the returned registry.
        """
        if url is None:
            url = MartServices[0][1]
        con = biomart.BioMartConnection(address=url, timeout=30)
        reg = biomart.BioMartRegistry(con)
        if precache:
            _ = reg.marts()
        return reg

    @Slot(Exception)
    def _handleException(self, exception):
        """Report a failed task (stderr, log, widget error) and re-enable."""
        assert(QThread.currentThread() is self.thread())
        print("Task failed with:", exception, file=sys.stderr)
        import logging
        log = logging.getLogger(__name__)
        log.exception("Error:", exc_info=exception)
        self.error(0, str(exception))
        self.setEnabled(True)

    @Slot(object)
    def setBioMartRegistry(self, registry):
        """Store `registry` and list its visible marts in the database combo."""
        assert(QThread.currentThread() is self.thread())
        self.setEnabled(True)
        self.registry = registry
        # Marts without a `visible` attribute are treated as not visible.
        self.marts = [mart for mart in self.registry.marts()
                      if getattr(mart, "visible", "0") != "0"]

        self.martsCombo.clear()
        for mart in self.marts:
            self.martsCombo.addItem(mart.displayName)

    def setSelectedMart(self):
        """Select the mart chosen in the combo box and fetch its datasets."""
        self.mart = self.marts[self.selectedDatabase]
        self.error(0)
        self.setEnabled(False)

        self._task = task = concurrent.Task(function=self.mart.datasets)
        task.resultReady.connect(self.setBioMartDatasets)
        task.exceptionReady.connect(self._handleException)
        self._executor.submit(task)

    @Slot(object)
    def setBioMartDatasets(self, datasets):
        """Store the visible `datasets` and list them in the dataset combo."""
        assert(QThread.currentThread() is self.thread())
        self.setEnabled(True)
        # Datasets without a `visible` attribute are treated as not visible.
        self.datasets = [data for data in datasets if
                         getattr(data, "visible", "0") != "0"]
        self.datasetsCombo.clear()
        self.datasetsCombo.addItems(
            [data.displayName for data in self.datasets])

    def setSelectedDataset(self):
        """Select the dataset chosen in the combo box and fetch its configuration."""
        self.dataset = self.datasets[self.selectedDataset]
        self.error(0)
        self.setEnabled(False)

        def get_configuration(dataset):
            # Only the raw response is read here; it is parsed later in
            # `setBioMartConfiguration`.
            connection = dataset.connection
            stream = connection.configuration(
                dataset=dataset.internalName,
                virtualSchema=dataset.serverVirtualSchema)
            response = stream.read()
            return response

        self._task = task = concurrent.Task(
            function=partial(get_configuration, self.dataset))

        task.resultReady.connect(self.setBioMartConfiguration)
        task.exceptionReady.connect(self._handleException)

        self._executor.submit(task)

    @Slot(object)
    def setBioMartConfiguration(self, configuration):
        """Parse the raw dataset `configuration` and rebuild the page tabs.

        `configuration` is the bytes response read by `setSelectedDataset`.
        Every non-hidden "AttributePage" (and "FilterPage" when
        `SHOW_FILTERS` is set) becomes a scrollable tab holding a
        `PageWidget`.  Finally the queued delayed calls are run and the
        commit button is enabled.
        """
        assert(QThread.currentThread() is self.thread())
        self.setEnabled(True)
        # parse the xml in the main thread (a long time ago this step was
        # done in a thread but would frequently cause `expat` to segfault.
        doc = biomart.parseXML(io.BytesIO(configuration))
        config = list(doc.elements("DatasetConfig"))[0]
        configuration = biomart.DatasetConfig(
            self.registry, config.tag, config.attributes, config.children)

        self.clearConfiguration()

        self.configuration = configuration

        def hidden(tree):
            # A page is hidden when either flag is anything but "false".
            return getattr(tree, "hidden", "false") != "false" or \
                   getattr(tree, "hideDisplay", "false") != "false"

        self.attributePagesTabWidget = tabs = gui.tabWidget(
            self.attributesConfigurationBox)

        for page in configuration.elements("AttributePage"):
            if not hidden(page):
                page_widget = PageWidget(page, self.dataset, self)
                gui.createTabPage(tabs, getattr(page, "displayName", ""),
                                  widgetToAdd=page_widget, canScroll=True)

        if self.SHOW_FILTERS:
            self.filterPagesTabWidget = tabs = gui.tabWidget(
                self.filtersConfigurationBox)
            for page in configuration.elements("FilterPage"):
                if not hidden(page):
                    page_widget = PageWidget(page, self.dataset, self)
                    gui.createTabPage(tabs, getattr(page, "displayName", ""),
                                      widgetToAdd=page_widget, canScroll=True)

        self.afterInit()

        self.commitButton.setEnabled(True)

    def clearConfiguration(self):
        """Replace the main tab widget with a fresh, empty one."""
        self.mainTab.deleteLater()

        self.mainTab = QTabWidget()
        self.mainWidget.layout().addWidget(self.mainTab)
        self.mainWidget.layout().setCurrentWidget(self.mainTab)

        self.attributesConfigurationBox = gui.createTabPage(
            self.mainTab, "Attributes")
        if self.SHOW_FILTERS:
            self.filtersConfigurationBox = gui.createTabPage(
                self.mainTab, "Filters")

    def commit(self):
        """Build a query from the current pages and start retrieving it.

        Attributes and filters are collected from the current attribute
        page and, when `SHOW_FILTERS` is set, the current filter page.
        The table is retrieved in a background task and delivered to
        `dataReady`.  Nothing is submitted when the FASTA format is
        requested but Biopython is not available.
        """
        pageconf = self.attributePagesTabWidget.currentWidget().widget()
        out_format = pageconf.outFormats

        self.error(100)
        if not self.hasBiopython and out_format.lower() == "fasta":
            self.error(100, "Cannot parse FASTA format")
            return

        pages = [pageconf]
        if self.SHOW_FILTERS:
            filter_tab = self.filterPagesTabWidget.currentWidget()
            # `currentWidget()` is None when the configuration had no
            # visible filter page; there is nothing to collect then.
            if filter_tab is not None:
                pages.append(filter_tab.widget())

        attributes = []
        filters = []
        for page in pages:
            for conftype, tree, val in page.query():
                if conftype == "Attribute":
                    attributes.append(tree.internalName)
                elif conftype == "Filter":
                    filters.append((tree.internalName, val))

        # Always read the schema from `self.dataset`; the previous code
        # used a loop variable that was unbound for an empty selection.
        dataset = self.dataset
        query = self.registry.query(
            format="TSV" if "tsv" in out_format.lower()
            else out_format.upper(),
            uniqueRows=self.uniqueRows,
            virtualSchema=dataset.virtualSchema,
            serverVirtualSchema=dataset.serverVirtualSchema
        )

        query.set_dataset(dataset)
        for attr in attributes:
            query.add_attribute(attr)
        for filter_name, value in filters:
            query.add_filter(filter_name, value)

        self.error(0)
        self.setEnabled(False)
        self._task = task = concurrent.Task(function=query.get_table)
        task.resultReady.connect(self.dataReady)
        task.exceptionReady.connect(self._handleException)
        self._executor.submit(task)

    def dataReady(self, data):
        """Re-enable the widget and send `data` on the "Data" output."""
        self.setEnabled(True)
        self.send("Data", data)

    def pushAction(self, action):
        """Pass the "Option" sub-elements of `action` to its target widget.

        The target is the child widget found under the name `action.ref`;
        it is updated only if it provides a `setOptions` method.
        """
        ref = action.ref
        ref_widget = self.findChild(QWidget, ref)
        if hasattr(ref_widget, "setOptions"):
            ref_widget.setOptions(action.subelements_top("Option"))

    def registerDelayedCall(self, call):
        """Queue `call` to be run by the next `afterInit`."""
        self._afterInitQueue.append(call)

    def afterInit(self):
        """Run and remove all queued delayed calls in FIFO order.

        Calls that are registered while the queue is being drained are
        run as well.
        """
        while self._afterInitQueue:
            call = self._afterInitQueue.pop(0)
            call()

    def clearCache(self):
        """Clear the cache of the current registry connection, if any."""
        # No registry is available until the first fetch has succeeded.
        if self.registry is not None:
            self.registry.connection.clear_cache()
Ejemplo n.º 2
0
class OWBioMart(widget.OWWidget):
    """Widget that queries a BioMart service and outputs the result.

    The user picks a mart service, a database (mart) and a dataset.  The
    dataset configuration is then rendered as "Attributes" / "Filters"
    pages, and "Get Results" builds a query from the current pages and
    sends the retrieved table on the "Data" output.

    Registry, dataset list, configuration and query retrieval are all
    submitted as tasks to ``self._executor``; their results are delivered
    to the ``setBioMart*`` / ``dataReady`` slots and failures to
    ``_handleException``.
    """
    name = "BioMart"
    description = "Query BioMart service"
    icon = "../widgets/icons/BioMart.svg"
    priority = 2010

    outputs = [("Data", Orange.data.Table)]

    # When true a "Filters" tab is shown next to the "Attributes" tab.
    SHOW_FILTERS = True

    # Persisted settings: URL of the chosen service and dataset index.
    selectedService = settings.Setting(MartServices[0][1])
    selectedDataset = settings.Setting(0)

    def __init__(self, parent=None):
        """Build the GUI and start fetching the registry of the saved service."""
        super().__init__(parent)

        self.selectedDatabase = 0
        self.uniqueRows = True

        # Defined up front so callbacks that can run before the first
        # registry arrives (e.g. "Clear cache" after a failed fetch) see a
        # well defined state instead of a missing attribute.
        self.registry = None
        # Calls queued by `registerDelayedCall` and run by `afterInit`.
        self._afterInitQueue = []
        self._task = None

        gui.button(gui.widgetBox(self.controlArea, "Cache", addSpace=True),
                   self, "Clear cache",
                   tooltip="Clear saved query results",
                   callback=self.clearCache)
        self.serviceindex = 0
        self.serviceCombo = gui.comboBox(
            self.controlArea, self, "serviceindex", "Mart Service",
            callback=self._setServiceUrl
        )
        for name, url in MartServices:
            self.serviceCombo.addItem(name, userData=url)
        # Show the saved service in the combo box (index is -1, i.e. no
        # selection, when the saved URL is not one of `MartServices`).
        idx = self.serviceCombo.findData(self.selectedService, Qt.UserRole)
        self.serviceCombo.setCurrentIndex(idx)

        self.martsCombo = gui.comboBox(
            self.controlArea, self, "selectedDatabase", "Database",
            callback=self.setSelectedMart,
            addSpace=True)
        self.martsCombo.setMaximumWidth(250)

        self.datasetsCombo = gui.comboBox(
            self.controlArea, self, "selectedDataset", "Dataset",
            callback=self.setSelectedDataset,
            addSpace=True)

        self.datasetsCombo.setMaximumWidth(250)

        gui.rubber(self.controlArea)

        box = gui.widgetBox(self.controlArea, "Results")
        gui.checkBox(
            box, self, "uniqueRows", "Unique results only",
            tooltip="Return unique results only.",)

        self.commitButton = gui.button(
            box, self, "Get Results", callback=self.commit,
            tooltip="Query the BioMart server and output the results",
            autoDefault=True)

        # Enabled once a dataset configuration has been loaded.
        self.commitButton.setEnabled(False)

        self.mainWidget = gui.widgetBox(
            self.mainArea, orientation=QStackedLayout())

        self.mainTab = QTabWidget()

        self.mainWidget.layout().addWidget(self.mainTab)

        self.attributesConfigurationBox = gui.createTabPage(
            self.mainTab, "Attributes")

        if self.SHOW_FILTERS:
            self.filtersConfigurationBox = gui.createTabPage(
                self.mainTab, "Filters")

        self._executor = concurrent.ThreadExecutor(
            threadPool=QThreadPool(maxThreadCount=2)
        )
        # Fetch the registry exactly once.  (Previously a second, identical
        # task was submitted right after the first one.)
        self._fetchRegistry(self.selectedService)

        try:
            from Bio import SeqIO  # noqa: F401 - availability check only
            self.hasBiopython = True
        except ImportError:
            self.warning(
                100,
                "Biopython package not found.\n"
                "To retrieve FASTA sequence data from BioMart install "
                "Biopython.")
            self.hasBiopython = False

    def sizeHint(self):
        """Return the preferred initial size of the widget."""
        return QSize(800, 600)

    def _fetchRegistry(self, url):
        """Disable the widget and submit a task retrieving the registry.

        The result is delivered to `setBioMartRegistry`, a failure to
        `_handleException`; both re-enable the widget.
        """
        self.error(0)
        self.setEnabled(False)
        self._task = task = concurrent.Task(
            function=partial(self._get_registry, url=url))
        task.resultReady.connect(self.setBioMartRegistry)
        task.exceptionReady.connect(self._handleException)
        self._executor.submit(task)

    def _setServiceUrl(self):
        """Store the service selected in the combo box and reload its registry.

        Does nothing when the combo box has no current item.
        """
        service = self.serviceCombo.itemData(self.serviceCombo.currentIndex())
        if service is None:
            return
        self.selectedService = service
        self._fetchRegistry(service)

    @staticmethod
    def _get_registry(url=None, precache=True):
        """Return a `biomart.BioMartRegistry` for the service at `url`.

        `url` defaults to the first entry of `MartServices`.  With
        `precache` set, `marts()` is called once here so that the call is
        made by whoever runs this function (the worker task) rather than
        later by the caller of the returned registry.
        """
        if url is None:
            url = MartServices[0][1]
        con = biomart.BioMartConnection(address=url, timeout=30)
        reg = biomart.BioMartRegistry(con)
        if precache:
            _ = reg.marts()
        return reg

    @Slot(Exception)
    def _handleException(self, exception):
        """Report a failed task (stderr, log, widget error) and re-enable."""
        assert(QThread.currentThread() is self.thread())
        print("Task failed with:", exception, file=sys.stderr)
        import logging
        log = logging.getLogger(__name__)
        log.exception("Error:", exc_info=exception)
        self.error(0, str(exception))
        self.setEnabled(True)

    @Slot(object)
    def setBioMartRegistry(self, registry):
        """Store `registry` and list its visible marts in the database combo."""
        assert(QThread.currentThread() is self.thread())
        self.setEnabled(True)
        self.registry = registry
        # Marts without a `visible` attribute are treated as not visible.
        self.marts = [mart for mart in self.registry.marts()
                      if getattr(mart, "visible", "0") != "0"]

        self.martsCombo.clear()
        for mart in self.marts:
            self.martsCombo.addItem(mart.displayName)

    def setSelectedMart(self):
        """Select the mart chosen in the combo box and fetch its datasets."""
        self.mart = self.marts[self.selectedDatabase]
        self.error(0)
        self.setEnabled(False)

        self._task = task = concurrent.Task(function=self.mart.datasets)
        task.resultReady.connect(self.setBioMartDatasets)
        task.exceptionReady.connect(self._handleException)
        self._executor.submit(task)

    @Slot(object)
    def setBioMartDatasets(self, datasets):
        """Store the visible `datasets` and list them in the dataset combo."""
        assert(QThread.currentThread() is self.thread())
        self.setEnabled(True)
        # Datasets without a `visible` attribute are treated as not visible.
        self.datasets = [data for data in datasets if
                         getattr(data, "visible", "0") != "0"]
        self.datasetsCombo.clear()
        self.datasetsCombo.addItems(
            [data.displayName for data in self.datasets])

    def setSelectedDataset(self):
        """Select the dataset chosen in the combo box and fetch its configuration."""
        self.dataset = self.datasets[self.selectedDataset]
        self.error(0)
        self.setEnabled(False)

        def get_configuration(dataset):
            # Only the raw response is read here; it is parsed later in
            # `setBioMartConfiguration`.
            connection = dataset.connection
            stream = connection.configuration(
                dataset=dataset.internalName,
                virtualSchema=dataset.serverVirtualSchema)
            response = stream.read()
            return response

        self._task = task = concurrent.Task(
            function=partial(get_configuration, self.dataset))

        task.resultReady.connect(self.setBioMartConfiguration)
        task.exceptionReady.connect(self._handleException)

        self._executor.submit(task)

    @Slot(object)
    def setBioMartConfiguration(self, configuration):
        """Parse the raw dataset `configuration` and rebuild the page tabs.

        `configuration` is the bytes response read by `setSelectedDataset`.
        Every non-hidden "AttributePage" (and "FilterPage" when
        `SHOW_FILTERS` is set) becomes a scrollable tab holding a
        `PageWidget`.  Finally the queued delayed calls are run and the
        commit button is enabled.
        """
        assert(QThread.currentThread() is self.thread())
        self.setEnabled(True)
        # parse the xml in the main thread (a long time ago this step was
        # done in a thread but would frequently cause `expat` to segfault.
        doc = biomart.parseXML(io.BytesIO(configuration))
        config = list(doc.elements("DatasetConfig"))[0]
        configuration = biomart.DatasetConfig(
            self.registry, config.tag, config.attributes, config.children)

        self.clearConfiguration()

        self.configuration = configuration

        def hidden(tree):
            # A page is hidden when either flag is anything but "false".
            return getattr(tree, "hidden", "false") != "false" or \
                   getattr(tree, "hideDisplay", "false") != "false"

        self.attributePagesTabWidget = tabs = gui.tabWidget(
            self.attributesConfigurationBox)

        for page in configuration.elements("AttributePage"):
            if not hidden(page):
                page_widget = PageWidget(page, self.dataset, self)
                gui.createTabPage(tabs, getattr(page, "displayName", ""),
                                  widgetToAdd=page_widget, canScroll=True)

        if self.SHOW_FILTERS:
            self.filterPagesTabWidget = tabs = gui.tabWidget(
                self.filtersConfigurationBox)
            for page in configuration.elements("FilterPage"):
                if not hidden(page):
                    page_widget = PageWidget(page, self.dataset, self)
                    gui.createTabPage(tabs, getattr(page, "displayName", ""),
                                      widgetToAdd=page_widget, canScroll=True)

        self.afterInit()

        self.commitButton.setEnabled(True)

    def clearConfiguration(self):
        """Replace the main tab widget with a fresh, empty one."""
        self.mainTab.deleteLater()

        self.mainTab = QTabWidget()
        self.mainWidget.layout().addWidget(self.mainTab)
        self.mainWidget.layout().setCurrentWidget(self.mainTab)

        self.attributesConfigurationBox = gui.createTabPage(
            self.mainTab, "Attributes")
        if self.SHOW_FILTERS:
            self.filtersConfigurationBox = gui.createTabPage(
                self.mainTab, "Filters")

    def commit(self):
        """Build a query from the current pages and start retrieving it.

        Attributes and filters are collected from the current attribute
        page and, when `SHOW_FILTERS` is set, the current filter page.
        The table is retrieved in a background task and delivered to
        `dataReady`.  Nothing is submitted when the FASTA format is
        requested but Biopython is not available.
        """
        pageconf = self.attributePagesTabWidget.currentWidget().widget()
        out_format = pageconf.outFormats

        self.error(100)
        if not self.hasBiopython and out_format.lower() == "fasta":
            self.error(100, "Cannot parse FASTA format")
            return

        pages = [pageconf]
        if self.SHOW_FILTERS:
            filter_tab = self.filterPagesTabWidget.currentWidget()
            # `currentWidget()` is None when the configuration had no
            # visible filter page; there is nothing to collect then.
            if filter_tab is not None:
                pages.append(filter_tab.widget())

        attributes = []
        filters = []
        for page in pages:
            for conftype, tree, val in page.query():
                if conftype == "Attribute":
                    attributes.append(tree.internalName)
                elif conftype == "Filter":
                    filters.append((tree.internalName, val))

        # Always read the schema from `self.dataset`; the previous code
        # used a loop variable that was unbound for an empty selection.
        dataset = self.dataset
        query = self.registry.query(
            format="TSV" if "tsv" in out_format.lower()
            else out_format.upper(),
            uniqueRows=self.uniqueRows,
            virtualSchema=dataset.virtualSchema,
            serverVirtualSchema=dataset.serverVirtualSchema
        )

        query.set_dataset(dataset)
        for attr in attributes:
            query.add_attribute(attr)
        for filter_name, value in filters:
            query.add_filter(filter_name, value)

        self.error(0)
        self.setEnabled(False)
        self._task = task = concurrent.Task(function=query.get_table)
        task.resultReady.connect(self.dataReady)
        task.exceptionReady.connect(self._handleException)
        self._executor.submit(task)

    def dataReady(self, data):
        """Re-enable the widget and send `data` on the "Data" output."""
        self.setEnabled(True)
        self.send("Data", data)

    def pushAction(self, action):
        """Pass the "Option" sub-elements of `action` to its target widget.

        The target is the child widget found under the name `action.ref`;
        it is updated only if it provides a `setOptions` method.
        """
        ref = action.ref
        ref_widget = self.findChild(QWidget, ref)
        if hasattr(ref_widget, "setOptions"):
            ref_widget.setOptions(action.subelements_top("Option"))

    def registerDelayedCall(self, call):
        """Queue `call` to be run by the next `afterInit`."""
        self._afterInitQueue.append(call)

    def afterInit(self):
        """Run and remove all queued delayed calls in FIFO order.

        Calls that are registered while the queue is being drained are
        run as well.
        """
        while self._afterInitQueue:
            call = self._afterInitQueue.pop(0)
            call()

    def clearCache(self):
        """Clear the cache of the current registry connection, if any."""
        # No registry is available until the first fetch has succeeded.
        if self.registry is not None:
            self.registry.connection.clear_cache()