Beispiel #1
0
class OWPIPAx(widget.OWWidget):
    name = "PIPAx"
    description = "Access data from PIPA RNA-Seq database."
    icon = "../widgets/icons/PIPA.svg"
    priority = 35

    inputs = []
    outputs = [("Data", Orange.data.Table)]

    username = settings.Setting("")
    password = settings.Setting("")

    log2 = settings.Setting(False)
    rtypei = settings.Setting(5)  # hardcoded rpkm mapability polya
    excludeconstant = settings.Setting(False)
    joinreplicates = settings.Setting(False)
    #: The stored current selection (in experiments view)
    #: SelectionByKey | None
    currentSelection = settings.Setting(None)
    #: Stored selections (presets)
    #: list of SelectionByKey
    storedSelections = settings.Setting([])
    #: Stored column sort keys (from Sort view)
    #: list of strings
    storedSortingOrder = settings.Setting(
        ["Strain", "Experiment", "Genotype", "Timepoint"])

    experimentsHeaderState = settings.Setting(
        {name: False for _, name in HEADER[:ID_INDEX + 1]}
    )

    def __init__(self, parent=None, signalManager=None, name="PIPAx"):
        super().__init__(parent)

        self.selectedExperiments = []
        self.buffer = dicty.CacheSQLite(bufferfile)

        self.searchString = ""

        self.result_types = []
        self.mappings = {}

        self.controlArea.setMaximumWidth(250)
        self.controlArea.setMinimumWidth(250)

        gui.button(self.controlArea, self, "Reload",
                     callback=self.Reload)
        gui.button(self.controlArea, self, "Clear cache",
                     callback=self.clear_cache)

        b = gui.widgetBox(self.controlArea, "Experiment Sets")
        self.selectionSetsWidget = SelectionSetsWidget(self)
        self.selectionSetsWidget.setSizePolicy(
            QSizePolicy.Preferred, QSizePolicy.Maximum)

        def store_selections(modified):
            if not modified:
                self.storedSelections = self.selectionSetsWidget.selections

        self.selectionSetsWidget.selectionModified.connect(store_selections)
        b.layout().addWidget(self.selectionSetsWidget)

        gui.separator(self.controlArea)

        b = gui.widgetBox(self.controlArea, "Sort output columns")
        self.columnsSortingWidget = SortedListWidget(self)
        self.columnsSortingWidget.setSizePolicy(
            QSizePolicy.Preferred, QSizePolicy.Maximum)

        def store_sort_order():
            self.storedSortingOrder = self.columnsSortingWidget.sortingOrder
        self.columnsSortingWidget.sortingOrderChanged.connect(store_sort_order)
        b.layout().addWidget(self.columnsSortingWidget)
        sorting_model = QStringListModel(SORTING_MODEL_LIST)
        self.columnsSortingWidget.setModel(sorting_model)

        gui.separator(self.controlArea)

        box = gui.widgetBox(self.controlArea, 'Expression Type')
        self.expressionTypesCB = gui.comboBox(
            box, self, "rtypei", items=[], callback=self.UpdateResultsList)

        gui.checkBox(self.controlArea, self, "excludeconstant",
                     "Exclude labels with constant values")

        gui.checkBox(self.controlArea, self, "joinreplicates",
                     "Average replicates (use median)")

        gui.checkBox(self.controlArea, self, "log2",
                     "Logarithmic (base 2) transformation")

        self.commit_button = gui.button(self.controlArea, self, "&Commit",
                                        callback=self.Commit)
        self.commit_button.setDisabled(True)

        gui.rubber(self.controlArea)

        box = gui.widgetBox(self.controlArea, "Authentication")

        gui.lineEdit(box, self, "username", "Username:"******"password", "Password:"******"searchString", "Search",
                     callbackOnType=True,
                     callback=self.SearchUpdate)

        self.headerLabels = [t[1] for t in HEADER]

        self.experimentsWidget = QTreeWidget()
        self.experimentsWidget.setHeaderLabels(self.headerLabels)
        self.experimentsWidget.setSelectionMode(QTreeWidget.ExtendedSelection)
        self.experimentsWidget.setRootIsDecorated(False)
        self.experimentsWidget.setSortingEnabled(True)

        contextEventFilter = gui.VisibleHeaderSectionContextEventFilter(
            self.experimentsWidget, self.experimentsWidget
        )

        self.experimentsWidget.header().installEventFilter(contextEventFilter)
        self.experimentsWidget.setItemDelegateForColumn(
            0, gui.IndicatorItemDelegate(self, role=Qt.DisplayRole))

        self.experimentsWidget.setAlternatingRowColors(True)

        self.experimentsWidget.selectionModel().selectionChanged.connect(
            self.onSelectionChanged)

        self.selectionSetsWidget.setSelectionModel(
            self.experimentsWidget.selectionModel()
        )

        self.mainArea.layout().addWidget(self.experimentsWidget)

        # Restore the selection states from the stored settings
        self.selectionSetsWidget.selections = self.storedSelections
        self.columnsSortingWidget.sortingOrder = self.storedSortingOrder

        self.restoreHeaderState()

        self.experimentsWidget.header().geometriesChanged.connect(
            self.saveHeaderState)

        self.dbc = None

        self.AuthSet()

        QTimer.singleShot(100, self.UpdateExperiments)

    def sizeHint(self):
        return QSize(800, 600)

    def AuthSet(self):
        if len(self.username):
            self.passf.setDisabled(False)
        else:
            self.passf.setDisabled(True)

    def AuthChanged(self):
        self.AuthSet()
        self.ConnectAndUpdate()

    def ConnectAndUpdate(self):
        self.Connect()
        self.UpdateExperiments(reload=True)

    def Connect(self):
        self.error(1)
        self.warning(1)

        def en(x):
            return x if len(x) else None

        self.dbc = dicty.PIPAx(cache=self.buffer,
                               username=en(self.username),
                               password=self.password)

        # check password
        if en(self.username) != None:
            try:
                self.dbc.mappings(reload=True)
            except dicty.AuthenticationError:
                self.error(1, "Wrong username or password")
                self.dbc = None
            except Exception as ex:
                print("Error when contacting the PIPA database", ex)
                sys.excepthook(*sys.exc_info())
                try:  # maybe cached?
                    self.dbc.mappings()
                    self.warning(1, "Can not access database - using cached data.")
                except Exception as ex:
                    self.dbc = None
                    self.error(1, "Can not access database.")

    def Reload(self):
        self.UpdateExperiments(reload=True)

    def clear_cache(self):
        self.buffer.clear()
        self.Reload()

    def rtype(self):
        """Return selected result template type """
        if self.result_types:
            return self.result_types[self.rtypei][0]
        else:
            return "-1"

    def UpdateExperimentTypes(self):
        self.expressionTypesCB.clear()
        items = [desc for _, desc in self.result_types]
        self.expressionTypesCB.addItems(items)
        self.rtypei = max(0, min(self.rtypei, len(self.result_types) - 1))

    def UpdateExperiments(self, reload=False):
        self.experimentsWidget.clear()
        self.items = []

        self.progressBarInit()

        if not self.dbc:
            self.Connect()

        mappings = {}
        result_types = []
        sucind = False  # success indicator for database index

        try:
            mappings = self.dbc.mappings(reload=reload)
            result_types = self.dbc.result_types(reload=reload)
            sucind = True
        except Exception as ex:
            try:
                mappings = self.dbc.mappings()
                result_types = self.dbc.result_types()
                self.warning(0, "Can not access database - using cached data.")
                sucind = True
            except Exception as ex:
                self.error(0, "Can not access database.")

        if sucind:
            self.warning(0)
            self.error(0)

        self.mappings = mappings
        self.result_types = result_types

        self.UpdateExperimentTypes()
        self.UpdateResultsList(reload=reload)

        self.progressBarFinished()

        if self.currentSelection:
            self.currentSelection.select(
                self.experimentsWidget.selectionModel())

        self.handle_commit_button()

    def UpdateResultsList(self, reload=False):

        results_list = {}
        try:
            results_list = self.dbc.results_list(self.rtype(), reload=reload)
        except Exception as ex:
            try:
                results_list = self.dbc.results_list(self.rtype())
            except Exception as ex:
                self.error(0, "Can not access database.")

        self.results_list = results_list
        mappings_key_dict = dict(((m["data_id"], m["id"]), key) \
                                 for key, m in self.mappings.items())

        def mapping_unique_id(annot):
            """Map annotations dict from results_list to unique
            `mappings` ids.
            """
            data_id, mappings_id = annot["data_id"], annot["mappings_id"]
            return mappings_key_dict[data_id, mappings_id]

        elements = []

        # softly change the view so that the selection stays the same

        items_shown = {}
        for i, item in enumerate(self.items):
            c = str(item.text(10))
            items_shown[c] = i

        items_to_show = dict((mapping_unique_id(annot), annot)
                             for annot in self.results_list.values())

        add_items = set(items_to_show) - set(items_shown)
        delete_items = set(items_shown) - set(items_to_show)

        i = 0
        while i < self.experimentsWidget.topLevelItemCount():
            it = self.experimentsWidget.topLevelItem(i)
            if str(it.text(10)) in delete_items:
                self.experimentsWidget.takeTopLevelItem(i)
            else:
                i += 1

        delete_ind = set([items_shown[i] for i in delete_items])
        self.items = [it for i, it in enumerate(self.items) if i not in delete_ind]

        for r_annot in [items_to_show[i] for i in add_items]:
            d = defaultdict(lambda: "?", r_annot)
            row_items = [""] + [d.get(key, "?") for key, _ in HEADER[1:]]
            try:
                time_dict = literal_eval(row_items[DATE_INDEX])
                date_rna = date(time_dict["fullYearUTC"],
                                time_dict["monthUTC"] + 1,  # Why is month 0 based?
                                time_dict["dateUTC"])
                row_items[DATE_INDEX] = date_rna.strftime("%x")
            except Exception:
                row_items[DATE_INDEX] = ''

            row_items[ID_INDEX] = mapping_unique_id(r_annot)
            elements.append(row_items)

            ci = MyTreeWidgetItem(self.experimentsWidget, row_items)

            self.items.append(ci)

        for i in range(len(self.headerLabels)):
            self.experimentsWidget.resizeColumnToContents(i)

        # which is the ok buffer version
        # FIXME: what attribute to use for version?
        self.wantbufver = \
            lambda x, ad=self.results_list: \
            defaultdict(lambda: "?", ad[x])["date"]

        self.wantbufver = lambda x: "0"

        self.UpdateCached()

    def UpdateCached(self):
        if self.wantbufver and self.dbc:
            fn = self.dbc.download_key_function()
            result_id_key = dict(((m["data_id"], m["mappings_id"]), key) \
                                 for key, m in self.results_list.items())

            for item in self.items:
                c = str(item.text(10))
                mapping = self.mappings[c]
                data_id, mappings_id = mapping["data_id"], mapping["id"]
                r_id = result_id_key[data_id, mappings_id]
                # Get the buffered version
                buffered = self.dbc.inBuffer(fn(r_id))
                value = " " if buffered == self.wantbufver(r_id) else ""
                item.setData(0, Qt.DisplayRole, value)

    def SearchUpdate(self, string=""):
        for item in self.items:
            item.setHidden(not all(s in item \
                                   for s in self.searchString.split())
                           )

    def Commit(self):
        if not self.dbc:
            self.Connect()

        pb = gui.ProgressBar(self, iterations=100)

        table = None

        ids = []
        for item in self.experimentsWidget.selectedItems():
            unique_id = str(item.text(10))
            annots = self.mappings[unique_id]
            ids.append((annots["data_id"], annots["id"]))

        transfn = None
        if self.log2:
            transfn = lambda x: math.log(x + 1.0, 2)

        reverse_header_dict = dict((name, key) for key, name in HEADER)

        hview = self.experimentsWidget.header()
        shownHeaders = [label for i, label in \
                        list(enumerate(self.headerLabels))[1:] \
                        if not hview.isSectionHidden(i)
                        ]

        allowed_labels = [reverse_header_dict.get(label, label) \
                          for label in shownHeaders]

        if self.joinreplicates and "id" not in allowed_labels:
            # need 'id' labels in join_replicates for attribute names
            allowed_labels.append("id")

        if len(ids):
            table = self.dbc.get_data(ids=ids, result_type=self.rtype(),
                                      callback=pb.advance,
                                      exclude_constant_labels=self.excludeconstant,
                                      #                          bufver=self.wantbufver,
                                      transform=transfn,
                                      allowed_labels=allowed_labels)

            if self.joinreplicates:
                table = dicty.join_replicates(table,
                                              ignorenames=["replicate", "data_id", "mappings_id",
                                                           "data_name", "id", "unique_id"],
                                              namefn=None,
                                              avg=dicty.median
                                              )

            # Sort attributes
            sortOrder = self.columnsSortingWidget.sortingOrder

            all_values = defaultdict(set)
            for at in table.domain.attributes:
                atts = at.attributes
                for name in sortOrder:
                    all_values[name].add(atts.get(reverse_header_dict[name], ""))

            isnum = {}
            for at, vals in all_values.items():
                vals = filter(None, vals)
                try:
                    for a in vals:
                        float(a)
                    isnum[at] = True
                except:
                    isnum[at] = False

            def optfloat(x, at):
                if x == "":
                    return ""
                else:
                    return float(x) if isnum[at] else x

            def sorting_key(attr):
                atts = attr.attributes
                return tuple([optfloat(atts.get(reverse_header_dict[name], ""), name) \
                              for name in sortOrder])

            attributes = sorted(table.domain.attributes,
                                key=sorting_key)

            domain = Orange.data.Domain(
                attributes, table.domain.class_var, table.domain.metas)
            table = table.from_table(domain, table)

            data_hints.set_hint(table, "taxid", "352472")
            data_hints.set_hint(table, "genesinrows", False)

            self.send("Data", table)

            self.UpdateCached()

        pb.finish()

    def onSelectionChanged(self, selected, deselected):
        self.handle_commit_button()

    def handle_commit_button(self):
        self.currentSelection = \
            SelectionByKey(self.experimentsWidget.selectionModel().selection(),
                           key=(1, 2, 3, 10))
        self.commit_button.setDisabled(not len(self.currentSelection))

    def saveHeaderState(self):
        hview = self.experimentsWidget.header()
        for i, label in enumerate(self.headerLabels):
            self.experimentsHeaderState[label] = hview.isSectionHidden(i)

    def restoreHeaderState(self):
        hview = self.experimentsWidget.header()
        state = self.experimentsHeaderState
        for i, label in enumerate(self.headerLabels):
            hview.setSectionHidden(i, state.get(label, True))
            self.experimentsWidget.resizeColumnToContents(i)
class OWDatabasesUpdate(OWWidget):

    name = "Databases Update"
    description = "Update local systems biology databases."
    icon = "../widgets/icons/Databases.svg"
    priority = 10

    inputs = []
    outputs = []

    want_main_area = False

    def __init__(self, parent=None, signalManager=None,
                 name="Databases update"):
        OWWidget.__init__(self, parent, signalManager, name,
                          wantMainArea=False)

        self.searchString = ""

        fbox = gui.widgetBox(self.controlArea, "Filter")
        self.completer = TokenListCompleter(
            self, caseSensitivity=Qt.CaseInsensitive)
        self.lineEditFilter = QLineEdit(textChanged=self.SearchUpdate)
        self.lineEditFilter.setCompleter(self.completer)

        fbox.layout().addWidget(self.lineEditFilter)

        box = gui.widgetBox(self.controlArea, "Files")

        self.filesView = QTreeWidget(self)
        self.filesView.setHeaderLabels(
            ["", "Data Source", "Update", "Last Updated", "Size"])

        self.filesView.setRootIsDecorated(False)
        self.filesView.setUniformRowHeights(True)
        self.filesView.setSelectionMode(QAbstractItemView.NoSelection)
        self.filesView.setSortingEnabled(True)
        self.filesView.sortItems(1, Qt.AscendingOrder)
        self.filesView.setItemDelegateForColumn(
            0, UpdateOptionsItemDelegate(self.filesView))

        self.filesView.model().layoutChanged.connect(self.SearchUpdate)

        box.layout().addWidget(self.filesView)

        box = gui.widgetBox(self.controlArea, orientation="horizontal")
        self.updateButton = gui.button(
            box, self, "Update all",
            callback=self.UpdateAll,
            tooltip="Update all updatable files",
         )

        self.downloadButton = gui.button(
            box, self, "Download all",
            callback=self.DownloadFiltered,
            tooltip="Download all filtered files shown"
        )

        self.cancelButton = gui.button(
            box, self, "Cancel", callback=self.Cancel,
            tooltip="Cancel scheduled downloads/updates."
        )

        self.retryButton = gui.button(
            box, self, "Reconnect", callback=self.RetrieveFilesList
        )
        self.retryButton.hide()

        gui.rubber(box)
        self.warning(0)

        box = gui.widgetBox(self.controlArea, orientation="horizontal")
        gui.rubber(box)

        self.infoLabel = QLabel()
        self.infoLabel.setAlignment(Qt.AlignCenter)

        self.controlArea.layout().addWidget(self.infoLabel)
        self.infoLabel.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Fixed)

        self.updateItems = []

        self.resize(800, 600)

        self.progress = ProgressState(self, maximum=3)
        self.progress.valueChanged.connect(self._updateProgress)
        self.progress.rangeChanged.connect(self._updateProgress)
        self.executor = ThreadExecutor(
            threadPool=QThreadPool(maxThreadCount=2)
        )

        task = Task(self, function=self.RetrieveFilesList)
        task.exceptionReady.connect(self.HandleError)
        task.start()

        self._tasks = []
        self._haveProgress = False

    def RetrieveFilesList(self):
        self.retryButton.hide()
        self.warning(0)
        self.progress.setRange(0, 3)

        task = Task(function=partial(retrieveFilesList, methodinvoke(self.progress, "advance")))

        task.resultReady.connect(self.SetFilesList)
        task.exceptionReady.connect(self.HandleError)

        self.executor.submit(task)

        self.setEnabled(False)

    def SetFilesList(self, serverInfo):
        """
        Set the files to show.
        """
        self.setEnabled(True)

        localInfo = serverfiles.allinfo()
        all_tags = set()

        self.filesView.clear()
        self.updateItems = []

        for item in join_info_dict(localInfo, serverInfo):
            tree_item = UpdateTreeWidgetItem(item)
            options_widget = UpdateOptionsWidget(item.state)
            options_widget.item = item

            options_widget.installClicked.connect(
                partial(self.SubmitDownloadTask, item.domain, item.filename)
            )
            options_widget.removeClicked.connect(
                partial(self.SubmitRemoveTask, item.domain, item.filename)
            )

            self.updateItems.append((item, tree_item, options_widget))
            all_tags.update(item.tags)

        self.filesView.addTopLevelItems(
            [tree_item for _, tree_item, _ in self.updateItems]
        )

        for item, tree_item, options_widget in self.updateItems:
            self.filesView.setItemWidget(tree_item, 0, options_widget)

            # Add an update button if the file is updateable
            if item.state == OUTDATED:
                button = QToolButton(
                    None, text="Update",
                    maximumWidth=120,
                    minimumHeight=20,
                    maximumHeight=20
                )

                if sys.platform == "darwin":
                    button.setAttribute(Qt.WA_MacSmallSize)

                button.clicked.connect(
                    partial(self.SubmitDownloadTask, item.domain,
                            item.filename)
                )

                self.filesView.setItemWidget(tree_item, 2, button)

        self.progress.advance()

        self.filesView.setColumnWidth(0, self.filesView.sizeHintForColumn(0))

        for column in range(1, 4):
            contents_hint = self.filesView.sizeHintForColumn(column)
            header_hint = self.filesView.header().sectionSizeHint(column)
            width = max(min(contents_hint, 400), header_hint)
            self.filesView.setColumnWidth(column, width)

        hints = [hint for hint in sorted(all_tags) if not hint.startswith("#")]
        self.completer.setTokenList(hints)
        self.SearchUpdate()
        self.UpdateInfoLabel()
        self.toggleButtons()
        self.cancelButton.setEnabled(False)

        self.progress.setRange(0, 0)

    def buttonCheck(self, selected_items, state, button):
        for item in selected_items:
            if item.state != state:
                button.setEnabled(False)
            else:
                button.setEnabled(True)
                break

    def toggleButtons(self):
        selected_items = [item for item, tree_item, _ in self.updateItems if not tree_item.isHidden()]
        self.buttonCheck(selected_items, OUTDATED, self.updateButton)
        self.buttonCheck(selected_items, AVAILABLE, self.downloadButton)

    def HandleError(self, exception):
        if isinstance(exception, ConnectionError):
            self.warning(0,
                       "Could not connect to server! Check your connection "
                       "and try to reconnect.")
            self.SetFilesList({})
            self.retryButton.show()
        else:
            sys.excepthook(type(exception), exception, None)
            self.progress.setRange(0, 0)
            self.setEnabled(True)

    def UpdateInfoLabel(self):
        local = [item for item, tree_item, _ in self.updateItems
                 if item.state != AVAILABLE and not tree_item.isHidden()]
        size = sum(float(item.size) for item in local)

        onServer = [item for item, tree_item, _ in self.updateItems if not tree_item.isHidden()]
        sizeOnServer = sum(float(item.size) for item in onServer)

        text = ("%i items, %s (on server: %i items, %s)" %
                (len(local),
                 sizeof_fmt(size),
                 len(onServer),
                 sizeof_fmt(sizeOnServer)))

        self.infoLabel.setText(text)

    def UpdateAll(self):
        self.warning(0)
        for item, tree_item, _ in self.updateItems:
            if item.state == OUTDATED and not tree_item.isHidden():
                self.SubmitDownloadTask(item.domain, item.filename)

    def DownloadFiltered(self):
        # TODO: submit items in the order shown.
        for item, tree_item, _ in self.updateItems:
            if not tree_item.isHidden() and item.state in \
                    [AVAILABLE, OUTDATED]:
                self.SubmitDownloadTask(item.domain, item.filename)

    def SearchUpdate(self, searchString=None):
        strings = str(self.lineEditFilter.text()).split()
        for item, tree_item, _ in self.updateItems:
            hide = not all(UpdateItem_match(item, string)
                           for string in strings)
            tree_item.setHidden(hide)
        self.UpdateInfoLabel()
        self.toggleButtons()

    def SubmitDownloadTask(self, domain, filename):
        """
        Submit the (domain, filename) to be downloaded/updated.
        """
        self.cancelButton.setEnabled(True)

        index = self.updateItemIndex(domain, filename)
        _, tree_item, opt_widget = self.updateItems[index]

        sf = LocalFiles(serverfiles.PATH, serverfiles.ServerFiles())
        task = DownloadTask(domain, filename, sf)

        self.progress.adjustRange(0, 100)

        pb = ItemProgressBar(self.filesView)
        pb.setRange(0, 100)
        pb.setTextVisible(False)

        task.advanced.connect(pb.advance)
        task.advanced.connect(self.progress.advance)
        task.finished.connect(pb.hide)
        task.finished.connect(self.onDownloadFinished, Qt.QueuedConnection)
        task.exception.connect(self.onDownloadError, Qt.QueuedConnection)

        self.filesView.setItemWidget(tree_item, 2, pb)

        # Clear the text so it does not show behind the progress bar.
        tree_item.setData(2, Qt.DisplayRole, "")
        pb.show()

        # Disable the options widget
        opt_widget.setEnabled(False)
        self._tasks.append(task)

        self.executor.submit(task)

    def EndDownloadTask(self, task):
        future = task.future()
        index = self.updateItemIndex(task.domain, task.filename)
        item, tree_item, opt_widget = self.updateItems[index]

        self.filesView.removeItemWidget(tree_item, 2)
        opt_widget.setEnabled(True)

        if future.cancelled():
            # Restore the previous state
            tree_item.setUpdateItem(item)
            opt_widget.setState(item.state)

        elif future.exception():
            tree_item.setUpdateItem(item)
            opt_widget.setState(item.state)

            # Show the exception string in the size column.
            self.warning(0, "Error while downloading. Check your connection "
                            "and retry.")

            # recreate button for download
            button = QToolButton(
                None, text="Retry",
                maximumWidth=120,
                minimumHeight=20,
                maximumHeight=20
            )

            if sys.platform == "darwin":
                button.setAttribute(Qt.WA_MacSmallSize)

            button.clicked.connect(
                partial(self.SubmitDownloadTask, item.domain,
                        item.filename)
            )

            self.filesView.setItemWidget(tree_item, 2, button)

        else:
            # get the new updated info dict and replace the the old item
            self.warning(0)
            info = serverfiles.info(item.domain, item.filename)
            new_item = update_item_from_info(item.domain, item.filename,
                                             info, info)

            self.updateItems[index] = (new_item, tree_item, opt_widget)

            tree_item.setUpdateItem(new_item)
            opt_widget.setState(new_item.state)

            self.UpdateInfoLabel()

    def SubmitRemoveTask(self, domain, filename):
        serverfiles.LOCALFILES.remove(domain, filename)
        index = self.updateItemIndex(domain, filename)
        item, tree_item, opt_widget = self.updateItems[index]

        if item.info_server:
            new_item = item._replace(state=AVAILABLE, local=None,
                                      info_local=None)
        else:
            new_item = item._replace(local=None, info_local=None)
            # Disable the options widget. No more actions can be performed
            # for the item.
            opt_widget.setEnabled(False)

        tree_item.setUpdateItem(new_item)
        opt_widget.setState(new_item.state)
        self.updateItems[index] = (new_item, tree_item, opt_widget)

        self.UpdateInfoLabel()

    def Cancel(self):
        """
        Cancel all pending update/download tasks (that have not yet started).
        """
        for task in self._tasks:
            task.future().cancel()

    def onDeleteWidget(self):
        self.Cancel()
        self.executor.shutdown(wait=False)
        OWWidget.onDeleteWidget(self)

    def onDownloadFinished(self):
        # on download completed/canceled/error
        assert QThread.currentThread() is self.thread()
        for task in list(self._tasks):
            future = task.future()
            if future.done():
                self.EndDownloadTask(task)
                self._tasks.remove(task)

        if not self._tasks:
            # Clear/reset the overall progress
            self.progress.setRange(0, 0)
            self.cancelButton.setEnabled(False)

    def onDownloadError(self, exc_info):
        sys.excepthook(*exc_info)
        self.warning(0, "Error while downloading. Check your connection and "
                        "retry.")

    def updateItemIndex(self, domain, filename):
        for i, (item, _, _) in enumerate(self.updateItems):
            if item.domain == domain and item.filename == filename:
                return i
        raise ValueError("%r, %r not in update list" % (domain, filename))

    def _updateProgress(self, *args):
        rmin, rmax = self.progress.range()
        if rmin != rmax:
            if not self._haveProgress:
                self._haveProgress = True
                self.progressBarInit()

            self.progressBarSet(self.progress.ratioCompleted() * 100,
                                processEvents=None)
        if rmin == rmax:
            self._haveProgress = False
            self.progressBarFinished()
Beispiel #3
0
class OWGEODatasets(OWWidget):
    name = "GEO Data Sets"
    description = DESCRIPTION
    icon = "../widgets/icons/GEODataSets.svg"
    priority = PRIORITY

    inputs = []
    outputs = [("Expression Data", Orange.data.Table)]

    settingsList = [
        "outputRows", "mergeSpots", "gdsSelectionStates", "splitterSettings",
        "currentGds", "autoCommit", "datasetNames"
    ]

    outputRows = Setting(True)
    mergeSpots = Setting(True)
    gdsSelectionStates = Setting({})
    currentGds = Setting(None)
    datasetNames = Setting({})
    splitterSettings = Setting((
        b'\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x01\xea\x00\x00\x00\xd7\x01\x00\x00\x00\x07\x01\x00\x00\x00\x02',
        b'\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x01\xb5\x00\x00\x02\x10\x01\x00\x00\x00\x07\x01\x00\x00\x00\x01'
    ))

    autoCommit = Setting(False)

    def __init__(self, parent=None, signalManager=None, name=" GEO Data Sets"):
        OWWidget.__init__(self, parent, signalManager, name)

        self.selectionChanged = False
        self.filterString = ""
        self.datasetName = ""

        ## GUI
        box = gui.widgetBox(self.controlArea, "Info", addSpace=True)
        self.infoBox = gui.widgetLabel(box, "Initializing\n\n")

        box = gui.widgetBox(self.controlArea, "Output", addSpace=True)
        gui.radioButtonsInBox(box,
                              self,
                              "outputRows",
                              ["Genes in rows", "Samples in rows"],
                              "Rows",
                              callback=self.commitIf)

        gui.checkBox(box,
                     self,
                     "mergeSpots",
                     "Merge spots of same gene",
                     callback=self.commitIf)

        gui.separator(box)
        self.nameEdit = gui.lineEdit(
            box,
            self,
            "datasetName",
            "Data set name",
            tooltip="Override the default output data set name",
            callback=self.onNameEdited)
        self.nameEdit.setPlaceholderText("")

        if sys.version_info < (3, ):
            box = gui.widgetBox(self.controlArea, "Commit", addSpace=True)
            self.commitButton = gui.button(box,
                                           self,
                                           "Commit",
                                           callback=self.commit)
            cb = gui.checkBox(box, self, "autoCommit", "Commit on any change")
            gui.setStopper(self, self.commitButton, cb, "selectionChanged",
                           self.commit)
        else:
            gui.auto_commit(self.controlArea,
                            self,
                            "autoCommit",
                            "Commit",
                            box="Commit")
            self.commitIf = self.commit

        gui.rubber(self.controlArea)

        gui.widgetLabel(self.mainArea, "Filter")
        self.filterLineEdit = QLineEdit(textChanged=self.filter)
        self.completer = TokenListCompleter(self,
                                            caseSensitivity=Qt.CaseInsensitive)
        self.filterLineEdit.setCompleter(self.completer)

        self.mainArea.layout().addWidget(self.filterLineEdit)

        splitter = QSplitter(Qt.Vertical, self.mainArea)
        self.mainArea.layout().addWidget(splitter)
        self.treeWidget = QTreeView(splitter)

        self.treeWidget.setSelectionMode(QTreeView.SingleSelection)
        self.treeWidget.setRootIsDecorated(False)
        self.treeWidget.setSortingEnabled(True)
        self.treeWidget.setAlternatingRowColors(True)
        self.treeWidget.setUniformRowHeights(True)
        self.treeWidget.setEditTriggers(QTreeView.NoEditTriggers)

        linkdelegate = LinkStyledItemDelegate(self.treeWidget)
        self.treeWidget.setItemDelegateForColumn(1, linkdelegate)
        self.treeWidget.setItemDelegateForColumn(8, linkdelegate)
        self.treeWidget.setItemDelegateForColumn(
            0, gui.IndicatorItemDelegate(self.treeWidget, role=Qt.DisplayRole))

        proxyModel = MySortFilterProxyModel(self.treeWidget)
        self.treeWidget.setModel(proxyModel)
        self.treeWidget.selectionModel().selectionChanged.connect(
            self.updateSelection)
        self.treeWidget.viewport().setMouseTracking(True)

        splitterH = QSplitter(Qt.Horizontal, splitter)

        box = gui.widgetBox(splitterH, "Description")
        self.infoGDS = gui.widgetLabel(box, "")
        self.infoGDS.setWordWrap(True)
        gui.rubber(box)

        box = gui.widgetBox(splitterH, "Sample Annotations")
        self.annotationsTree = QTreeWidget(box)
        self.annotationsTree.setHeaderLabels(
            ["Type (Sample annotations)", "Sample count"])
        self.annotationsTree.setRootIsDecorated(True)
        box.layout().addWidget(self.annotationsTree)
        self.annotationsTree.itemChanged.connect(
            self.annotationSelectionChanged)
        self._annotationsUpdating = False
        self.splitters = splitter, splitterH

        for sp, setting in zip(self.splitters, self.splitterSettings):
            sp.splitterMoved.connect(self.splitterMoved)
            sp.restoreState(setting)

        self.searchKeys = [
            "dataset_id", "title", "platform_organism", "description"
        ]

        self.gds = []
        self.gds_info = None

        self.resize(1000, 600)

        self.setBlocking(True)
        self.setEnabled(False)
        self.progressBarInit()

        self._executor = ThreadExecutor()

        func = partial(get_gds_model,
                       methodinvoke(self, "_setProgress", (float, )))
        self._inittask = Task(function=func)
        self._inittask.finished.connect(self._initializemodel)
        self._executor.submit(self._inittask)

        self._datatask = None

    @Slot(float)
    def _setProgress(self, value):
        self.progressBarValue = value

    def _initializemodel(self):
        assert self.thread() is QThread.currentThread()
        model, self.gds_info, self.gds = self._inittask.result()
        model.setParent(self)

        proxy = self.treeWidget.model()
        proxy.setFilterKeyColumn(0)
        proxy.setFilterRole(TextFilterRole)
        proxy.setFilterCaseSensitivity(False)
        proxy.setFilterFixedString(self.filterString)

        proxy.setSourceModel(model)
        proxy.sort(0, Qt.DescendingOrder)

        self.progressBarFinished()
        self.setBlocking(False)
        self.setEnabled(True)

        filter_items = " ".join(gds[key] for gds in self.gds
                                for key in self.searchKeys)
        tr_chars = ",.:;!?(){}[]_-+\\|/%#@$^&*<>~`"
        tr_table = str.maketrans(tr_chars, " " * len(tr_chars))
        filter_items = filter_items.translate(tr_table)

        filter_items = sorted(set(filter_items.split(" ")))
        filter_items = [item for item in filter_items if len(item) > 3]

        self.completer.setTokenList(filter_items)

        if self.currentGds:
            current_id = self.currentGds["dataset_id"]
            gdss = [(i, qunpack(proxy.data(proxy.index(i, 1), Qt.DisplayRole)))
                    for i in range(proxy.rowCount())]
            current = [i for i, data in gdss if data and data == current_id]
            if current:
                current_index = proxy.index(current[0], 0)
                self.treeWidget.selectionModel().select(
                    current_index,
                    QItemSelectionModel.Select | QItemSelectionModel.Rows)
                self.treeWidget.scrollTo(current_index,
                                         QTreeView.PositionAtCenter)

        for i in range(8):
            self.treeWidget.resizeColumnToContents(i)

        self.treeWidget.setColumnWidth(
            1, min(self.treeWidget.columnWidth(1), 300))
        self.treeWidget.setColumnWidth(
            2, min(self.treeWidget.columnWidth(2), 200))

        self.updateInfo()

    def updateInfo(self):
        gds_info = self.gds_info
        text = ("%i datasets\n%i datasets cached\n" %
                (len(gds_info),
                 len(glob.glob(serverfiles.localpath("GEO") + "/GDS*"))))
        filtered = self.treeWidget.model().rowCount()
        if len(self.gds) != filtered:
            text += ("%i after filtering") % filtered
        self.infoBox.setText(text)

    def updateSelection(self, *args):
        current = self.treeWidget.selectedIndexes()
        mapToSource = self.treeWidget.model().mapToSource
        current = [mapToSource(index).row() for index in current]
        if current:
            self.currentGds = self.gds[current[0]]
            self.setAnnotations(self.currentGds)
            self.infoGDS.setText(self.currentGds.get("description", ""))
            self.nameEdit.setPlaceholderText(self.currentGds["title"])
            self.datasetName = \
                self.datasetNames.get(self.currentGds["dataset_id"], "")
        else:
            self.currentGds = None
            self.nameEdit.setPlaceholderText("")
            self.datasetName = ""

        self.commitIf()

    def setAnnotations(self, gds):
        self._annotationsUpdating = True
        self.annotationsTree.clear()

        annotations = defaultdict(set)
        subsetscount = {}
        for desc in gds["subsets"]:
            annotations[desc["type"]].add(desc["description"])
            subsetscount[desc["description"]] = str(len(desc["sample_id"]))

        for type, subsets in annotations.items():
            key = (gds["dataset_id"], type)
            subsetItem = QTreeWidgetItem(self.annotationsTree, [type])
            subsetItem.setFlags(subsetItem.flags() | Qt.ItemIsUserCheckable
                                | Qt.ItemIsTristate)
            subsetItem.setCheckState(
                0, self.gdsSelectionStates.get(key, Qt.Checked))
            subsetItem.key = key
            for subset in subsets:
                key = (gds["dataset_id"], type, subset)
                item = QTreeWidgetItem(
                    subsetItem, [subset, subsetscount.get(subset, "")])
                item.setFlags(item.flags() | Qt.ItemIsUserCheckable)
                item.setCheckState(
                    0, self.gdsSelectionStates.get(key, Qt.Checked))
                item.key = key
        self._annotationsUpdating = False
        self.annotationsTree.expandAll()
        for i in range(self.annotationsTree.columnCount()):
            self.annotationsTree.resizeColumnToContents(i)

    def annotationSelectionChanged(self, item, column):
        if self._annotationsUpdating:
            return
        for i in range(self.annotationsTree.topLevelItemCount()):
            item = self.annotationsTree.topLevelItem(i)
            self.gdsSelectionStates[item.key] = item.checkState(0)
            for j in range(item.childCount()):
                child = item.child(j)
                self.gdsSelectionStates[child.key] = child.checkState(0)

    def filter(self):
        filter_string = unicode(self.filterLineEdit.text())
        proxyModel = self.treeWidget.model()
        if proxyModel:
            strings = filter_string.lower().strip().split()
            proxyModel.setFilterFixedStrings(strings)
            self.updateInfo()

    def selectedSamples(self):
        """
        Return the currently selected sample annotations.

        The return value is a list of selected (sample type, sample value)
        tuples.

        .. note:: if some Sample annotation type has no selected values.
                  this method will return all values for it.

        """
        samples = []
        unused_types = []
        used_types = []
        for stype in childiter(self.annotationsTree.invisibleRootItem()):
            selected_values = []
            all_values = []
            for sval in childiter(stype):
                value = (str(stype.text(0)), str(sval.text(0)))
                if self.gdsSelectionStates.get(sval.key, True):
                    selected_values.append(value)
                all_values.append(value)
            if selected_values:
                samples.extend(selected_values)
                used_types.append(str(stype.text(0)))
            else:
                # If no sample of sample type is selected we don't filter
                # on it.
                samples.extend(all_values)
                unused_types.append(str(stype.text(0)))

        return samples, used_types

    def commitIf(self):
        if self.autoCommit:
            self.commit()
        else:
            self.selectionChanged = True

    @Slot(int, int)
    def progressCompleted(self, value, total):
        if total > 0:
            self.progressBarSet(100. * value / total, processEvents=False)
        else:
            pass
            # TODO: report 'indeterminate progress'

    def commit(self):
        if self.currentGds:
            self.error(0)
            sample_type = None
            self.progressBarInit(processEvents=None)

            _, groups = self.selectedSamples()
            if len(groups) == 1 and self.outputRows:
                sample_type = groups[0]

            self.setEnabled(False)
            self.setBlocking(True)

            progress = methodinvoke(self, "progressCompleted", (int, int))

            def get_data(gds_id, report_genes, transpose, sample_type, title):
                gds_ensure_downloaded(gds_id, progress)
                gds = geo.GDS(gds_id)
                data = gds.getdata(report_genes=report_genes,
                                   transpose=transpose,
                                   sample_type=sample_type)
                data.name = title
                return data

            get_data = partial(get_data,
                               self.currentGds["dataset_id"],
                               report_genes=self.mergeSpots,
                               transpose=self.outputRows,
                               sample_type=sample_type,
                               title=self.datasetName
                               or self.currentGds["title"])
            self._datatask = Task(function=get_data)
            self._datatask.finished.connect(self._on_dataready)
            self._executor.submit(self._datatask)

    def _on_dataready(self):
        self.setEnabled(True)
        self.setBlocking(False)
        self.progressBarFinished(processEvents=False)

        try:
            data = self._datatask.result()
        except urlrequest.URLError as error:
            self.error(0, ("Error while connecting to the NCBI ftp server! "
                           "'%s'" % error))
            sys.excepthook(type(error), error, getattr(error, "__traceback__"))
            return
        finally:
            self._datatask = None

        data_name = data.name
        samples, _ = self.selectedSamples()

        self.warning(0)
        message = None
        if self.outputRows:

            def samplesinst(ex):
                out = []
                for meta in data.domain.metas:
                    out.append((meta.name, ex[meta].value))

                if data.domain.class_var.name != 'class':
                    out.append((data.domain.class_var.name,
                                ex[data.domain.class_var].value))

                return out

            samples = set(samples)
            mask = [samples.issuperset(samplesinst(ex)) for ex in data]
            data = data[numpy.array(mask, dtype=bool)]
            if len(data) == 0:
                message = "No samples with selected sample annotations."
        else:
            samples = set(samples)
            domain = Orange.data.Domain([
                attr for attr in data.domain.attributes
                if samples.issuperset(attr.attributes.items())
            ], data.domain.class_var, data.domain.metas)
            #             domain.addmetas(data.domain.getmetas())

            if len(domain.attributes) == 0:
                message = "No samples with selected sample annotations."
            stypes = set(s[0] for s in samples)
            for attr in domain.attributes:
                attr.attributes = dict(
                    (key, value) for key, value in attr.attributes.items()
                    if key in stypes)
            data = Orange.data.Table(domain, data)

        if message is not None:
            self.warning(0, message)

        data_hints.set_hint(data, "taxid", self.currentGds.get("taxid", ""),
                            10.0)
        data_hints.set_hint(data, "genesinrows", self.outputRows, 10.0)

        data.name = data_name
        self.send("Expression Data", data)

        model = self.treeWidget.model().sourceModel()
        row = self.gds.index(self.currentGds)

        model.setData(model.index(row, 0), " ", Qt.DisplayRole)

        self.updateInfo()
        self.selectionChanged = False

    def splitterMoved(self, *args):
        self.splitterSettings = [
            bytes(sp.saveState()) for sp in self.splitters
        ]

    def send_report(self):
        self.report_items("GEO Dataset",
                          [("ID", self.currentGds['dataset_id']),
                           ("Title", self.currentGds['title']),
                           ("Organism", self.currentGds['sample_organism'])])
        self.report_items("Data",
                          [("Samples", self.currentGds['sample_count']),
                           ("Features", self.currentGds['feature_count']),
                           ("Genes", self.currentGds['gene_count'])])
        self.report_name("Sample annotations")
        subsets = defaultdict(list)
        for subset in self.currentGds['subsets']:
            subsets[subset['type']].append(
                (subset['description'], len(subset['sample_id'])))
        self.report_html += "<ul>"
        for type in subsets:
            self.report_html += "<b>" + type + ":</b></br>"
            for desc, count in subsets[type]:
                self.report_html += 9 * "&nbsp" + "<b>{}:</b> {}</br>".format(
                    desc, count)
        self.report_html += "</ul>"

    def onDeleteWidget(self):
        if self._inittask:
            self._inittask.future().cancel()
            self._inittask.finished.disconnect(self._initializemodel)
        if self._datatask:
            self._datatask.future().cancel()
            self._datatask.finished.disconnect(self._on_dataready)
        self._executor.shutdown(wait=False)

        super(OWGEODatasets, self).onDeleteWidget()

    def onNameEdited(self):
        if self.currentGds:
            gds_id = self.currentGds["dataset_id"]
            self.datasetNames[gds_id] = unicode(self.nameEdit.text())
            self.commitIf()
class OWDatabasesUpdate(OWWidget):

    name = "Databases Update"
    description = "Update local systems biology databases."
    icon = "../widgets/icons/OWDatabasesUpdate.svg"
    priority = 1

    inputs = []
    outputs = []

    want_main_area = False

    def __init__(self,
                 parent=None,
                 signalManager=None,
                 name="Databases update"):
        OWWidget.__init__(self,
                          parent,
                          signalManager,
                          name,
                          wantMainArea=False)

        self.searchString = ""

        fbox = gui.widgetBox(self.controlArea, "Filter")
        self.completer = TokenListCompleter(self,
                                            caseSensitivity=Qt.CaseInsensitive)
        self.lineEditFilter = QLineEdit(textChanged=self.search_update)
        self.lineEditFilter.setCompleter(self.completer)

        fbox.layout().addWidget(self.lineEditFilter)

        box = gui.widgetBox(self.controlArea, "Files")
        self.filesView = QTreeWidget(self)
        self.filesView.setHeaderLabels(header_labels)
        self.filesView.setRootIsDecorated(False)
        self.filesView.setUniformRowHeights(True)
        self.filesView.setSelectionMode(QAbstractItemView.NoSelection)
        self.filesView.setSortingEnabled(True)
        self.filesView.sortItems(header.Title, Qt.AscendingOrder)
        self.filesView.setItemDelegateForColumn(
            0, UpdateOptionsItemDelegate(self.filesView))

        self.filesView.model().layoutChanged.connect(self.search_update)

        box.layout().addWidget(self.filesView)

        layout = QHBoxLayout()
        gui.widgetBox(self.controlArea, margin=0, orientation=layout)

        self.updateButton = gui.button(
            box,
            self,
            "Update all",
            callback=self.update_all,
            tooltip="Update all updatable files",
        )

        self.downloadButton = gui.button(
            box,
            self,
            "Download all",
            callback=self.download_filtered,
            tooltip="Download all filtered files shown")

        self.cancelButton = gui.button(
            box,
            self,
            "Cancel",
            callback=self.cancel_active_threads,
            tooltip="Cancel scheduled downloads/updates.")

        self.addButton = gui.button(box,
                                    self,
                                    "Add ...",
                                    callback=self.__handle_dialog,
                                    tooltip="Add files for personal use.")

        layout.addWidget(self.updateButton)
        layout.addWidget(self.downloadButton)
        layout.addWidget(self.cancelButton)
        layout.addStretch()
        layout.addWidget(self.addButton)

        # Enable retryButton once connection is established
        # self.retryButton = gui.button(
        #     box, self, "Reconnect", callback=self.initialize_files_view
        # )
        # self.retryButton.hide()

        self.resize(800, 600)

        self.update_items = []
        self._dialog = None
        self.progress_bar = None

        # threads
        self.threadpool = QThreadPool(self)
        #self.threadpool.setMaxThreadCount(1)
        self.workers = list()

        self.initialize_files_view()

    def __handle_dialog(self):
        if not self._dialog:
            self._dialog = FileUploadHelper(self)
        self._dialog.show()

    def __progress_advance(self):
        # GUI should be updated in main thread. That's why we are calling advance method here
        if self.progress_bar:
            self.progress_bar.advance()

    def handle_worker_exception(self, ex):
        self.progress_bar.finish()
        self.setStatusMessage('')

        if isinstance(ex, ConnectionError):
            # TODO: set warning messages
            pass

        print(ex)

    def initialize_files_view(self):
        # self.retryButton.hide()

        # clear view
        self.filesView.clear()
        # init progress bar
        self.progress_bar = gui.ProgressBar(self, iterations=3)
        # status message
        self.setStatusMessage('initializing')

        worker = Worker(evaluate_files_state, progress_callback=True)
        worker.signals.progress.connect(self.__progress_advance)
        worker.signals.result.connect(self.set_files_list)
        worker.signals.error.connect(self.handle_worker_exception)

        # move download process to worker thread
        self.threadpool.start(worker)
        self.setEnabled(False)

    def __create_action_button(self, fs, retry=None):
        if not fs.state not in [OUTDATED, USER_FILE] or not retry:
            self.filesView.setItemWidget(fs.tree_item, header.Update, None)

        button = QToolButton(None)
        if not retry:
            if fs.state == OUTDATED:
                button.setText('Update')
                button.clicked.connect(
                    partial(self.submit_download_task, fs.domain, fs.filename,
                            True))
            elif fs.state == USER_FILE:
                if not fs.info_server:
                    button.setText('Remove')
                    button.clicked.connect(
                        partial(self.submit_remove_task, fs.domain,
                                fs.filename))
                else:
                    button.setText('Use server version')
                    button.clicked.connect(
                        partial(self.submit_download_task, fs.domain,
                                fs.filename, True))
        else:
            button.setText('Retry')
            button.clicked.connect(
                partial(self.submit_download_task, fs.domain, fs.filename,
                        True))

        button.setMaximumWidth(120)
        button.setMaximumHeight(20)
        button.setMinimumHeight(20)

        if sys.platform == "darwin":
            button.setAttribute(Qt.WA_MacSmallSize)

        self.filesView.setItemWidget(fs.tree_item, header.Update, button)

    def set_files_list(self, result):
        """ Set the files to show.
        """
        assert threading.current_thread() == threading.main_thread()
        self.progress_bar.finish()
        self.setStatusMessage('')
        self.setEnabled(True)

        self.update_items = result
        all_tags = set()

        for fs in self.update_items:
            fs.tree_item = FileStateItem(fs)
            fs.download_option = DownloadOption(state=fs.state)

            fs.download_option.download_clicked.connect(
                partial(self.submit_download_task, fs.domain, fs.filename))
            fs.download_option.remove_clicked.connect(
                partial(self.submit_remove_task, fs.domain, fs.filename))

        # add widget items to the QTreeWidget
        self.filesView.addTopLevelItems(
            [fs.tree_item for fs in self.update_items])

        # add action widgets to tree items
        for fs in self.update_items:
            self.filesView.setItemWidget(fs.tree_item, header.Download,
                                         fs.download_option)
            if fs.state in [USER_FILE, OUTDATED]:
                self.__create_action_button(fs)

            all_tags.update(fs.tags)

        self.filesView.setColumnWidth(
            header.Download, self.filesView.sizeHintForColumn(header.Download))

        for column in range(1, len(header_labels)):
            self.filesView.resizeColumnToContents(column)

        hints = [hint for hint in sorted(all_tags) if not hint.startswith("#")]
        self.completer.setTokenList(hints)
        self.search_update()
        self.toggle_action_buttons()
        self.cancelButton.setEnabled(False)

    def toggle_action_buttons(self):
        selected_items = [
            fs for fs in self.update_items if not fs.tree_item.isHidden()
        ]

        def button_check(sel_items, state, button):
            for item in sel_items:
                if item.state != state:
                    button.setEnabled(False)
                else:
                    button.setEnabled(True)
                    break

        button_check(selected_items, OUTDATED, self.updateButton)
        button_check(selected_items, AVAILABLE, self.downloadButton)

    def search_update(self, searchString=None):
        strings = str(self.lineEditFilter.text()).split()
        for fs in self.update_items:
            hide = not all(UpdateItem_match(fs, string) for string in strings)
            fs.tree_item.setHidden(hide)
        self.toggle_action_buttons()

    def update_all(self):
        for fs in self.update_items:
            if fs.state == OUTDATED and not fs.tree_item.isHidden():
                self.submit_download_task(fs.domain, fs.filename)

    def download_filtered(self):
        for fs in self.update_items:
            if not fs.tree_item.isHidden() and fs.state in [
                    AVAILABLE, OUTDATED
            ]:
                self.submit_download_task(fs.domain, fs.filename, start=False)

        self.run_download_tasks()

    def submit_download_task(self, domain, filename, start=True):
        """ Submit the (domain, filename) to be downloaded/updated.
        """
        # get selected tree item
        index = self.tree_item_index(domain, filename)
        fs = self.update_items[index]

        worker = Worker(download_server_file,
                        fs,
                        index,
                        progress_callback=True)
        worker.signals.progress.connect(self.__progress_advance)
        worker.signals.result.connect(self.on_download_finished)
        worker.signals.error.connect(self.on_download_exception)

        self.workers.append(worker)

        if start:
            self.run_download_tasks()

    def run_download_tasks(self):
        self.cancelButton.setEnabled(True)
        # init progress bar

        self.progress_bar = gui.ProgressBar(self,
                                            iterations=len(self.workers) * 100)

        # status message
        self.setStatusMessage('downloading')

        # move workers to threadpool
        [self.threadpool.start(worker) for worker in self.workers]
        self.filesView.setDisabled(True)
        # reset list of workers
        self.workers = list()

    def on_download_exception(self, ex):
        assert threading.current_thread() == threading.main_thread()
        self.progress_bar.finish()
        self.setStatusMessage('')
        print(ex)
        if isinstance(ex, ValueError):
            fs, index = ex.args

            # restore state and retry
            fs.refresh_state()
            fs.tree_item.update_data(fs)
            fs.download_option.state = fs.state
            self.__create_action_button(fs, retry=True)

    def on_download_finished(self, result):
        assert threading.current_thread() == threading.main_thread()

        # We check if all workers have completed. If not, continue
        if self.progress_bar.count == 100 or self.threadpool.activeThreadCount(
        ) == 0:
            self.filesView.setDisabled(False)
            self.progress_bar.finish()
            self.setStatusMessage('')

        fs, index = result
        # re-evaluate File State
        info = serverfiles.info(fs.domain, fs.filename)
        fs.refresh_state(info_local=info, info_server=info)
        # reinitialize treeWidgetItem
        fs.tree_item.update_data(fs)
        # reinitialize OptionWidget
        fs.download_option.state = fs.state
        self.filesView.setItemWidget(fs.tree_item, header.Update, None)

        self.toggle_action_buttons()
        for column in range(1, len(header_labels)):
            self.filesView.resizeColumnToContents(column)

    def submit_remove_task(self, domain, filename):
        serverfiles.LOCALFILES.remove(domain, filename)

        index = self.tree_item_index(domain, filename)
        fs = self.update_items[index]

        if fs.state == USER_FILE:
            self.filesView.takeTopLevelItem(
                self.filesView.indexOfTopLevelItem(fs.tree_item))
            self.update_items.remove(fs)
            # self.filesView.removeItemWidget(index)
        else:
            # refresh item state
            fs.info_local = None
            fs.refresh_state()
            # reinitialize treeWidgetItem
            fs.tree_item.update_data(fs)
            # reinitialize OptionWidget
            fs.download_option.state = fs.state

        self.toggle_action_buttons()

    def cancel_active_threads(self):
        """ Cancel all pending update/download tasks (that have not yet started).
        """
        if self.threadpool:
            self.threadpool.clear()

    def tree_item_index(self, domain, filename):
        for i, fs in enumerate(self.update_items):
            if fs.domain == domain and fs.filename == filename:
                return i
        raise ValueError("%r, %r not in update list" % (domain, filename))

    def onDeleteWidget(self):
        self.cancel_active_threads()
        OWWidget.onDeleteWidget(self)
class OWDatabasesUpdate(OWWidget):

    name = "Databases Update"
    description = "Update local systems biology databases."
    icon = "../widgets/icons/OWDatabasesUpdate.svg"
    priority = 1

    inputs = []
    outputs = []

    want_main_area = False

    def __init__(self, parent=None, signalManager=None,
                 name="Databases update"):
        OWWidget.__init__(self, parent, signalManager, name,
                          wantMainArea=False)

        self.searchString = ""

        fbox = gui.widgetBox(self.controlArea, "Filter")
        self.completer = TokenListCompleter(
            self, caseSensitivity=Qt.CaseInsensitive)
        self.lineEditFilter = QLineEdit(textChanged=self.SearchUpdate)
        self.lineEditFilter.setCompleter(self.completer)

        fbox.layout().addWidget(self.lineEditFilter)

        box = gui.widgetBox(self.controlArea, "Files")

        self.filesView = QTreeWidget(self)
        self.filesView.setHeaderLabels(
            ["", "Data Source", "Update", "Last Updated", "Size"])

        self.filesView.setRootIsDecorated(False)
        self.filesView.setUniformRowHeights(True)
        self.filesView.setSelectionMode(QAbstractItemView.NoSelection)
        self.filesView.setSortingEnabled(True)
        self.filesView.sortItems(1, Qt.AscendingOrder)
        self.filesView.setItemDelegateForColumn(
            0, UpdateOptionsItemDelegate(self.filesView))

        self.filesView.model().layoutChanged.connect(self.SearchUpdate)

        box.layout().addWidget(self.filesView)

        box = gui.widgetBox(self.controlArea, orientation="horizontal")
        self.updateButton = gui.button(
            box, self, "Update all",
            callback=self.UpdateAll,
            tooltip="Update all updatable files",
         )

        self.downloadButton = gui.button(
            box, self, "Download all",
            callback=self.DownloadFiltered,
            tooltip="Download all filtered files shown"
        )

        self.cancelButton = gui.button(
            box, self, "Cancel", callback=self.Cancel,
            tooltip="Cancel scheduled downloads/updates."
        )

        self.retryButton = gui.button(
            box, self, "Reconnect", callback=self.RetrieveFilesList
        )
        self.retryButton.hide()

        gui.rubber(box)
        self.warning(0)

        box = gui.widgetBox(self.controlArea, orientation="horizontal")
        gui.rubber(box)

        self.infoLabel = QLabel()
        self.infoLabel.setAlignment(Qt.AlignCenter)

        self.controlArea.layout().addWidget(self.infoLabel)
        self.infoLabel.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Fixed)

        self.updateItems = []

        self.resize(800, 600)

        self.progress = ProgressState(self, maximum=3)
        self.progress.valueChanged.connect(self._updateProgress)
        self.progress.rangeChanged.connect(self._updateProgress)
        self.executor = ThreadExecutor(
            threadPool=QThreadPool(maxThreadCount=2)
        )

        task = Task(self, function=self.RetrieveFilesList)
        task.exceptionReady.connect(self.HandleError)
        task.start()

        self._tasks = []
        self._haveProgress = False

    def RetrieveFilesList(self):
        self.retryButton.hide()
        self.warning(0)
        self.progress.setRange(0, 3)

        task = Task(function=partial(retrieveFilesList, methodinvoke(self.progress, "advance")))

        task.resultReady.connect(self.SetFilesList)
        task.exceptionReady.connect(self.HandleError)

        self.executor.submit(task)

        self.setEnabled(False)

    def SetFilesList(self, serverInfo):
        """
        Set the files to show.
        """
        self.setEnabled(True)

        localInfo = serverfiles.allinfo()
        all_tags = set()

        self.filesView.clear()
        self.updateItems = []

        for item in join_info_dict(localInfo, serverInfo):
            tree_item = UpdateTreeWidgetItem(item)
            options_widget = UpdateOptionsWidget(item.state)
            options_widget.item = item

            options_widget.installClicked.connect(
                partial(self.SubmitDownloadTask, item.domain, item.filename)
            )
            options_widget.removeClicked.connect(
                partial(self.SubmitRemoveTask, item.domain, item.filename)
            )

            self.updateItems.append((item, tree_item, options_widget))
            all_tags.update(item.tags)

        self.filesView.addTopLevelItems(
            [tree_item for _, tree_item, _ in self.updateItems]
        )

        for item, tree_item, options_widget in self.updateItems:
            self.filesView.setItemWidget(tree_item, 0, options_widget)

            # Add an update button if the file is updateable
            if item.state == OUTDATED:
                button = QToolButton(
                    None, text="Update",
                    maximumWidth=120,
                    minimumHeight=20,
                    maximumHeight=20
                )

                if sys.platform == "darwin":
                    button.setAttribute(Qt.WA_MacSmallSize)

                button.clicked.connect(
                    partial(self.SubmitDownloadTask, item.domain,
                            item.filename)
                )

                self.filesView.setItemWidget(tree_item, 2, button)

        self.progress.advance()

        self.filesView.setColumnWidth(0, self.filesView.sizeHintForColumn(0))

        for column in range(1, 4):
            contents_hint = self.filesView.sizeHintForColumn(column)
            header_hint = self.filesView.header().sectionSizeHint(column)
            width = max(min(contents_hint, 400), header_hint)
            self.filesView.setColumnWidth(column, width)

        hints = [hint for hint in sorted(all_tags) if not hint.startswith("#")]
        self.completer.setTokenList(hints)
        self.SearchUpdate()
        self.UpdateInfoLabel()
        self.toggleButtons()
        self.cancelButton.setEnabled(False)

        self.progress.setRange(0, 0)

    def buttonCheck(self, selected_items, state, button):
        for item in selected_items:
            if item.state != state:
                button.setEnabled(False)
            else:
                button.setEnabled(True)
                break

    def toggleButtons(self):
        selected_items = [item for item, tree_item, _ in self.updateItems if not tree_item.isHidden()]
        self.buttonCheck(selected_items, OUTDATED, self.updateButton)
        self.buttonCheck(selected_items, AVAILABLE, self.downloadButton)

    def HandleError(self, exception):
        if isinstance(exception, ConnectionError):
            self.warning(0,
                       "Could not connect to server! Check your connection "
                       "and try to reconnect.")
            self.SetFilesList({})
            self.retryButton.show()
        else:
            sys.excepthook(type(exception), exception, None)
            self.progress.setRange(0, 0)
            self.setEnabled(True)

    def UpdateInfoLabel(self):
        local = [item for item, tree_item, _ in self.updateItems
                 if item.state != AVAILABLE and not tree_item.isHidden()]
        size = sum(float(item.size) for item in local)

        onServer = [item for item, tree_item, _ in self.updateItems if not tree_item.isHidden()]
        sizeOnServer = sum(float(item.size) for item in onServer)

        text = ("%i items, %s (on server: %i items, %s)" %
                (len(local),
                 serverfiles.sizeformat(size),
                 len(onServer),
                 serverfiles.sizeformat(sizeOnServer)))

        self.infoLabel.setText(text)

    def UpdateAll(self):
        self.warning(0)
        for item, tree_item, _ in self.updateItems:
            if item.state == OUTDATED and not tree_item.isHidden():
                self.SubmitDownloadTask(item.domain, item.filename)

    def DownloadFiltered(self):
        # TODO: submit items in the order shown.
        for item, tree_item, _ in self.updateItems:
            if not tree_item.isHidden() and item.state in \
                    [AVAILABLE, OUTDATED]:
                self.SubmitDownloadTask(item.domain, item.filename)

    def SearchUpdate(self, searchString=None):
        strings = str(self.lineEditFilter.text()).split()
        for item, tree_item, _ in self.updateItems:
            hide = not all(UpdateItem_match(item, string)
                           for string in strings)
            tree_item.setHidden(hide)
        self.UpdateInfoLabel()
        self.toggleButtons()

    def SubmitDownloadTask(self, domain, filename):
        """
        Submit the (domain, filename) to be downloaded/updated.
        """
        self.cancelButton.setEnabled(True)

        index = self.updateItemIndex(domain, filename)
        _, tree_item, opt_widget = self.updateItems[index]

        task = DownloadTask(domain, filename, serverfiles.LOCALFILES)

        self.progress.adjustRange(0, 100)

        pb = ItemProgressBar(self.filesView)
        pb.setRange(0, 100)
        pb.setTextVisible(False)

        task.advanced.connect(pb.advance)
        task.advanced.connect(self.progress.advance)
        task.finished.connect(pb.hide)
        task.finished.connect(self.onDownloadFinished, Qt.QueuedConnection)
        task.exception.connect(self.onDownloadError, Qt.QueuedConnection)

        self.filesView.setItemWidget(tree_item, 2, pb)

        # Clear the text so it does not show behind the progress bar.
        tree_item.setData(2, Qt.DisplayRole, "")
        pb.show()

        # Disable the options widget
        opt_widget.setEnabled(False)
        self._tasks.append(task)

        self.executor.submit(task)

    def EndDownloadTask(self, task):
        future = task.future()
        index = self.updateItemIndex(task.domain, task.filename)
        item, tree_item, opt_widget = self.updateItems[index]

        self.filesView.removeItemWidget(tree_item, 2)
        opt_widget.setEnabled(True)

        if future.cancelled():
            # Restore the previous state
            tree_item.setUpdateItem(item)
            opt_widget.setState(item.state)

        elif future.exception():
            tree_item.setUpdateItem(item)
            opt_widget.setState(item.state)

            # Show the exception string in the size column.
            self.warning(0, "Error while downloading. Check your connection "
                            "and retry.")

            # recreate button for download
            button = QToolButton(
                None, text="Retry",
                maximumWidth=120,
                minimumHeight=20,
                maximumHeight=20
            )

            if sys.platform == "darwin":
                button.setAttribute(Qt.WA_MacSmallSize)

            button.clicked.connect(
                partial(self.SubmitDownloadTask, item.domain,
                        item.filename)
            )

            self.filesView.setItemWidget(tree_item, 2, button)

        else:
            # get the new updated info dict and replace the the old item
            self.warning(0)
            info = serverfiles.info(item.domain, item.filename)
            new_item = update_item_from_info(item.domain, item.filename,
                                             info, info)

            self.updateItems[index] = (new_item, tree_item, opt_widget)

            tree_item.setUpdateItem(new_item)
            opt_widget.setState(new_item.state)

            self.UpdateInfoLabel()

    def SubmitRemoveTask(self, domain, filename):
        serverfiles.LOCALFILES.remove(domain, filename)
        index = self.updateItemIndex(domain, filename)
        item, tree_item, opt_widget = self.updateItems[index]

        if item.info_server:
            new_item = item._replace(state=AVAILABLE, local=None,
                                      info_local=None)
        else:
            new_item = item._replace(local=None, info_local=None)
            # Disable the options widget. No more actions can be performed
            # for the item.
            opt_widget.setEnabled(False)

        tree_item.setUpdateItem(new_item)
        opt_widget.setState(new_item.state)
        self.updateItems[index] = (new_item, tree_item, opt_widget)

        self.UpdateInfoLabel()

    def Cancel(self):
        """
        Cancel all pending update/download tasks (that have not yet started).
        """
        for task in self._tasks:
            task.future().cancel()

    def onDeleteWidget(self):
        self.Cancel()
        self.executor.shutdown(wait=False)
        OWWidget.onDeleteWidget(self)

    def onDownloadFinished(self):
        # on download completed/canceled/error
        assert QThread.currentThread() is self.thread()
        for task in list(self._tasks):
            future = task.future()
            if future.done():
                self.EndDownloadTask(task)
                self._tasks.remove(task)

        if not self._tasks:
            # Clear/reset the overall progress
            self.progress.setRange(0, 0)
            self.cancelButton.setEnabled(False)

    def onDownloadError(self, exc_info):
        sys.excepthook(*exc_info)
        self.warning(0, "Error while downloading. Check your connection and "
                        "retry.")

    def updateItemIndex(self, domain, filename):
        for i, (item, _, _) in enumerate(self.updateItems):
            if item.domain == domain and item.filename == filename:
                return i
        raise ValueError("%r, %r not in update list" % (domain, filename))

    def _updateProgress(self, *args):
        rmin, rmax = self.progress.range()
        if rmin != rmax:
            if not self._haveProgress:
                self._haveProgress = True
                self.progressBarInit()

            self.progressBarSet(self.progress.ratioCompleted() * 100,
                                processEvents=None)
        if rmin == rmax:
            self._haveProgress = False
            self.progressBarFinished()
Beispiel #6
0
class OWGenExpress(widget.OWWidget):
    name = "GenExpress"
    description = "Expression data from GenExpress."
    icon = "../widgets/icons/GenCloud.svg"
    priority = 36

    inputs = []
    outputs = [("Data", Orange.data.Table)]

    username = settings.Setting("anonymous")
    password = settings.Setting("")
    log2 = settings.Setting(False)
    transpose = settings.Setting(False)
    rtypei = settings.Setting(0)
    projecti = settings.Setting(0)
    serveri = settings.Setting(0)
    exnamei = settings.Setting(6)

    excludeconstant = settings.Setting(False)
    joinreplicates = settings.Setting(False)
    currentSelection = settings.Setting(None)

    experimentsHeaderState = settings.Setting(
        {name: False
         for _, name in HEADER[:ID_INDEX + 1]})

    storedSortOrder = settings.Setting([])
    storedSelections = settings.Setting([])

    def __init__(self, parent=None):
        super().__init__(parent)

        self.servers = [
            ('https://dictyexpress.research.bcm.edu/', 'dictyExpress'),
            ('https://cloud.genialis.com/', 'Genialis'),
        ]

        self.selectedExperiments = []
        self.buffer = dicty.CacheSQLite(bufferfile)

        self.searchString = ""

        self.items = []

        self.result_types = []

        self.controlArea.setMaximumWidth(250)
        self.controlArea.setMinimumWidth(250)

        box = gui.widgetBox(self.controlArea, 'Project')
        self.projectCB = gui.comboBox(box,
                                      self,
                                      "projecti",
                                      items=[],
                                      callback=self.ProjectChosen)

        self.projects = []

        b = gui.widgetBox(self.controlArea, "Selection bookmarks")
        self.selectionSetsWidget = SelectionSetsWidget(self)
        self.selectionSetsWidget.setSizePolicy(QSizePolicy.Preferred,
                                               QSizePolicy.Maximum)

        def store_selections(modified):
            if not modified:
                self.storedSelections = self.selectionSetsWidget.selections

        self.selectionSetsWidget.selectionModified.connect(store_selections)

        b.layout().addWidget(self.selectionSetsWidget)

        gui.separator(self.controlArea)

        b = gui.widgetBox(self.controlArea, "Sort output columns")
        self.columnsSortingWidget = SortedListWidget(self)
        self.columnsSortingWidget.setSizePolicy(QSizePolicy.Preferred,
                                                QSizePolicy.Maximum)

        box = gui.widgetBox(self.controlArea, 'Experiment name')
        self.experimentNameCB = gui.comboBox(box,
                                             self,
                                             "exnamei",
                                             items=SORTING_MODEL_LIST)

        b.layout().addWidget(self.columnsSortingWidget)
        sorting_model = QStringListModel(SORTING_MODEL_LIST)
        self.columnsSortingWidget.setModel(sorting_model)
        self.columnsSortingWidget.sortingOrder = self.storedSortOrder

        def store_sort_order():
            self.storedSortOrder = self.columnsSortingWidget.sortingOrder

        self.columnsSortingWidget.sortingOrderChanged.connect(store_sort_order)

        gui.separator(self.controlArea)

        box = gui.widgetBox(self.controlArea, 'Expression Type')
        self.expressionTypesCB = gui.comboBox(box,
                                              self,
                                              "rtypei",
                                              items=[],
                                              callback=self.UpdateResultsList)

        gui.checkBox(self.controlArea, self, "excludeconstant",
                     "Exclude labels with constant values")

        gui.checkBox(self.controlArea, self, "joinreplicates",
                     "Average replicates (use median)")

        gui.checkBox(self.controlArea, self, "log2",
                     "Logarithmic (base 2) transformation")

        gui.checkBox(self.controlArea, self, "transpose", "Genes as columns")

        self.commit_button = gui.button(self.controlArea,
                                        self,
                                        "&Commit",
                                        callback=self.Commit)
        self.commit_button.setDisabled(True)

        gui.rubber(self.controlArea)

        box = gui.widgetBox(self.controlArea, 'Server')
        gui.comboBox(box,
                     self,
                     "serveri",
                     items=[title for url, title in self.servers],
                     callback=self.ServerChosen)

        gui.lineEdit(box,
                     self,
                     "username",
                     "Username:"******"password",
                                  "Password:"******"Clear cache",
                   callback=self.clear_cache)

        gui.lineEdit(self.mainArea,
                     self,
                     "searchString",
                     "Search",
                     callbackOnType=True,
                     callback=self.SearchUpdate)

        self.headerLabels = [t[1] for t in HEADER]

        self.experimentsWidget = QTreeWidget()
        self.experimentsWidget.setHeaderLabels(self.headerLabels)
        self.experimentsWidget.setSelectionMode(QTreeWidget.ExtendedSelection)
        self.experimentsWidget.setRootIsDecorated(False)
        self.experimentsWidget.setSortingEnabled(True)

        contextEventFilter = gui.VisibleHeaderSectionContextEventFilter(
            self.experimentsWidget, self.experimentsWidget)

        self.experimentsWidget.header().installEventFilter(contextEventFilter)
        self.experimentsWidget.setItemDelegateForColumn(
            0, gui.IndicatorItemDelegate(self, role=Qt.DisplayRole))

        self.experimentsWidget.setAlternatingRowColors(True)

        self.experimentsWidget.selectionModel().selectionChanged.connect(
            self.onSelectionChanged)

        self.selectionSetsWidget.setSelectionModel(
            self.experimentsWidget.selectionModel())
        self.selectionSetsWidget.setSelections(self.storedSelections)

        self.mainArea.layout().addWidget(self.experimentsWidget)

        self.restoreHeaderState()

        self.experimentsWidget.header().geometriesChanged.connect(
            self.saveHeaderState)

        self.dbc = None

        self.AuthSet()

        QTimer.singleShot(100, self.ConnectAndUpdate)

    def sizeHint(self):
        return QSize(800, 600)

    def AuthSet(self):
        if len(self.username):
            self.passf.setDisabled(False)
        else:
            self.passf.setDisabled(True)

    def AuthChanged(self):
        self.AuthSet()
        self.ConnectAndUpdate()

    def ConnectAndUpdate(self):
        self.Connect()
        if self.dbc:

            def get_data_count(project_id):
                # XXX: is there a better way?
                # Note: limit 0 would return all objects
                return self.dbc.gen.api.data.get(
                    case_ids__contains=project_id,
                    type__startswith='data:expression:',
                    limit=1)['meta']['total_count']

            self.projects = sorted([
                p for p in self.dbc.projects().items()
                if get_data_count(p[0]) > 0
            ],
                                   key=lambda x: x[1])
            self.UpdateProjects()
            self.ProjectChosen()
            self.UpdateExperimentTypes()

    def Connect(self):
        self.error(1)
        self.warning(1)

        username = '******'
        password = '******'
        url = self.servers[self.serveri][0]

        if self.username:
            username = self.username
            password = self.password

        if username.lower() in ['*****@*****.**', 'anonymous']:
            username = '******'
            password = '******'

        self.dbc = None
        self.projects = []
        self.result_types = []

        try:
            self.dbc = Genesis(address=url,
                               username=username,
                               password=password,
                               cache=self.buffer)
        except requests.exceptions.ConnectionError:
            self.dbc = Genesis(address=url,
                               username=username,
                               password=password,
                               connect=False,
                               cache=self.buffer)
            self.warning(1, "Could not connect to server, working from cache.")
        except Exception:
            self.error(1, "Wrong username or password.")

        self.UpdateProjects()
        self.UpdateExperimentTypes()  # clear lists

    def Reload(self):
        self.UpdateExperiments(reload=True)

    def clear_cache(self):
        self.buffer.clear()
        self.Reload()

    def rtype(self):
        """Return selected result template type """
        if self.result_types:
            return self.result_types[self.rtypei]
        else:
            return None

    def UpdateExperimentTypes(self):
        self.expressionTypesCB.clear()
        items = [self.result_types_labels[desc] for desc in self.result_types]
        self.expressionTypesCB.addItems(items)
        #do not update anything if the list is empty
        if len(self.result_types):
            self.rtypei = max(0, min(self.rtypei, len(self.result_types) - 1))

    def UpdateProjects(self):
        self.projectCB.clear()
        items = [desc for pid, desc in self.projects]
        self.projectCB.addItems(items)
        #do not update anything if the list if empty
        if len(self.projects) > 0:
            self.projecti = max(0, min(self.projecti, len(self.projects) - 1))

    def UpdateExperiments(self, reload=False):

        self.experimentsWidget.clear()

        if not self.dbc or not self.dbc.projectid:  # the connection did not succeed
            return

        self.items = []

        self.progressBarInit()

        result_types = []
        result_types_labels = []

        sucind = False  # success indicator for database index

        try:
            result_types, result_types_labels = self.dbc.result_types(
                reload=reload)
            sucind = True
        except Exception:
            try:
                result_types, result_types_labels = self.dbc.result_types()
                self.warning(0, "Can not access database - using cached data.")
                sucind = True
            except Exception:
                self.error(0, "Can not access database.")

        if sucind:
            self.warning(0)
            self.error(0)

        self.result_types = result_types
        self.result_types_labels = result_types_labels

        self.UpdateExperimentTypes()
        self.UpdateResultsList(reload=reload)

        self.progressBarFinished()

        if self.currentSelection:
            self.currentSelection.select(
                self.experimentsWidget.selectionModel())

        self.handle_commit_button()

    def ProjectChosen(self, reload=False):
        if self.projects:
            self.dbc.projectid = self.projects[self.projecti][0]
        else:
            self.dbc.projectid = None

        self.UpdateExperiments(reload=reload)

    def ServerChosen(self):
        self.ConnectAndUpdate()

    def UpdateResultsList(self, reload=False):
        results_list = self.dbc.results_list(self.rtype(), reload=reload)
        try:
            results_list = self.dbc.results_list(self.rtype(), reload=reload)
        except Exception:
            try:
                results_list = self.dbc.results_list(self.rtype())
            except Exception:
                self.error(0, "Can not access database.")

        self.results_list = results_list

        #softly change the view so that the selection stays the same

        items_shown = {}
        for i, item in enumerate(self.items):
            c = str(item.text(ID_INDEX))
            items_shown[c] = i

        items_to_show = set(id_ for id_ in self.results_list)

        add_items = set(items_to_show) - set(items_shown)
        delete_items = set(items_shown) - set(items_to_show)

        i = 0
        while i < self.experimentsWidget.topLevelItemCount():
            it = self.experimentsWidget.topLevelItem(i)
            if str(it.text(ID_INDEX)) in delete_items:
                self.experimentsWidget.takeTopLevelItem(i)
            else:
                i += 1

        delete_ind = set([items_shown[i] for i in delete_items])
        self.items = [
            it for i, it in enumerate(self.items) if i not in delete_ind
        ]

        for r_annot in add_items:
            d = defaultdict(lambda: "?", self.results_list[r_annot])
            row_items = [""] + [
                to_text(d.get(key, "?")) for key, _ in HEADER[1:]
            ]
            row_items[ID_INDEX] = r_annot

            ci = MyTreeWidgetItem(self.experimentsWidget, row_items)
            self.items.append(ci)

        for i in range(len(self.headerLabels)):
            self.experimentsWidget.resizeColumnToContents(i)

        self.wantbufver = lambda x: self.results_list[x]["date_modified"]

        self.UpdateCached()

    def UpdateCached(self):
        if self.wantbufver and self.dbc:

            for item in self.items:
                id = str(item.text(ID_INDEX))
                version = self.dbc._in_buffer(id + "|||" + self.rtype())
                value = " " if version == self.wantbufver(id) else ""
                item.setData(0, Qt.DisplayRole, value)

    def SearchUpdate(self, string=""):
        for item in self.items:
            item.setHidden(not all(s in item
                                   for s in self.searchString.split()))

    def Commit(self):

        pb = gui.ProgressBar(self, iterations=100)

        table = None

        ids = []
        for item in self.experimentsWidget.selectedItems():
            unique_id = str(item.text(ID_INDEX))
            ids.append(unique_id)

        transfn = None
        if self.log2:
            transfn = lambda x: math.log(x + 1.0, 2)

        reverse_header_dict = {name: name for key, name in HEADER}
        reverse_header_dict["ID"] = "id"

        allowed_labels = None

        def namefn(a):
            name = SORTING_MODEL_LIST[self.exnamei]
            name = reverse_header_dict.get(name, "id")
            return dict(a)[name]

        if len(ids):
            table = self.dbc.get_data(
                ids=ids,
                result_type=self.rtype(),
                callback=pb.advance,
                exclude_constant_labels=self.excludeconstant,
                bufver=self.wantbufver,
                transform=transfn,
                allowed_labels=allowed_labels,
                namefn=namefn)

            if self.joinreplicates:
                table = dicty.join_replicates(
                    table,
                    ignorenames=self.dbc.IGNORE_REPLICATE,
                    namefn="name",
                    avg=dicty.median,
                    fnshow=lambda x: " | ".join(map(str, x)))

            # Sort attributes
            sortOrder = self.columnsSortingWidget.sortingOrder

            all_values = defaultdict(set)
            for at in table.domain.attributes:
                atts = at.attributes
                for name in sortOrder:
                    all_values[name].add(
                        atts.get(reverse_header_dict[name], ""))

            isnum = {}
            for at, vals in all_values.items():
                vals = filter(None, vals)
                try:
                    for a in vals:
                        float(a)
                    isnum[at] = True
                except ValueError:
                    isnum[at] = False

            def optfloat(x, at):
                if x == "":
                    return ""
                else:
                    return float(x) if isnum[at] else x

            def sorting_key(attr):
                atts = attr.attributes
                return tuple([
                    optfloat(atts.get(reverse_header_dict[name], ""), name)
                    for name in sortOrder
                ])

            attributes = sorted(table.domain.attributes, key=sorting_key)

            domain = Orange.data.Domain(attributes, table.domain.class_vars,
                                        table.domain.metas)

            table = Orange.data.Table.from_table(domain, table)
            table = Orange.data.Table(domain, table)

            if self.transpose:
                experiments = [at for at in table.domain.variables]
                attr = [
                    compat.ContinuousVariable.make(ex['DDB'].value)
                    for ex in table
                ]
                metavars = sorted(table.domain.variables[0].attributes.keys())
                metavars = [
                    compat.StringVariable.make(name) for name in metavars
                ]
                domain = compat.create_domain(attr, None, metavars)
                metavars = compat.get_metas(domain)
                metas = [[exp.attributes[var.name] for var in metavars]
                         for exp in experiments]
                table = compat.create_table(domain, table.X.transpose(), None,
                                            metas)

            data_hints.set_hint(table, "taxid", "352472")
            data_hints.set_hint(table, "genesinrows", False)

            self.send("Data", table)

            self.UpdateCached()

        pb.finish()

    def onSelectionChanged(self, selected, deselected):
        self.handle_commit_button()

    def handle_commit_button(self):
        self.currentSelection = \
            SelectionByKey(self.experimentsWidget.selectionModel().selection(),
                           key=(ID_INDEX,))
        self.commit_button.setDisabled(not len(self.currentSelection))

    def saveHeaderState(self):
        hview = self.experimentsWidget.header()
        for i, label in enumerate(self.headerLabels):
            self.experimentsHeaderState[label] = hview.isSectionHidden(i)

    def restoreHeaderState(self):
        hview = self.experimentsWidget.header()
        state = self.experimentsHeaderState
        for i, label in enumerate(self.headerLabels):
            hview.setSectionHidden(i, state.get(label, True))
            self.experimentsWidget.resizeColumnToContents(i)
Beispiel #7
0
class OWGenExpress(widget.OWWidget):
    name = "GenExpress"
    description = "Expression data from GenExpress."
    icon = "../widgets/icons/GenCloud.svg"
    priority = 36

    inputs = []
    outputs = [("Data", Orange.data.Table)]

    username = settings.Setting("anonymous")
    password = settings.Setting("")
    log2 = settings.Setting(False)
    transpose = settings.Setting(False)
    rtypei = settings.Setting(0)
    projecti = settings.Setting(0)
    serveri = settings.Setting(0)
    exnamei = settings.Setting(6)

    excludeconstant = settings.Setting(False)
    joinreplicates = settings.Setting(False)
    currentSelection = settings.Setting(None)

    experimentsHeaderState = settings.Setting({
        name: False for _, name in HEADER[:ID_INDEX + 1]}
    )

    storedSortOrder = settings.Setting([])
    storedSelections = settings.Setting([])

    def __init__(self, parent=None):
        super().__init__(parent)

        self.servers = [
            ('https://dictyexpress.research.bcm.edu/', 'dictyExpress'),
            ('https://cloud.genialis.com/', 'Genialis'),
        ]

        self.selectedExperiments = []
        self.buffer = dicty.CacheSQLite(bufferfile)

        self.searchString = ""

        self.items = []

        self.result_types = []

        self.controlArea.setMaximumWidth(250)
        self.controlArea.setMinimumWidth(250)

        box = gui.widgetBox(self.controlArea, 'Project')
        self.projectCB = gui.comboBox(
            box, self, "projecti", items=[], callback=self.ProjectChosen)

        self.projects = []

        b = gui.widgetBox(self.controlArea, "Selection bookmarks")
        self.selectionSetsWidget = SelectionSetsWidget(self)
        self.selectionSetsWidget.setSizePolicy(
            QSizePolicy.Preferred, QSizePolicy.Maximum)

        def store_selections(modified):
            if not modified:
                self.storedSelections = self.selectionSetsWidget.selections
        self.selectionSetsWidget.selectionModified.connect(store_selections)

        b.layout().addWidget(self.selectionSetsWidget)

        gui.separator(self.controlArea)

        b = gui.widgetBox(self.controlArea, "Sort output columns")
        self.columnsSortingWidget = SortedListWidget(self)
        self.columnsSortingWidget.setSizePolicy(
            QSizePolicy.Preferred, QSizePolicy.Maximum)

        box = gui.widgetBox(self.controlArea, 'Experiment name')
        self.experimentNameCB = gui.comboBox(
            box, self, "exnamei", items=SORTING_MODEL_LIST)

        b.layout().addWidget(self.columnsSortingWidget)
        sorting_model = QStringListModel(SORTING_MODEL_LIST)
        self.columnsSortingWidget.setModel(sorting_model)
        self.columnsSortingWidget.sortingOrder = self.storedSortOrder

        def store_sort_order():
            self.storedSortOrder = self.columnsSortingWidget.sortingOrder
        self.columnsSortingWidget.sortingOrderChanged.connect(store_sort_order)

        gui.separator(self.controlArea)


        box = gui.widgetBox(self.controlArea, 'Expression Type')
        self.expressionTypesCB = gui.comboBox(
            box, self, "rtypei", items=[], callback=self.UpdateResultsList)

        gui.checkBox(self.controlArea, self, "excludeconstant",
                     "Exclude labels with constant values")

        gui.checkBox(self.controlArea, self, "joinreplicates",
                     "Average replicates (use median)")

        gui.checkBox(self.controlArea, self, "log2",
                     "Logarithmic (base 2) transformation")

        gui.checkBox(self.controlArea, self, "transpose",
                     "Genes as columns")

        self.commit_button = gui.button(self.controlArea, self, "&Commit",
                                        callback=self.Commit)
        self.commit_button.setDisabled(True)

        gui.rubber(self.controlArea)

        box = gui.widgetBox(self.controlArea, 'Server')
        gui.comboBox(box, self, "serveri",
                     items=[title for url, title in self.servers],
                     callback=self.ServerChosen)

        gui.lineEdit(box, self, "username", "Username:"******"password", "Password:"******"Clear cache",
                   callback=self.clear_cache)

        gui.lineEdit(self.mainArea, self, "searchString", "Search",
                     callbackOnType=True,
                     callback=self.SearchUpdate)

        self.headerLabels = [t[1] for t in HEADER]

        self.experimentsWidget = QTreeWidget()
        self.experimentsWidget.setHeaderLabels(self.headerLabels)
        self.experimentsWidget.setSelectionMode(QTreeWidget.ExtendedSelection)
        self.experimentsWidget.setRootIsDecorated(False)
        self.experimentsWidget.setSortingEnabled(True)

        contextEventFilter = gui.VisibleHeaderSectionContextEventFilter(
            self.experimentsWidget, self.experimentsWidget)

        self.experimentsWidget.header().installEventFilter(contextEventFilter)
        self.experimentsWidget.setItemDelegateForColumn(
            0, gui.IndicatorItemDelegate(self, role=Qt.DisplayRole))

        self.experimentsWidget.setAlternatingRowColors(True)

        self.experimentsWidget.selectionModel().selectionChanged.connect(
            self.onSelectionChanged)

        self.selectionSetsWidget.setSelectionModel(
            self.experimentsWidget.selectionModel())
        self.selectionSetsWidget.setSelections(self.storedSelections)

        self.mainArea.layout().addWidget(self.experimentsWidget)

        self.restoreHeaderState()

        self.experimentsWidget.header().geometriesChanged.connect(
            self.saveHeaderState)

        self.dbc = None

        self.AuthSet()

        QTimer.singleShot(100, self.ConnectAndUpdate)

    def sizeHint(self):
        return QSize(800, 600)

    def AuthSet(self):
        if len(self.username):
            self.passf.setDisabled(False)
        else:
            self.passf.setDisabled(True)

    def AuthChanged(self):
        self.AuthSet()
        self.ConnectAndUpdate()

    def ConnectAndUpdate(self):
        self.Connect()
        if self.dbc:
            def get_data_count(project_id):
                # XXX: is there a better way?
                # Note: limit 0 would return all objects
                return self.dbc.gen.api.data.get(case_ids__contains=project_id,
                                                 type__startswith='data:expression:',
                                                 limit=1)['meta']['total_count']

            self.projects = sorted([p for p in self.dbc.projects().items() if
                                    get_data_count(p[0]) > 0], key=lambda x: x[1])
            self.UpdateProjects()
            self.ProjectChosen()
            self.UpdateExperimentTypes()

    def Connect(self):
        self.error(1)
        self.warning(1)

        username = '******'
        password = '******'
        url = self.servers[self.serveri][0]

        if self.username:
            username = self.username
            password = self.password

        if username.lower() in ['*****@*****.**', 'anonymous']:
            username = '******'
            password = '******'

        self.dbc = None
        self.projects = []
        self.result_types = []

        try:
            self.dbc = Genesis(
                address=url, username=username, password=password,
                cache=self.buffer)
        except requests.exceptions.ConnectionError:
            self.dbc = Genesis(
                address=url, username=username, password=password,
                connect=False, cache=self.buffer)
            self.warning(1, "Could not connect to server, working from cache.")
        except Exception:
            self.error(1, "Wrong username or password.")

        self.UpdateProjects()
        self.UpdateExperimentTypes()  # clear lists

    def Reload(self):
        self.UpdateExperiments(reload=True)

    def clear_cache(self):
        self.buffer.clear()
        self.Reload()

    def rtype(self):
        """Return selected result template type """
        if self.result_types:
            return self.result_types[self.rtypei]
        else:
            return None

    def UpdateExperimentTypes(self):
        self.expressionTypesCB.clear()
        items = [self.result_types_labels[desc] for desc in self.result_types]
        self.expressionTypesCB.addItems(items)
        #do not update anything if the list is empty
        if len(self.result_types):
            self.rtypei = max(0, min(self.rtypei, len(self.result_types) - 1))

    def UpdateProjects(self):
        self.projectCB.clear()
        items = [desc for pid, desc in self.projects]
        self.projectCB.addItems(items)
        #do not update anything if the list if empty
        if len(self.projects) > 0:
            self.projecti = max(0, min(self.projecti, len(self.projects) - 1))

    def UpdateExperiments(self, reload=False):

        self.experimentsWidget.clear()

        if not self.dbc or not self.dbc.projectid:  # the connection did not succeed
            return

        self.items = []

        self.progressBarInit()

        result_types = []
        result_types_labels = []

        sucind = False  # success indicator for database index

        try:
            result_types, result_types_labels = self.dbc.result_types(reload=reload)
            sucind = True
        except Exception:
            try:
                result_types, result_types_labels = self.dbc.result_types()
                self.warning(0, "Can not access database - using cached data.")
                sucind = True
            except Exception:
                self.error(0, "Can not access database.")

        if sucind:
            self.warning(0)
            self.error(0)

        self.result_types = result_types
        self.result_types_labels = result_types_labels

        self.UpdateExperimentTypes()
        self.UpdateResultsList(reload=reload)

        self.progressBarFinished()

        if self.currentSelection:
            self.currentSelection.select(self.experimentsWidget.selectionModel())

        self.handle_commit_button()

    def ProjectChosen(self, reload=False):
        if self.projects:
            self.dbc.projectid = self.projects[self.projecti][0]
        else:
            self.dbc.projectid = None

        self.UpdateExperiments(reload=reload)

    def ServerChosen(self):
        self.ConnectAndUpdate()

    def UpdateResultsList(self, reload=False):
        results_list = self.dbc.results_list(self.rtype(), reload=reload)
        try:
            results_list = self.dbc.results_list(self.rtype(), reload=reload)
        except Exception:
            try:
                results_list = self.dbc.results_list(self.rtype())
            except Exception:
                self.error(0, "Can not access database.")

        self.results_list = results_list

        #softly change the view so that the selection stays the same

        items_shown = {}
        for i, item in enumerate(self.items):
            c = str(item.text(ID_INDEX))
            items_shown[c] = i

        items_to_show = set(id_ for id_ in self.results_list)

        add_items = set(items_to_show) - set(items_shown)
        delete_items = set(items_shown) - set(items_to_show)

        i = 0
        while i < self.experimentsWidget.topLevelItemCount():
            it = self.experimentsWidget.topLevelItem(i)
            if str(it.text(ID_INDEX)) in delete_items:
                self.experimentsWidget.takeTopLevelItem(i)
            else:
                i += 1

        delete_ind = set([items_shown[i] for i in delete_items])
        self.items = [it for i, it in enumerate(self.items)
                      if i not in delete_ind]

        for r_annot in add_items:
            d = defaultdict(lambda: "?", self.results_list[r_annot])
            row_items = [""] + [to_text(d.get(key, "?"))
                                for key, _ in HEADER[1:]]
            row_items[ID_INDEX] = r_annot

            ci = MyTreeWidgetItem(self.experimentsWidget, row_items)
            self.items.append(ci)

        for i in range(len(self.headerLabels)):
            self.experimentsWidget.resizeColumnToContents(i)

        self.wantbufver = lambda x: self.results_list[x]["date_modified"]

        self.UpdateCached()

    def UpdateCached(self):
        if self.wantbufver and self.dbc:

            for item in self.items:
                id = str(item.text(ID_INDEX))
                version = self.dbc._in_buffer(id + "|||" + self.rtype())
                value = " " if version == self.wantbufver(id) else ""
                item.setData(0, Qt.DisplayRole, value)

    def SearchUpdate(self, string=""):
        for item in self.items:
            item.setHidden(
                not all(s in item for s in self.searchString.split()))

    def Commit(self):

        pb = gui.ProgressBar(self, iterations=100)

        table = None

        ids = []
        for item in self.experimentsWidget.selectedItems():
            unique_id = str(item.text(ID_INDEX))
            ids.append(unique_id)

        transfn = None
        if self.log2:
            transfn = lambda x: math.log(x + 1.0, 2)

        reverse_header_dict = {name: name for key, name in HEADER}
        reverse_header_dict["ID"] = "id"

        allowed_labels = None

        def namefn(a):
            name = SORTING_MODEL_LIST[self.exnamei]
            name = reverse_header_dict.get(name, "id")
            return dict(a)[name]

        if len(ids):
            table = self.dbc.get_data(
                ids=ids, result_type=self.rtype(),
                callback=pb.advance,
                exclude_constant_labels=self.excludeconstant,
                bufver=self.wantbufver,
                transform=transfn,
                allowed_labels=allowed_labels,
                namefn=namefn)

            if self.joinreplicates:
                table = dicty.join_replicates(table,
                    ignorenames=self.dbc.IGNORE_REPLICATE,
                    namefn="name",
                    avg=dicty.median,
                    fnshow=lambda x: " | ".join(map(str, x)))

            # Sort attributes
            sortOrder = self.columnsSortingWidget.sortingOrder

            all_values = defaultdict(set)
            for at in table.domain.attributes:
                atts = at.attributes
                for name in sortOrder:
                    all_values[name].add(atts.get(reverse_header_dict[name], ""))

            isnum = {}
            for at, vals in all_values.items():
                vals = filter(None, vals)
                try:
                    for a in vals:
                        float(a)
                    isnum[at] = True
                except ValueError:
                    isnum[at] = False

            def optfloat(x, at):
                if x == "":
                    return ""
                else:
                    return float(x) if isnum[at] else x

            def sorting_key(attr):
                atts = attr.attributes
                return tuple([optfloat(atts.get(reverse_header_dict[name], ""), name)
                              for name in sortOrder])

            attributes = sorted(table.domain.attributes, key=sorting_key)

            domain = Orange.data.Domain(
                attributes, table.domain.class_vars, table.domain.metas)

            table = Orange.data.Table.from_table(domain, table)
            table = Orange.data.Table(domain, table)

            if self.transpose:
                experiments = [at for at in table.domain.variables]
                attr = [compat.ContinuousVariable.make(ex['DDB'].value) for ex in table]
                metavars = sorted(table.domain.variables[0].attributes.keys())
                metavars = [compat.StringVariable.make(name) for name in metavars]
                domain = compat.create_domain(attr, None, metavars)
                metas = [[exp.attributes[var.name] for var in metavars] for exp in experiments]
                table = compat.create_table(domain, table.X.transpose(), None, metas)

            data_hints.set_hint(table, "taxid", "352472")
            data_hints.set_hint(table, "genesinrows", False)

            self.send("Data", table)

            self.UpdateCached()

        pb.finish()

    def onSelectionChanged(self, selected, deselected):
        self.handle_commit_button()

    def handle_commit_button(self):
        self.currentSelection = \
            SelectionByKey(self.experimentsWidget.selectionModel().selection(),
                           key=(ID_INDEX,))
        self.commit_button.setDisabled(not len(self.currentSelection))

    def saveHeaderState(self):
        hview = self.experimentsWidget.header()
        for i, label in enumerate(self.headerLabels):
            self.experimentsHeaderState[label] = hview.isSectionHidden(i)

    def restoreHeaderState(self):
        hview = self.experimentsWidget.header()
        state = self.experimentsHeaderState
        for i, label in enumerate(self.headerLabels):
            hview.setSectionHidden(i, state.get(label, True))
            self.experimentsWidget.resizeColumnToContents(i)
class OWGEODatasets(OWWidget):
    name = "GEO Data Sets"
    description = DESCRIPTION
    icon = "../widgets/icons/GEODataSets.svg"
    priority = PRIORITY

    inputs = []
    outputs = [("Expression Data", Orange.data.Table)]

    settingsList = ["outputRows", "mergeSpots", "gdsSelectionStates",
                    "splitterSettings", "currentGds", "autoCommit",
                    "datasetNames"]

    outputRows = Setting(True)
    mergeSpots = Setting(True)
    gdsSelectionStates = Setting({})
    currentGds = Setting(None)
    datasetNames = Setting({})
    splitterSettings = Setting(
        (b'\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x01\xea\x00\x00\x00\xd7\x01\x00\x00\x00\x07\x01\x00\x00\x00\x02',
         b'\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x01\xb5\x00\x00\x02\x10\x01\x00\x00\x00\x07\x01\x00\x00\x00\x01')
    )

    autoCommit = Setting(False)

    def __init__(self, parent=None, signalManager=None, name=" GEO Data Sets"):
        OWWidget.__init__(self, parent, signalManager, name)

        self.selectionChanged = False
        self.filterString = ""
        self.datasetName = ""

        ## GUI
        box = gui.widgetBox(self.controlArea, "Info", addSpace=True)
        self.infoBox = gui.widgetLabel(box, "Initializing\n\n")

        box = gui.widgetBox(self.controlArea, "Output", addSpace=True)
        gui.radioButtonsInBox(box, self, "outputRows",
                              ["Genes in rows", "Samples in rows"], "Rows",
                              callback=self.commitIf)

        gui.checkBox(box, self, "mergeSpots", "Merge spots of same gene",
                     callback=self.commitIf)

        gui.separator(box)
        self.nameEdit = gui.lineEdit(
            box, self, "datasetName", "Data set name",
            tooltip="Override the default output data set name",
            callback=self.onNameEdited
        )
        self.nameEdit.setPlaceholderText("")

        if sys.version_info < (3, ):
            box = gui.widgetBox(self.controlArea, "Commit", addSpace=True)
            self.commitButton = gui.button(
                box, self, "Commit", callback=self.commit)
            cb = gui.checkBox(box, self, "autoCommit", "Commit on any change")
            gui.setStopper(self, self.commitButton, cb, "selectionChanged",
                           self.commit)
        else:
            gui.auto_commit(self.controlArea, self, "autoCommit", "Commit",
                            box="Commit")
            self.commitIf = self.commit

        gui.rubber(self.controlArea)

        gui.widgetLabel(self.mainArea, "Filter")
        self.filterLineEdit = QLineEdit(
            textChanged=self.filter
        )
        self.completer = TokenListCompleter(
            self, caseSensitivity=Qt.CaseInsensitive
        )
        self.filterLineEdit.setCompleter(self.completer)

        self.mainArea.layout().addWidget(self.filterLineEdit)

        splitter = QSplitter(Qt.Vertical, self.mainArea)
        self.mainArea.layout().addWidget(splitter)
        self.treeWidget = QTreeView(splitter)

        self.treeWidget.setSelectionMode(QTreeView.SingleSelection)
        self.treeWidget.setRootIsDecorated(False)
        self.treeWidget.setSortingEnabled(True)
        self.treeWidget.setAlternatingRowColors(True)
        self.treeWidget.setUniformRowHeights(True)
        self.treeWidget.setEditTriggers(QTreeView.NoEditTriggers)

        linkdelegate = LinkStyledItemDelegate(self.treeWidget)
        self.treeWidget.setItemDelegateForColumn(1, linkdelegate)
        self.treeWidget.setItemDelegateForColumn(8, linkdelegate)
        self.treeWidget.setItemDelegateForColumn(
            0, gui.IndicatorItemDelegate(self.treeWidget,
                                         role=Qt.DisplayRole))

        proxyModel = MySortFilterProxyModel(self.treeWidget)
        self.treeWidget.setModel(proxyModel)
        self.treeWidget.selectionModel().selectionChanged.connect(
            self.updateSelection
        )
        self.treeWidget.viewport().setMouseTracking(True)

        splitterH = QSplitter(Qt.Horizontal, splitter)

        box = gui.widgetBox(splitterH, "Description")
        self.infoGDS = gui.widgetLabel(box, "")
        self.infoGDS.setWordWrap(True)
        gui.rubber(box)

        box = gui.widgetBox(splitterH, "Sample Annotations")
        self.annotationsTree = QTreeWidget(box)
        self.annotationsTree.setHeaderLabels(
            ["Type (Sample annotations)", "Sample count"]
        )
        self.annotationsTree.setRootIsDecorated(True)
        box.layout().addWidget(self.annotationsTree)
        self.annotationsTree.itemChanged.connect(
            self.annotationSelectionChanged
        )
        self._annotationsUpdating = False
        self.splitters = splitter, splitterH

        for sp, setting in zip(self.splitters, self.splitterSettings):
            sp.splitterMoved.connect(self.splitterMoved)
            sp.restoreState(setting)

        self.searchKeys = ["dataset_id", "title", "platform_organism",
                           "description"]

        self.gds = []
        self.gds_info = None

        self.resize(1000, 600)

        self.setBlocking(True)
        self.setEnabled(False)
        self.progressBarInit()

        self._executor = ThreadExecutor()

        func = partial(get_gds_model,
                       methodinvoke(self, "_setProgress", (float,)))
        self._inittask = Task(function=func)
        self._inittask.finished.connect(self._initializemodel)
        self._executor.submit(self._inittask)

        self._datatask = None

    @Slot(float)
    def _setProgress(self, value):
        self.progressBarValue = value

    def _initializemodel(self):
        assert self.thread() is QThread.currentThread()
        model, self.gds_info, self.gds = self._inittask.result()
        model.setParent(self)

        proxy = self.treeWidget.model()
        proxy.setFilterKeyColumn(0)
        proxy.setFilterRole(TextFilterRole)
        proxy.setFilterCaseSensitivity(False)
        proxy.setFilterFixedString(self.filterString)

        proxy.setSourceModel(model)
        proxy.sort(0, Qt.DescendingOrder)

        self.progressBarFinished()
        self.setBlocking(False)
        self.setEnabled(True)

        filter_items = " ".join(
            gds[key] for gds in self.gds for key in self.searchKeys
        )
        tr_chars = ",.:;!?(){}[]_-+\\|/%#@$^&*<>~`"
        tr_table = str.maketrans(tr_chars, " " * len(tr_chars))
        filter_items = filter_items.translate(tr_table)

        filter_items = sorted(set(filter_items.split(" ")))
        filter_items = [item for item in filter_items if len(item) > 3]

        self.completer.setTokenList(filter_items)

        if self.currentGds:
            current_id = self.currentGds["dataset_id"]
            gdss = [(i, qunpack(proxy.data(proxy.index(i, 1), Qt.DisplayRole)))
                    for i in range(proxy.rowCount())]
            current = [i for i, data in gdss if data and data == current_id]
            if current:
                current_index = proxy.index(current[0], 0)
                self.treeWidget.selectionModel().select(
                    current_index,
                    QItemSelectionModel.Select | QItemSelectionModel.Rows
                )
                self.treeWidget.scrollTo(
                    current_index, QTreeView.PositionAtCenter)

        for i in range(8):
            self.treeWidget.resizeColumnToContents(i)

        self.treeWidget.setColumnWidth(
            1, min(self.treeWidget.columnWidth(1), 300))
        self.treeWidget.setColumnWidth(
            2, min(self.treeWidget.columnWidth(2), 200))

        self.updateInfo()

    def updateInfo(self):
        gds_info = self.gds_info
        text = ("%i datasets\n%i datasets cached\n" %
                (len(gds_info),
                 len(glob.glob(serverfiles.localpath("GEO") + "/GDS*"))))
        filtered = self.treeWidget.model().rowCount()
        if len(self.gds) != filtered:
            text += ("%i after filtering") % filtered
        self.infoBox.setText(text)

    def updateSelection(self, *args):
        current = self.treeWidget.selectedIndexes()
        mapToSource = self.treeWidget.model().mapToSource
        current = [mapToSource(index).row() for index in current]
        if current:
            self.currentGds = self.gds[current[0]]
            self.setAnnotations(self.currentGds)
            self.infoGDS.setText(self.currentGds.get("description", ""))
            self.nameEdit.setPlaceholderText(self.currentGds["title"])
            self.datasetName = \
                self.datasetNames.get(self.currentGds["dataset_id"], "")
        else:
            self.currentGds = None
            self.nameEdit.setPlaceholderText("")
            self.datasetName = ""

        self.commitIf()

    def setAnnotations(self, gds):
        self._annotationsUpdating = True
        self.annotationsTree.clear()

        annotations = defaultdict(set)
        subsetscount = {}
        for desc in gds["subsets"]:
            annotations[desc["type"]].add(desc["description"])
            subsetscount[desc["description"]] = str(len(desc["sample_id"]))

        for type, subsets in annotations.items():
            key = (gds["dataset_id"], type)
            subsetItem = QTreeWidgetItem(self.annotationsTree, [type])
            subsetItem.setFlags(subsetItem.flags() | Qt.ItemIsUserCheckable |
                                Qt.ItemIsTristate)
            subsetItem.setCheckState(
                0, self.gdsSelectionStates.get(key, Qt.Checked)
            )
            subsetItem.key = key
            for subset in subsets:
                key = (gds["dataset_id"], type, subset)
                item = QTreeWidgetItem(
                    subsetItem, [subset, subsetscount.get(subset, "")]
                )
                item.setFlags(item.flags() | Qt.ItemIsUserCheckable)
                item.setCheckState(
                    0, self.gdsSelectionStates.get(key, Qt.Checked)
                )
                item.key = key
        self._annotationsUpdating = False
        self.annotationsTree.expandAll()
        for i in range(self.annotationsTree.columnCount()):
            self.annotationsTree.resizeColumnToContents(i)

    def annotationSelectionChanged(self, item, column):
        if self._annotationsUpdating:
            return
        for i in range(self.annotationsTree.topLevelItemCount()):
            item = self.annotationsTree.topLevelItem(i)
            self.gdsSelectionStates[item.key] = item.checkState(0)
            for j in range(item.childCount()):
                child = item.child(j)
                self.gdsSelectionStates[child.key] = child.checkState(0)

    def filter(self):
        filter_string = unicode(self.filterLineEdit.text())
        proxyModel = self.treeWidget.model()
        if proxyModel:
            strings = filter_string.lower().strip().split()
            proxyModel.setFilterFixedStrings(strings)
            self.updateInfo()

    def selectedSamples(self):
        """
        Return the currently selected sample annotations.

        The return value is a list of selected (sample type, sample value)
        tuples.

        .. note:: if some Sample annotation type has no selected values.
                  this method will return all values for it.

        """
        samples = []
        unused_types = []
        used_types = []
        for stype in childiter(self.annotationsTree.invisibleRootItem()):
            selected_values = []
            all_values = []
            for sval in childiter(stype):
                value = (str(stype.text(0)), str(sval.text(0)))
                if self.gdsSelectionStates.get(sval.key, True):
                    selected_values.append(value)
                all_values.append(value)
            if selected_values:
                samples.extend(selected_values)
                used_types.append(str(stype.text(0)))
            else:
                # If no sample of sample type is selected we don't filter
                # on it.
                samples.extend(all_values)
                unused_types.append(str(stype.text(0)))

        return samples, used_types

    def commitIf(self):
        if self.autoCommit:
            self.commit()
        else:
            self.selectionChanged = True

    @Slot(int, int)
    def progressCompleted(self, value, total):
        if total > 0:
            self.progressBarSet(100. * value / total, processEvents=False)
        else:
            pass
            # TODO: report 'indeterminate progress'

    def commit(self):
        if self.currentGds:
            self.error(0)
            sample_type = None
            self.progressBarInit(processEvents=None)

            _, groups = self.selectedSamples()
            if len(groups) == 1 and self.outputRows:
                sample_type = groups[0]

            self.setEnabled(False)
            self.setBlocking(True)

            progress = methodinvoke(self, "progressCompleted", (int, int))

            def get_data(gds_id, report_genes, transpose, sample_type, title):
                gds_ensure_downloaded(gds_id, progress)
                gds = geo.GDS(gds_id)
                data = gds.getdata(
                    report_genes=report_genes, transpose=transpose,
                    sample_type=sample_type
                )
                data.name = title
                return data

            get_data = partial(
                get_data, self.currentGds["dataset_id"],
                report_genes=self.mergeSpots,
                transpose=self.outputRows,
                sample_type=sample_type,
                title=self.datasetName or self.currentGds["title"]
            )
            self._datatask = Task(function=get_data)
            self._datatask.finished.connect(self._on_dataready)
            self._executor.submit(self._datatask)

    def _on_dataready(self):
        self.setEnabled(True)
        self.setBlocking(False)
        self.progressBarFinished(processEvents=False)

        try:
            data = self._datatask.result()
        except urlrequest.URLError as error:
            self.error(0, ("Error while connecting to the NCBI ftp server! "
                           "'%s'" % error))
            sys.excepthook(type(error), error, getattr(error, "__traceback__"))
            return
        finally:
            self._datatask = None

        data_name = data.name
        samples, _ = self.selectedSamples()

        self.warning(0)
        message = None
        if self.outputRows:
            def samplesinst(ex):
                out = []
                for meta in data.domain.metas:
                    out.append((meta.name, ex[meta].value))

                if data.domain.class_var.name != 'class':
                    out.append((data.domain.class_var.name,
                                ex[data.domain.class_var].value))

                return out
            samples = set(samples)
            mask = [samples.issuperset(samplesinst(ex)) for ex in data]
            data = data[numpy.array(mask, dtype=bool)]
            if len(data) == 0:
                message = "No samples with selected sample annotations."
        else:
            samples = set(samples)
            domain = Orange.data.Domain(
                [attr for attr in data.domain.attributes
                 if samples.issuperset(attr.attributes.items())],
                data.domain.class_var,
                data.domain.metas
            )
#             domain.addmetas(data.domain.getmetas())

            if len(domain.attributes) == 0:
                message = "No samples with selected sample annotations."
            stypes = set(s[0] for s in samples)
            for attr in domain.attributes:
                attr.attributes = dict(
                    (key, value) for key, value in attr.attributes.items()
                    if key in stypes
                )
            data = Orange.data.Table(domain, data)

        if message is not None:
            self.warning(0, message)

        data_hints.set_hint(data, "taxid", self.currentGds.get("taxid", ""),
                            10.0)
        data_hints.set_hint(data, "genesinrows", self.outputRows, 10.0)

        data.name = data_name
        self.send("Expression Data", data)

        model = self.treeWidget.model().sourceModel()
        row = self.gds.index(self.currentGds)

        model.setData(model.index(row, 0),  " ", Qt.DisplayRole)

        self.updateInfo()
        self.selectionChanged = False

    def splitterMoved(self, *args):
        self.splitterSettings = [bytes(sp.saveState()) for sp in self.splitters]

    def send_report(self):
        self.report_items("GEO Dataset",
                          [("ID", self.currentGds['dataset_id']), ("Title", self.currentGds['title']),
                           ("Organism", self.currentGds['sample_organism'])])
        self.report_items("Data", [("Samples", self.currentGds['sample_count']),
                                   ("Features", self.currentGds['feature_count']),
                                   ("Genes", self.currentGds['gene_count'])])
        self.report_name("Sample annotations")
        subsets = defaultdict(list)
        for subset in self.currentGds['subsets']:
            subsets[subset['type']].append((subset['description'], len(subset['sample_id'])))
        self.report_html += "<ul>"
        for type in subsets:
            self.report_html += "<b>" + type + ":</b></br>"
            for desc, count in subsets[type]:
                self.report_html += 9 * "&nbsp" + "<b>{}:</b> {}</br>".format(desc, count)
        self.report_html += "</ul>"

    def onDeleteWidget(self):
        if self._inittask:
            self._inittask.future().cancel()
            self._inittask.finished.disconnect(self._initializemodel)
        if self._datatask:
            self._datatask.future().cancel()
            self._datatask.finished.disconnect(self._on_dataready)
        self._executor.shutdown(wait=False)

        super(OWGEODatasets, self).onDeleteWidget()

    def onNameEdited(self):
        if self.currentGds:
            gds_id = self.currentGds["dataset_id"]
            self.datasetNames[gds_id] = unicode(self.nameEdit.text())
            self.commitIf()
class OWKEGGPathwayBrowser(widget.OWWidget):
    name = "KEGG Pathways"
    description = "Browse KEGG pathways that include an input set of genes."
    icon = "../widgets/icons/OWKEGGPathwayBrowser.svg"
    priority = 70

    inputs = [("Data", Orange.data.Table, "SetData", widget.Default),
              ("Reference", Orange.data.Table, "SetRefData")]
    outputs = [("Selected Data", Orange.data.Table, widget.Default),
               ("Unselected Data", Orange.data.Table)]

    autoCommit = settings.Setting(False)
    autoResize = settings.Setting(True)
    useReference = settings.Setting(False)
    showOrthology = settings.Setting(True)

    Ready, Initializing, Running = 0, 1, 2

    class Error(widget.OWWidget.Error):
        missing_annotation = widget.Msg(ERROR_ON_MISSING_ANNOTATION)
        missing_gene_id = widget.Msg(ERROR_ON_MISSING_GENE_ID)
        missing_tax_id = widget.Msg(ERROR_ON_MISSING_TAX_ID)

    def __init__(self, parent=None):
        super().__init__(parent)

        self._changedFlag = False
        self.__invalidated = False
        self.__runstate = OWKEGGPathwayBrowser.Initializing
        self.__in_setProgress = False

        self.controlArea.setMaximumWidth(250)
        box = gui.widgetBox(self.controlArea, "Info")
        self.infoLabel = gui.widgetLabel(box, "No data on input\n")

        gui.separator(self.controlArea)

        gui.checkBox(self.controlArea,
                     self,
                     "useReference",
                     "From signal",
                     box="Reference",
                     callback=self.Update)

        gui.separator(self.controlArea)

        gui.checkBox(
            self.controlArea,
            self,
            "showOrthology",
            "Show pathways in full orthology",
            box="Orthology",
            callback=self.UpdateListView,
        )

        gui.checkBox(
            self.controlArea,
            self,
            "autoResize",
            "Resize to fit",
            box="Image",
            callback=self.UpdatePathwayViewTransform,
        )

        box = gui.widgetBox(self.controlArea, "Cache Control")

        gui.button(
            box,
            self,
            "Clear cache",
            callback=self.ClearCache,
            tooltip="Clear all locally cached KEGG data.",
            default=False,
            autoDefault=False,
        )

        gui.separator(self.controlArea)

        gui.auto_commit(self.controlArea, self, "autoCommit", "Commit")

        gui.rubber(self.controlArea)

        spliter = QSplitter(Qt.Vertical, self.mainArea)
        self.pathwayView = PathwayView(self, spliter)
        self.pathwayView.scene().selectionChanged.connect(
            self._onSelectionChanged)
        self.mainArea.layout().addWidget(spliter)

        self.listView = QTreeWidget(allColumnsShowFocus=True,
                                    selectionMode=QTreeWidget.SingleSelection,
                                    sortingEnabled=True,
                                    maximumHeight=200)

        spliter.addWidget(self.listView)

        self.listView.setColumnCount(4)
        self.listView.setHeaderLabels(
            ["Pathway", "P value", "Genes", "Reference"])

        self.listView.itemSelectionChanged.connect(self.UpdatePathwayView)

        select = QAction("Select All", self, shortcut=QKeySequence.SelectAll)
        select.triggered.connect(self.selectAll)
        self.addAction(select)

        self.data = None
        self.input_genes = []
        self.tax_id = None
        self.use_attr_names = None
        self.gene_id_attribute = None
        self.gene_id_column = None

        self.ref_data = None
        self.ref_genes = []
        self.ref_tax_id = None
        self.ref_use_attr_names = None
        self.ref_gene_id_attribute = None
        self.ref_gene_id_column = None

        self.pathways = {}
        self.ncbi_gene_map = []
        self.org = None

        self._executor = concurrent.ThreadExecutor()
        self.setEnabled(False)
        self.setBlocking(True)
        progress = concurrent.methodinvoke(self, "setProgress", (float, ))

        def get_genome():
            """Return a KEGGGenome with the common org entries precached."""
            genome = kegg.KEGGGenome()

            essential = genome.essential_organisms()
            common = genome.common_organisms()
            # Remove duplicates of essential from common.
            # (essential + common list as defined here will be used in the
            # GUI.)
            common = [c for c in common if c not in essential]

            # TODO: Add option to specify additional organisms not
            # in the common list.

            keys = list(map(genome.org_code_to_entry_key, essential + common))

            genome.pre_cache(keys, progress_callback=progress)
            return (keys, genome)

        self._genomeTask = task = concurrent.Task(function=get_genome)
        task.finished.connect(self.__initialize_finish)

        self.progressBarInit()
        self.infoLabel.setText("Fetching organism definitions\n")
        self._executor.submit(task)

    def __initialize_finish(self):
        if self.__runstate != OWKEGGPathwayBrowser.Initializing:
            return

        try:
            keys, genome = self._genomeTask.result()
        except Exception as err:
            self.error(0, str(err))
            raise

        self.progressBarFinished()
        self.setEnabled(True)
        self.setBlocking(False)

        self.infoLabel.setText("No data on input\n")

    def clear(self):
        """
        Clear the widget state.
        """
        self.pathways = {}
        self.org = None

        self.infoLabel.setText("No data on input\n")
        self.listView.clear()
        self.pathwayView.SetPathway(None)

        self.send("Selected Data", None)
        self.send("Unselected Data", None)

    def SetData(self, data=None):
        if self.__runstate == OWKEGGPathwayBrowser.Initializing:
            self.__initialize_finish()

        self.Error.clear()
        if data:
            self.data = data
            self.tax_id = str(self.data.attributes.get(TAX_ID, None))
            self.use_attr_names = self.data.attributes.get(
                GENE_AS_ATTRIBUTE_NAME, None)
            self.gene_id_attribute = self.data.attributes.get(
                GENE_ID_ATTRIBUTE, None)
            self.gene_id_column = self.data.attributes.get(
                GENE_ID_COLUMN, None)

            if not (self.use_attr_names is not None and
                    ((self.gene_id_attribute is None) ^
                     (self.gene_id_column is None))):

                if self.tax_id is None:
                    self.Error.missing_annotation()
                    return

                self.Error.missing_gene_id()
                return

            elif self.tax_id is None:
                self.Error.missing_tax_id()
                return

            self.warning(0)
            self.error(0)
            self.information(0)

            self.__invalidated = True
        else:
            self.clear()

    def SetRefData(self, data=None):
        self.information(1)

        if data is not None and self.useReference:
            self.ref_data = data
            self.ref_tax_id = str(self.ref_data.attributes.get(TAX_ID, None))
            self.ref_use_attr_names = self.ref_data.attributes.get(
                GENE_AS_ATTRIBUTE_NAME, None)
            self.ref_gene_id_attribute = self.ref_data.attributes.get(
                GENE_ID_ATTRIBUTE, None)
            self.ref_gene_id_column = self.ref_data.attributes.get(
                GENE_ID_COLUMN, None)

            if not (self.ref_use_attr_names is not None and
                    ((self.ref_gene_id_attribute is None) ^
                     (self.ref_gene_id_column is None))):

                if self.ref_tax_id is None:
                    self.Error.missing_annotation()
                    return

                self.Error.missing_gene_id()
                return

            elif self.ref_tax_id is None:
                self.Error.missing_tax_id()
                return

            self.__invalidated = True

    def handleNewSignals(self):
        if self.__invalidated:
            self.Update()
            self.__invalidated = False

    def UpdateListView(self):
        self.bestPValueItem = None
        self.listView.clear()
        if not self.data:
            return

        allPathways = self.org.pathways()
        allRefPathways = kegg.pathways("map")

        items = []
        kegg_pathways = kegg.KEGGPathways()

        org_code = self.org.org_code

        if self.showOrthology:
            self.koOrthology = kegg.KEGGBrite("ko00001")
            self.listView.setRootIsDecorated(True)
            path_ids = {s[-5:] for s in self.pathways.keys()}

            def _walkCollect(koEntry):
                num = koEntry.title[:5] if koEntry.title else None
                if num in path_ids:
                    return [koEntry] + reduce(
                        lambda li, c: li + _walkCollect(c),
                        [child for child in koEntry.entries], [])
                else:
                    c = reduce(lambda li, c: li + _walkCollect(c),
                               [child for child in koEntry.entries], [])
                    return c + (c and [koEntry] or [])

            allClasses = reduce(lambda li1, li2: li1 + li2,
                                [_walkCollect(c) for c in self.koOrthology],
                                [])

            def _walkCreate(koEntry, lvItem):
                item = QTreeWidgetItem(lvItem)
                id = "path:" + org_code + koEntry.title[:5]

                if koEntry.title[:5] in path_ids:
                    p = kegg_pathways.get_entry(id)
                    if p is None:
                        # In case the genesets still have obsolete entries
                        name = koEntry.title
                    else:
                        name = p.name
                    genes, p_value, ref = self.pathways[id]
                    item.setText(0, name)
                    item.setText(1, "%.5f" % p_value)
                    item.setText(
                        2, "%i of %i" % (len(genes), len(self.input_genes)))
                    item.setText(3, "%i of %i" % (ref, len(self.ref_genes)))
                    item.pathway_id = id if p is not None else None
                else:
                    if id in allPathways:
                        text = kegg_pathways.get_entry(id).name
                    else:
                        text = koEntry.title
                    item.setText(0, text)

                    if id in allPathways:
                        item.pathway_id = id
                    elif "path:map" + koEntry.title[:5] in allRefPathways:
                        item.pathway_id = "path:map" + koEntry.title[:5]
                    else:
                        item.pathway_id = None

                for child in koEntry.entries:
                    if child in allClasses:
                        _walkCreate(child, item)

            for koEntry in self.koOrthology:
                if koEntry in allClasses:
                    _walkCreate(koEntry, self.listView)

            self.listView.update()
        else:
            self.listView.setRootIsDecorated(False)
            pathways = self.pathways.items()
            pathways = sorted(pathways, key=lambda item: item[1][1])

            for id, (genes, p_value, ref) in pathways:
                item = QTreeWidgetItem(self.listView)
                item.setText(0, kegg_pathways.get_entry(id).name)
                item.setText(1, "%.5f" % p_value)
                item.setText(2,
                             "%i of %i" % (len(genes), len(self.input_genes)))
                item.setText(3, "%i of %i" % (ref, len(self.ref_genes)))
                item.pathway_id = id
                items.append(item)

        self.bestPValueItem = items and items[0] or None
        self.listView.expandAll()
        for i in range(4):
            self.listView.resizeColumnToContents(i)

        if self.bestPValueItem:
            index = self.listView.indexFromItem(self.bestPValueItem)
            self.listView.selectionModel().select(
                index, QItemSelectionModel.ClearAndSelect)

    def UpdatePathwayView(self):
        items = self.listView.selectedItems()

        if len(items) > 0:
            item = items[0]
        else:
            item = None

        self.commit()
        item = item or self.bestPValueItem
        if not item or not item.pathway_id:
            self.pathwayView.SetPathway(None)
            return

        def get_kgml_and_image(pathway_id):
            """Return an initialized KEGGPathway with pre-cached data"""
            p = kegg.KEGGPathway(pathway_id)
            p._get_kgml()  # makes sure the kgml file is downloaded
            p._get_image_filename()  # makes sure the image is downloaded
            return (pathway_id, p)

        self.setEnabled(False)
        self._pathwayTask = concurrent.Task(
            function=lambda: get_kgml_and_image(item.pathway_id))
        self._pathwayTask.finished.connect(self._onPathwayTaskFinshed)
        self._executor.submit(self._pathwayTask)

    def _onPathwayTaskFinshed(self):
        self.setEnabled(True)
        pathway_id, self.pathway = self._pathwayTask.result()
        objects = self.pathways.get(pathway_id, [[]])[0]  # [ncbi_gene_id]
        # map ncbi_gene_id to keg_id for display
        objects = flatten(relation_map(self.ncbi_gene_map, objects))
        self.pathwayView.SetPathway(self.pathway, objects)

    def UpdatePathwayViewTransform(self):
        self.pathwayView.updateTransform()

    def Update(self):
        """
        Update (recompute enriched pathways) the widget state.
        """
        if not self.data:
            return

        self.error(0)
        self.information(0)

        # XXX: Check data in setData, do not even allow this to be executed if
        # data has no genes
        try:
            self.__get_input_genes()
            self.input_genes = set(self.input_genes)
        except ValueError:
            self.error(0, "Cannot extract gene names from input.")

        self.information(1)

        self.org = kegg.KEGGOrganism(kegg.from_taxid(self.tax_id))

        if self.useReference and self.ref_data:
            self.__get_ref_genes()
            self.ref_genes = set(self.ref_genes)
        else:
            self.ref_genes = self.org.get_ncbi_ids()

        def run_enrichment(org_code, genes, reference, progress=None):
            # We use the kegg pathway gene sets provided by 'geneset' for
            # the enrichment calculation.

            kegg_api = kegg.api.CachedKeggApi()
            link_map = kegg_api.link(org_code,
                                     "pathway")  # [(pathway_id, kegg_gene_id)]
            ncbi_gene_map = kegg_api.conv(
                org_code, 'ncbi-geneid')  # [(ncbi_gene_id, kegg_gene_id)]
            ncbi_gene_map = [(_1.split(":", 1)[1], _2)
                             for _1, _2 in ncbi_gene_map]
            link_map = relation_join(
                link_map,
                [(_2, _1)
                 for _1, _2 in ncbi_gene_map])  # [(pathway_id, ncbi_gene_id)]
            kegg_sets = relation_list_to_multimap(
                link_map)  #  {pathway_id -> [ncbi_gene_ids]}
            # map kegg gene ids to ncbi_gene_ids.
            kegg_sets = geneset.GeneSets(sets=[
                geneset.GeneSet(gs_id=ddi, genes=set(genes))
                for ddi, genes in kegg_sets.items()
            ])
            pathways = pathway_enrichment(kegg_sets,
                                          genes,
                                          reference,
                                          callback=progress)
            # Ensure that pathway entries are pre-cached for later use in the
            # list/tree view
            kegg_pathways = kegg.KEGGPathways()
            kegg_pathways.pre_cache(pathways.keys(),
                                    progress_callback=progress)

            return pathways, ncbi_gene_map

        self.progressBarInit()
        self.setEnabled(False)
        self.infoLabel.setText("Retrieving...\n")

        progress = concurrent.methodinvoke(self, "setProgress", (float, ))

        run_func = partial(run_enrichment, self.org.org_code, self.input_genes,
                           self.ref_genes, progress)
        self._enrichTask = concurrent.Task(function=run_func)
        self._enrichTask.finished.connect(self._onEnrichTaskFinished)
        self._executor.submit(self._enrichTask)

    def _onEnrichTaskFinished(self):
        self.setEnabled(True)
        self.setBlocking(False)
        try:
            pathways, ncbi_gene_map = self._enrichTask.result()
        except Exception:
            raise

        self.progressBarFinished()

        self.pathways = pathways
        self.ncbi_gene_map = ncbi_gene_map

        if not self.pathways:
            self.warning(0, "No enriched pathways found.")
        else:
            self.warning(0)

        self.infoLabel.setText("{} unique gene names on input\n".format(
            len(set(self.input_genes))))

        self.UpdateListView()

    @Slot(float)
    def setProgress(self, value):
        if self.__in_setProgress:
            return

        self.__in_setProgress = True
        self.progressBarSet(value)
        self.__in_setProgress = False

    def __get_input_genes(self):
        """
        Extract and return gene names from `data`.
        """
        self.input_genes = []

        if self.use_attr_names:
            for variable in self.data.domain.attributes:
                self.input_genes.append(
                    str(variable.attributes.get(self.gene_id_attribute, '?')))
        else:
            genes, _ = self.data.get_column_view(self.gene_id_column)
            self.input_genes = [str(g) for g in genes]

        if len(self.input_genes) <= 0:
            raise ValueError("No gene names in data.")

    def __get_ref_genes(self):
        """
        Extract and return gene names from `data`.
        """
        self.ref_genes = []

        if self.ref_use_attr_names:
            for variable in self.ref_data.domain.attributes:
                self.ref_genes.append(
                    str(
                        variable.attributes.get(self.ref_gene_id_attribute,
                                                '?')))
        else:
            genes, _ = self.ref_data.get_column_view(self.ref_gene_id_column)
            self.ref_genes = [str(g) for g in genes]

    def selectAll(self):
        """
        Select all items in the pathway view.
        """
        changed = False
        scene = self.pathwayView.scene()
        with disconnected(scene.selectionChanged, self._onSelectionChanged):
            for item in scene.items():
                if item.flags(
                ) & QGraphicsItem.ItemIsSelectable and not item.isSelected():
                    item.setSelected(True)
                    changed = True
        if changed:
            self._onSelectionChanged()

    def _onSelectionChanged(self):
        # Item selection in the pathwayView/scene has changed
        self.commit()

    def commit(self):
        if self.data:
            selectedItems = self.pathwayView.scene().selectedItems()
            selectedGenes = reduce(
                set.union, [item.marked_objects for item in selectedItems],
                set())
            # map kegg_ids back to ncbi_gene_id
            backmap = [(_2, _1) for _1, _2 in self.ncbi_gene_map]
            selectedGenes = set(flatten(relation_map(backmap, selectedGenes)))
            if self.use_attr_names:
                selected = [
                    column for column in self.data.domain.attributes
                    if self.gene_id_attribute in column.attributes
                    and str(column.attributes[
                        self.gene_id_attribute]) in selectedGenes
                ]
                data = self.data[:, selected]
                self.send("Selected Data", data)
            else:
                selected_indices = []
                other_indices = []

                for row_index, row in enumerate(self.data):
                    gene_in_row = str(row[self.gene_id_column])
                    if gene_in_row in self.input_genes and gene_in_row in selectedGenes:
                        selected_indices.append(row_index)
                    else:
                        other_indices.append(row_index)

                if selected_indices:
                    selected = self.data[selected_indices]
                else:
                    selected = None

                if other_indices:
                    other = self.data[other_indices]
                else:
                    other = None

                self.send("Selected Data", selected)
                self.send("Unselected Data", other)
        else:
            self.send("Selected Data", None)
            self.send("Unselected Data", None)

    def ClearCache(self):
        kegg.caching.clear_cache()

    def onDeleteWidget(self):
        """
        Called before the widget is removed from the canvas.
        """
        super().onDeleteWidget()

        self.org = None
        self._executor.shutdown(wait=False)
        gc.collect()  # Force collection (WHY?)

    def sizeHint(self):
        return QSize(1024, 720)
class OWKEGGPathwayBrowser(widget.OWWidget):
    name = "KEGG Pathways"
    description = "Browse KEGG pathways that include an input set of genes."
    icon = "../widgets/icons/KEGGPathways.svg"
    priority = 2030

    inputs = [("Data", Orange.data.Table, "SetData", widget.Default), ("Reference", Orange.data.Table, "SetRefData")]
    outputs = [("Selected Data", Orange.data.Table, widget.Default), ("Unselected Data", Orange.data.Table)]

    settingsHandler = settings.DomainContextHandler()

    organismIndex = settings.ContextSetting(0)
    geneAttrIndex = settings.ContextSetting(0)
    useAttrNames = settings.ContextSetting(False)

    autoCommit = settings.Setting(False)
    autoResize = settings.Setting(True)
    useReference = settings.Setting(False)
    showOrthology = settings.Setting(True)

    Ready, Initializing, Running = 0, 1, 2

    def __init__(self, parent=None):
        super().__init__(parent)

        self.organismCodes = []
        self._changedFlag = False
        self.__invalidated = False
        self.__runstate = OWKEGGPathwayBrowser.Initializing
        self.__in_setProgress = False

        self.controlArea.setMaximumWidth(250)
        box = gui.widgetBox(self.controlArea, "Info")
        self.infoLabel = gui.widgetLabel(box, "No data on input\n")

        # Organism selection.
        box = gui.widgetBox(self.controlArea, "Organism")
        self.organismComboBox = gui.comboBox(
            box,
            self,
            "organismIndex",
            items=[],
            callback=self.Update,
            addSpace=True,
            tooltip="Select the organism of the input genes",
        )

        # Selection of genes attribute
        box = gui.widgetBox(self.controlArea, "Gene attribute")
        self.geneAttrCandidates = itemmodels.VariableListModel(parent=self)
        self.geneAttrCombo = gui.comboBox(box, self, "geneAttrIndex", callback=self.Update)
        self.geneAttrCombo.setModel(self.geneAttrCandidates)

        gui.checkBox(
            box, self, "useAttrNames", "Use variable names", disables=[(-1, self.geneAttrCombo)], callback=self.Update
        )

        self.geneAttrCombo.setDisabled(bool(self.useAttrNames))

        gui.separator(self.controlArea)

        gui.checkBox(self.controlArea, self, "useReference", "From signal", box="Reference", callback=self.Update)

        gui.separator(self.controlArea)

        gui.checkBox(
            self.controlArea,
            self,
            "showOrthology",
            "Show pathways in full orthology",
            box="Orthology",
            callback=self.UpdateListView,
        )

        gui.checkBox(
            self.controlArea, self, "autoResize", "Resize to fit", box="Image", callback=self.UpdatePathwayViewTransform
        )

        box = gui.widgetBox(self.controlArea, "Cache Control")

        gui.button(
            box,
            self,
            "Clear cache",
            callback=self.ClearCache,
            tooltip="Clear all locally cached KEGG data.",
            default=False,
            autoDefault=False,
        )

        gui.separator(self.controlArea)

        gui.auto_commit(self.controlArea, self, "autoCommit", "Commit")

        gui.rubber(self.controlArea)

        spliter = QSplitter(Qt.Vertical, self.mainArea)
        self.pathwayView = PathwayView(self, spliter)
        self.pathwayView.scene().selectionChanged.connect(self._onSelectionChanged)
        self.mainArea.layout().addWidget(spliter)

        self.listView = QTreeWidget(
            allColumnsShowFocus=True, selectionMode=QTreeWidget.SingleSelection, sortingEnabled=True, maximumHeight=200
        )

        spliter.addWidget(self.listView)

        self.listView.setColumnCount(4)
        self.listView.setHeaderLabels(["Pathway", "P value", "Genes", "Reference"])

        self.listView.itemSelectionChanged.connect(self.UpdatePathwayView)

        select = QAction("Select All", self, shortcut=QKeySequence.SelectAll)
        select.triggered.connect(self.selectAll)
        self.addAction(select)

        self.data = None
        self.refData = None

        self._executor = concurrent.ThreadExecutor()
        self.setEnabled(False)
        self.setBlocking(True)
        progress = concurrent.methodinvoke(self, "setProgress", (float,))

        def get_genome():
            """Return a KEGGGenome with the common org entries precached."""
            genome = kegg.KEGGGenome()

            essential = genome.essential_organisms()
            common = genome.common_organisms()
            # Remove duplicates of essential from common.
            # (essential + common list as defined here will be used in the
            # GUI.)
            common = [c for c in common if c not in essential]

            # TODO: Add option to specify additional organisms not
            # in the common list.

            keys = list(map(genome.org_code_to_entry_key, essential + common))

            genome.pre_cache(keys, progress_callback=progress)
            return (keys, genome)

        self._genomeTask = task = concurrent.Task(function=get_genome)
        task.finished.connect(self.__initialize_finish)

        self.progressBarInit()
        self.infoLabel.setText("Fetching organism definitions\n")
        self._executor.submit(task)

    def __initialize_finish(self):
        if self.__runstate != OWKEGGPathwayBrowser.Initializing:
            return

        try:
            keys, genome = self._genomeTask.result()
        except Exception as err:
            self.error(0, str(err))
            raise

        self.progressBarFinished()
        self.setEnabled(True)
        self.setBlocking(False)

        entries = [genome[key] for key in keys]
        items = [entry.definition for entry in entries]
        codes = [entry.organism_code for entry in entries]

        self.organismCodes = codes
        self.organismComboBox.clear()
        self.organismComboBox.addItems(items)
        self.organismComboBox.setCurrentIndex(self.organismIndex)

        self.infoLabel.setText("No data on input\n")

    def Clear(self):
        """
        Clear the widget state.
        """
        self.queryGenes = []
        self.referenceGenes = []
        self.genes = {}
        self.uniqueGenesDict = {}
        self.revUniqueGenesDict = {}
        self.pathways = {}
        self.org = None
        self.geneAttrCandidates[:] = []

        self.infoLabel.setText("No data on input\n")
        self.listView.clear()
        self.pathwayView.SetPathway(None)

        self.send("Selected Data", None)
        self.send("Unselected Data", None)

    def SetData(self, data=None):
        if self.__runstate == OWKEGGPathwayBrowser.Initializing:
            self.__initialize_finish()

        self.closeContext()
        self.data = data
        self.warning(0)
        self.error(0)
        self.information(0)

        if data is not None:
            vars = data.domain.variables + data.domain.metas
            vars = [var for var in vars if isinstance(var, Orange.data.StringVariable)]
            self.geneAttrCandidates[:] = vars

            # Try to guess the gene name variable
            if vars:
                names_lower = [v.name.lower() for v in vars]
                scores = [(name == "gene", "gene" in name) for name in names_lower]
                imax, _ = max(enumerate(scores), key=itemgetter(1))
            else:
                imax = -1

            self.geneAttrIndex = imax

            taxid = data_hints.get_hint(data, "taxid", None)
            if taxid:
                try:
                    code = kegg.from_taxid(taxid)
                    self.organismIndex = self.organismCodes.index(code)
                except Exception as ex:
                    print(ex, taxid)

            self.useAttrNames = data_hints.get_hint(data, "genesinrows", self.useAttrNames)
            self.openContext(data)

            if len(self.geneAttrCandidates) == 0:
                self.useAttrNames = True
                self.geneAttrIndex = -1
            else:
                self.geneAttrIndex = min(self.geneAttrIndex, len(self.geneAttrCandidates) - 1)
        else:
            self.Clear()

        self.__invalidated = True

    def SetRefData(self, data=None):
        self.refData = data
        self.information(1)

        if data is not None and self.useReference:
            self.__invalidated = True

    def handleNewSignals(self):
        if self.__invalidated:
            self.Update()
            self.__invalidated = False

    def UpdateListView(self):
        self.bestPValueItem = None
        self.listView.clear()
        if not self.data:
            return

        allPathways = self.org.pathways()
        allRefPathways = kegg.pathways("map")

        items = []
        kegg_pathways = kegg.KEGGPathways()

        org_code = self.organismCodes[min(self.organismIndex, len(self.organismCodes) - 1)]

        if self.showOrthology:
            self.koOrthology = kegg.KEGGBrite("ko00001")
            self.listView.setRootIsDecorated(True)
            path_ids = set([s[-5:] for s in self.pathways.keys()])

            def _walkCollect(koEntry):
                num = koEntry.title[:5] if koEntry.title else None
                if num in path_ids:
                    return [koEntry] + reduce(
                        lambda li, c: li + _walkCollect(c), [child for child in koEntry.entries], []
                    )
                else:
                    c = reduce(lambda li, c: li + _walkCollect(c), [child for child in koEntry.entries], [])
                    return c + (c and [koEntry] or [])

            allClasses = reduce(lambda li1, li2: li1 + li2, [_walkCollect(c) for c in self.koOrthology], [])

            def _walkCreate(koEntry, lvItem):
                item = QTreeWidgetItem(lvItem)
                id = "path:" + org_code + koEntry.title[:5]

                if koEntry.title[:5] in path_ids:
                    p = kegg_pathways.get_entry(id)
                    if p is None:
                        # In case the genesets still have obsolete entries
                        name = koEntry.title
                    else:
                        name = p.name
                    genes, p_value, ref = self.pathways[id]
                    item.setText(0, name)
                    item.setText(1, "%.5f" % p_value)
                    item.setText(2, "%i of %i" % (len(genes), len(self.genes)))
                    item.setText(3, "%i of %i" % (ref, len(self.referenceGenes)))
                    item.pathway_id = id if p is not None else None
                else:
                    if id in allPathways:
                        text = kegg_pathways.get_entry(id).name
                    else:
                        text = koEntry.title
                    item.setText(0, text)

                    if id in allPathways:
                        item.pathway_id = id
                    elif "path:map" + koEntry.title[:5] in allRefPathways:
                        item.pathway_id = "path:map" + koEntry.title[:5]
                    else:
                        item.pathway_id = None

                for child in koEntry.entries:
                    if child in allClasses:
                        _walkCreate(child, item)

            for koEntry in self.koOrthology:
                if koEntry in allClasses:
                    _walkCreate(koEntry, self.listView)

            self.listView.update()
        else:
            self.listView.setRootIsDecorated(False)
            pathways = self.pathways.items()
            pathways = sorted(pathways, key=lambda item: item[1][1])

            for id, (genes, p_value, ref) in pathways:
                item = QTreeWidgetItem(self.listView)
                item.setText(0, kegg_pathways.get_entry(id).name)
                item.setText(1, "%.5f" % p_value)
                item.setText(2, "%i of %i" % (len(genes), len(self.genes)))
                item.setText(3, "%i of %i" % (ref, len(self.referenceGenes)))
                item.pathway_id = id
                items.append(item)

        self.bestPValueItem = items and items[0] or None
        self.listView.expandAll()
        for i in range(4):
            self.listView.resizeColumnToContents(i)

        if self.bestPValueItem:
            index = self.listView.indexFromItem(self.bestPValueItem)
            self.listView.selectionModel().select(index, QItemSelectionModel.ClearAndSelect)

    def UpdatePathwayView(self):
        items = self.listView.selectedItems()

        if len(items) > 0:
            item = items[0]
        else:
            item = None

        self.commit()
        item = item or self.bestPValueItem
        if not item or not item.pathway_id:
            self.pathwayView.SetPathway(None)
            return

        def get_kgml_and_image(pathway_id):
            """Return an initialized KEGGPathway with pre-cached data"""
            p = kegg.KEGGPathway(pathway_id)
            p._get_kgml()  # makes sure the kgml file is downloaded
            p._get_image_filename()  # makes sure the image is downloaded
            return (pathway_id, p)

        self.setEnabled(False)
        self._pathwayTask = concurrent.Task(function=lambda: get_kgml_and_image(item.pathway_id))
        self._pathwayTask.finished.connect(self._onPathwayTaskFinshed)
        self._executor.submit(self._pathwayTask)

    def _onPathwayTaskFinshed(self):
        self.setEnabled(True)
        pathway_id, self.pathway = self._pathwayTask.result()
        self.pathwayView.SetPathway(self.pathway, self.pathways.get(pathway_id, [[]])[0])

    def UpdatePathwayViewTransform(self):
        self.pathwayView.updateTransform()

    def Update(self):
        """
        Update (recompute enriched pathways) the widget state.
        """
        if not self.data:
            return

        self.error(0)
        self.information(0)

        # XXX: Check data in setData, do not even alow this to be executed if
        # data has no genes
        try:
            genes = self.GeneNamesFromData(self.data)
        except ValueError:
            self.error(0, "Cannot extract gene names from input.")
            genes = []

        if not self.useAttrNames and any("," in gene for gene in genes):
            genes = reduce(add, (split_and_strip(gene, ",") for gene in genes), [])
            self.information(0, "Separators detected in input gene names. " "Assuming multiple genes per instance.")
        self.queryGenes = genes

        self.information(1)
        reference = None
        if self.useReference and self.refData:
            reference = self.GeneNamesFromData(self.refData)
            if not self.useAttrNames and any("," in gene for gene in reference):
                reference = reduce(add, (split_and_strip(gene, ",") for gene in reference), [])
                self.information(
                    1, "Separators detected in reference gene " "names. Assuming multiple genes per " "instance."
                )

        org_code = self.SelectedOrganismCode()

        def run_enrichment(org_code, genes, reference=None, progress=None):
            org = kegg.KEGGOrganism(org_code)
            if reference is None:
                reference = org.get_genes()

            # Map 'genes' and 'reference' sets to unique KEGG identifiers
            unique_genes, _, _ = org.get_unique_gene_ids(set(genes))
            unique_ref_genes, _, _ = org.get_unique_gene_ids(set(reference))

            taxid = kegg.to_taxid(org.org_code)
            # Map the taxid back to standard 'common' taxids
            # (as used by 'geneset') if applicable
            r_tax_map = dict((v, k) for k, v in kegg.KEGGGenome.TAXID_MAP.items())
            if taxid in r_tax_map:
                taxid = r_tax_map[taxid]

            # We use the kegg pathway gene sets provided by 'geneset' for
            # the enrichment calculation.

            # Ensure we are using the latest genesets
            # TODO: ?? Is updating the index enough?
            serverfiles.update(geneset.sfdomain, "index.pck")
            kegg_gs_collections = geneset.collections((("KEGG", "pathways"), taxid))

            pathways = pathway_enrichment(
                kegg_gs_collections, unique_genes.keys(), unique_ref_genes.keys(), callback=progress
            )
            # Ensure that pathway entries are pre-cached for later use in the
            # list/tree view
            kegg_pathways = kegg.KEGGPathways()
            kegg_pathways.pre_cache(pathways.keys(), progress_callback=progress)

            return pathways, org, unique_genes, unique_ref_genes

        self.progressBarInit()
        self.setEnabled(False)
        self.infoLabel.setText("Retrieving...\n")

        progress = concurrent.methodinvoke(self, "setProgress", (float,))
        self._enrichTask = concurrent.Task(function=lambda: run_enrichment(org_code, genes, reference, progress))
        self._enrichTask.finished.connect(self._onEnrichTaskFinished)
        self._executor.submit(self._enrichTask)

    def _onEnrichTaskFinished(self):
        self.setEnabled(True)
        self.setBlocking(False)
        try:
            pathways, org, unique_genes, unique_ref_genes = self._enrichTask.result()
        except Exception:
            raise

        self.progressBarFinished()

        self.org = org
        self.genes = unique_genes.keys()
        self.uniqueGenesDict = unique_genes
        self.revUniqueGenesDict = dict([(val, key) for key, val in self.uniqueGenesDict.items()])
        self.referenceGenes = unique_ref_genes.keys()
        self.pathways = pathways

        if not self.pathways:
            self.warning(0, "No enriched pathways found.")
        else:
            self.warning(0)

        count = len(set(self.queryGenes))
        self.infoLabel.setText(
            "%i unique gene names on input\n"
            "%i (%.1f%%) genes names matched"
            % (count, len(unique_genes), 100.0 * len(unique_genes) / count if count else 0.0)
        )

        self.UpdateListView()

    @Slot(float)
    def setProgress(self, value):
        if self.__in_setProgress:
            return

        self.__in_setProgress = True
        self.progressBarSet(value)
        self.__in_setProgress = False

    def GeneNamesFromData(self, data):
        """
        Extract and return gene names from `data`.
        """
        if self.useAttrNames:
            genes = [str(v.name).strip() for v in data.domain.attributes]
        elif self.geneAttrCandidates:
            assert 0 <= self.geneAttrIndex < len(self.geneAttrCandidates)
            geneAttr = self.geneAttrCandidates[self.geneAttrIndex]
            genes = [str(e[geneAttr]) for e in data if not numpy.isnan(e[geneAttr])]
        else:
            raise ValueError("No gene names in data.")
        return genes

    def SelectedOrganismCode(self):
        """
        Return the selected organism code.
        """
        return self.organismCodes[min(self.organismIndex, len(self.organismCodes) - 1)]

    def selectAll(self):
        """
        Select all items in the pathway view.
        """
        changed = False
        scene = self.pathwayView.scene()
        with disconnected(scene.selectionChanged, self._onSelectionChanged):
            for item in scene.items():
                if item.flags() & QGraphicsItem.ItemIsSelectable and not item.isSelected():
                    item.setSelected(True)
                    changed = True
        if changed:
            self._onSelectionChanged()

    def _onSelectionChanged(self):
        # Item selection in the pathwayView/scene has changed
        self.commit()

    def commit(self):
        if self.data:
            selectedItems = self.pathwayView.scene().selectedItems()
            selectedGenes = reduce(set.union, [item.marked_objects for item in selectedItems], set())

            if self.useAttrNames:
                selected = [self.data.domain[self.uniqueGenesDict[gene]] for gene in selectedGenes]
                #                 newDomain = Orange.data.Domain(selectedVars, 0)
                data = self.data[:, selected]
                #                 data = Orange.data.Table(newDomain, self.data)
                self.send("Selected Data", data)
            elif self.geneAttrCandidates:
                assert 0 <= self.geneAttrIndex < len(self.geneAttrCandidates)
                geneAttr = self.geneAttrCandidates[self.geneAttrIndex]
                selectedIndices = []
                otherIndices = []
                for i, ex in enumerate(self.data):
                    names = [
                        self.revUniqueGenesDict.get(name, None) for name in split_and_strip(str(ex[geneAttr]), ",")
                    ]
                    if any(name and name in selectedGenes for name in names):
                        selectedIndices.append(i)
                    else:
                        otherIndices.append(i)

                if selectedIndices:
                    selected = self.data[selectedIndices]
                else:
                    selected = None

                if otherIndices:
                    other = self.data[otherIndices]
                else:
                    other = None

                self.send("Selected Data", selected)
                self.send("Unselected Data", other)
        else:
            self.send("Selected Data", None)
            self.send("Unselected Data", None)

    def ClearCache(self):
        kegg.caching.clear_cache()

    def onDeleteWidget(self):
        """
        Called before the widget is removed from the canvas.
        """
        super().onDeleteWidget()

        self.org = None
        self._executor.shutdown(wait=False)
        gc.collect()  # Force collection (WHY?)

    def sizeHint(self):
        return QSize(1024, 720)
class OWKEGGPathwayBrowser(widget.OWWidget):
    name = "KEGG Pathways"
    description = "Browse KEGG pathways that include an input set of genes."
    icon = "../widgets/icons/OWKEGGPathwayBrowser.svg"
    priority = 8

    inputs = [("Data", Orange.data.Table, "SetData", widget.Default),
              ("Reference", Orange.data.Table, "SetRefData")]
    outputs = [("Selected Data", Orange.data.Table, widget.Default),
               ("Unselected Data", Orange.data.Table)]

    settingsHandler = settings.DomainContextHandler()

    organismIndex = settings.ContextSetting(0)
    geneAttrIndex = settings.ContextSetting(0)
    useAttrNames = settings.ContextSetting(False)

    autoCommit = settings.Setting(False)
    autoResize = settings.Setting(True)
    useReference = settings.Setting(False)
    showOrthology = settings.Setting(True)

    Ready, Initializing, Running = 0, 1, 2

    def __init__(self, parent=None):
        super().__init__(parent)

        self.organismCodes = []
        self._changedFlag = False
        self.__invalidated = False
        self.__runstate = OWKEGGPathwayBrowser.Initializing
        self.__in_setProgress = False

        self.controlArea.setMaximumWidth(250)
        box = gui.widgetBox(self.controlArea, "Info")
        self.infoLabel = gui.widgetLabel(box, "No data on input\n")

        # Organism selection.
        box = gui.widgetBox(self.controlArea, "Organism")
        self.organismComboBox = gui.comboBox(
            box, self, "organismIndex",
            items=[],
            callback=self.Update,
            addSpace=True,
            tooltip="Select the organism of the input genes")

        # Selection of genes attribute
        box = gui.widgetBox(self.controlArea, "Gene attribute")
        self.geneAttrCandidates = itemmodels.VariableListModel(parent=self)
        self.geneAttrCombo = gui.comboBox(
            box, self, "geneAttrIndex", callback=self.Update)
        self.geneAttrCombo.setModel(self.geneAttrCandidates)

        gui.checkBox(box, self, "useAttrNames",
                    "Use variable names", disables=[(-1, self.geneAttrCombo)],
                    callback=self.Update)

        self.geneAttrCombo.setDisabled(bool(self.useAttrNames))

        gui.separator(self.controlArea)

        gui.checkBox(self.controlArea, self, "useReference",
                    "From signal", box="Reference", callback=self.Update)

        gui.separator(self.controlArea)

        gui.checkBox(self.controlArea, self, "showOrthology",
                     "Show pathways in full orthology", box="Orthology",
                     callback=self.UpdateListView)

        gui.checkBox(self.controlArea, self, "autoResize",
                     "Resize to fit", box="Image",
                     callback=self.UpdatePathwayViewTransform)

        box = gui.widgetBox(self.controlArea, "Cache Control")

        gui.button(box, self, "Clear cache",
                   callback=self.ClearCache,
                   tooltip="Clear all locally cached KEGG data.",
                   default=False, autoDefault=False)

        gui.separator(self.controlArea)

        gui.auto_commit(self.controlArea, self, "autoCommit", "Commit")

        gui.rubber(self.controlArea)

        spliter = QSplitter(Qt.Vertical, self.mainArea)
        self.pathwayView = PathwayView(self, spliter)
        self.pathwayView.scene().selectionChanged.connect(
            self._onSelectionChanged
        )
        self.mainArea.layout().addWidget(spliter)

        self.listView = QTreeWidget(
            allColumnsShowFocus=True,
            selectionMode=QTreeWidget.SingleSelection,
            sortingEnabled=True,
            maximumHeight=200)

        spliter.addWidget(self.listView)

        self.listView.setColumnCount(4)
        self.listView.setHeaderLabels(
            ["Pathway", "P value", "Genes", "Reference"])

        self.listView.itemSelectionChanged.connect(self.UpdatePathwayView)

        select = QAction(
            "Select All", self,
            shortcut=QKeySequence.SelectAll
        )
        select.triggered.connect(self.selectAll)
        self.addAction(select)

        self.data = None
        self.refData = None

        self._executor = concurrent.ThreadExecutor()
        self.setEnabled(False)
        self.setBlocking(True)
        progress = concurrent.methodinvoke(self, "setProgress", (float,))

        def get_genome():
            """Return a KEGGGenome with the common org entries precached."""
            genome = kegg.KEGGGenome()

            essential = genome.essential_organisms()
            common = genome.common_organisms()
            # Remove duplicates of essential from common.
            # (essential + common list as defined here will be used in the
            # GUI.)
            common = [c for c in common if c not in essential]

            # TODO: Add option to specify additional organisms not
            # in the common list.

            keys = list(map(genome.org_code_to_entry_key, essential + common))

            genome.pre_cache(keys, progress_callback=progress)
            return (keys, genome)

        self._genomeTask = task = concurrent.Task(function=get_genome)
        task.finished.connect(self.__initialize_finish)

        self.progressBarInit()
        self.infoLabel.setText("Fetching organism definitions\n")
        self._executor.submit(task)

    def __initialize_finish(self):
        if self.__runstate != OWKEGGPathwayBrowser.Initializing:
            return

        try:
            keys, genome = self._genomeTask.result()
        except Exception as err:
            self.error(0, str(err))
            raise

        self.progressBarFinished()
        self.setEnabled(True)
        self.setBlocking(False)

        entries = [genome[key] for key in keys]
        items = [entry.definition for entry in entries]
        codes = [entry.organism_code for entry in entries]

        self.organismCodes = codes
        self.organismComboBox.clear()
        self.organismComboBox.addItems(items)
        self.organismComboBox.setCurrentIndex(self.organismIndex)

        self.infoLabel.setText("No data on input\n")

    def Clear(self):
        """
        Clear the widget state.
        """
        self.queryGenes = []
        self.referenceGenes = []
        self.genes = {}
        self.uniqueGenesDict = {}
        self.revUniqueGenesDict = {}
        self.pathways = {}
        self.org = None
        self.geneAttrCandidates[:] = []

        self.infoLabel.setText("No data on input\n")
        self.listView.clear()
        self.pathwayView.SetPathway(None)

        self.send("Selected Data", None)
        self.send("Unselected Data", None)

    def SetData(self, data=None):
        if self.__runstate == OWKEGGPathwayBrowser.Initializing:
            self.__initialize_finish()

        self.data = data
        self.warning(0)
        self.error(0)
        self.information(0)

        if data is not None:
            vars = data.domain.variables + data.domain.metas
            vars = [var for var in vars
                    if isinstance(var, Orange.data.StringVariable)]
            self.geneAttrCandidates[:] = vars

            # Try to guess the gene name variable
            if vars:
                names_lower = [v.name.lower() for v in vars]
                scores = [(name == "gene", "gene" in name)
                          for name in names_lower]
                imax, _ = max(enumerate(scores), key=itemgetter(1))
            else:
                imax = -1

            self.geneAttrIndex = imax

            taxid = data_hints.get_hint(data, TAX_ID, None)
            if taxid:
                try:
                    code = kegg.from_taxid(taxid)
                    self.organismIndex = self.organismCodes.index(code)
                except Exception as ex:
                    print(ex, taxid)

            self.useAttrNames = data_hints.get_hint(data, GENE_NAME, self.useAttrNames)

            if len(self.geneAttrCandidates) == 0:
                self.useAttrNames = True
                self.geneAttrIndex = -1
            else:
                self.geneAttrIndex = min(self.geneAttrIndex,
                                         len(self.geneAttrCandidates) - 1)
        else:
            self.Clear()

        self.__invalidated = True

    def SetRefData(self, data=None):
        self.refData = data
        self.information(1)

        if data is not None and self.useReference:
            self.__invalidated = True

    def handleNewSignals(self):
        if self.__invalidated:
            self.Update()
            self.__invalidated = False

    def UpdateListView(self):
        self.bestPValueItem = None
        self.listView.clear()
        if not self.data:
            return

        allPathways = self.org.pathways()
        allRefPathways = kegg.pathways("map")

        items = []
        kegg_pathways = kegg.KEGGPathways()

        org_code = self.organismCodes[min(self.organismIndex,
                                          len(self.organismCodes) - 1)]

        if self.showOrthology:
            self.koOrthology = kegg.KEGGBrite("ko00001")
            self.listView.setRootIsDecorated(True)
            path_ids = set([s[-5:] for s in self.pathways.keys()])

            def _walkCollect(koEntry):
                num = koEntry.title[:5] if koEntry.title else None
                if num in path_ids:
                    return ([koEntry] +
                            reduce(lambda li, c: li + _walkCollect(c),
                                   [child for child in koEntry.entries],
                                   []))
                else:
                    c = reduce(lambda li, c: li + _walkCollect(c),
                               [child for child in koEntry.entries],
                               [])
                    return c + (c and [koEntry] or [])

            allClasses = reduce(lambda li1, li2: li1 + li2,
                                [_walkCollect(c) for c in self.koOrthology],
                                [])

            def _walkCreate(koEntry, lvItem):
                item = QTreeWidgetItem(lvItem)
                id = "path:" + org_code + koEntry.title[:5]

                if koEntry.title[:5] in path_ids:
                    p = kegg_pathways.get_entry(id)
                    if p is None:
                        # In case the genesets still have obsolete entries
                        name = koEntry.title
                    else:
                        name = p.name
                    genes, p_value, ref = self.pathways[id]
                    item.setText(0, name)
                    item.setText(1, "%.5f" % p_value)
                    item.setText(2, "%i of %i" % (len(genes), len(self.genes)))
                    item.setText(3, "%i of %i" % (ref, len(self.referenceGenes)))
                    item.pathway_id = id if p is not None else None
                else:
                    if id in allPathways:
                        text = kegg_pathways.get_entry(id).name
                    else:
                        text = koEntry.title
                    item.setText(0, text)

                    if id in allPathways:
                        item.pathway_id = id
                    elif "path:map" + koEntry.title[:5] in allRefPathways:
                        item.pathway_id = "path:map" + koEntry.title[:5]
                    else:
                        item.pathway_id = None

                for child in koEntry.entries:
                    if child in allClasses:
                        _walkCreate(child, item)

            for koEntry in self.koOrthology:
                if koEntry in allClasses:
                    _walkCreate(koEntry, self.listView)

            self.listView.update()
        else:
            self.listView.setRootIsDecorated(False)
            pathways = self.pathways.items()
            pathways = sorted(pathways, key=lambda item: item[1][1])

            for id, (genes, p_value, ref) in pathways:
                item = QTreeWidgetItem(self.listView)
                item.setText(0, kegg_pathways.get_entry(id).name)
                item.setText(1, "%.5f" % p_value)
                item.setText(2, "%i of %i" % (len(genes), len(self.genes)))
                item.setText(3, "%i of %i" % (ref, len(self.referenceGenes)))
                item.pathway_id = id
                items.append(item)

        self.bestPValueItem = items and items[0] or None
        self.listView.expandAll()
        for i in range(4):
            self.listView.resizeColumnToContents(i)

        if self.bestPValueItem:
            index = self.listView.indexFromItem(self.bestPValueItem)
            self.listView.selectionModel().select(
                index, QItemSelectionModel.ClearAndSelect
            )

    def UpdatePathwayView(self):
        items = self.listView.selectedItems()

        if len(items) > 0:
            item = items[0]
        else:
            item = None

        self.commit()
        item = item or self.bestPValueItem
        if not item or not item.pathway_id:
            self.pathwayView.SetPathway(None)
            return

        def get_kgml_and_image(pathway_id):
            """Return an initialized KEGGPathway with pre-cached data"""
            p = kegg.KEGGPathway(pathway_id)
            p._get_kgml()  # makes sure the kgml file is downloaded
            p._get_image_filename()  # makes sure the image is downloaded
            return (pathway_id, p)

        self.setEnabled(False)
        self._pathwayTask = concurrent.Task(
            function=lambda: get_kgml_and_image(item.pathway_id)
        )
        self._pathwayTask.finished.connect(self._onPathwayTaskFinshed)
        self._executor.submit(self._pathwayTask)

    def _onPathwayTaskFinshed(self):
        self.setEnabled(True)
        pathway_id, self.pathway = self._pathwayTask.result()
        self.pathwayView.SetPathway(
            self.pathway,
            self.pathways.get(pathway_id, [[]])[0]
        )

    def UpdatePathwayViewTransform(self):
        self.pathwayView.updateTransform()

    def Update(self):
        """
        Update (recompute enriched pathways) the widget state.
        """
        if not self.data:
            return

        self.error(0)
        self.information(0)

        # XXX: Check data in setData, do not even allow this to be executed if
        # data has no genes
        try:
            genes = self.GeneNamesFromData(self.data)
        except ValueError:
            self.error(0, "Cannot extract gene names from input.")
            genes = []

        if not self.useAttrNames and any("," in gene for gene in genes):
            genes = reduce(add, (split_and_strip(gene, ",")
                                 for gene in genes),
                           [])
            self.information(0,
                             "Separators detected in input gene names. "
                             "Assuming multiple genes per instance.")

        self.queryGenes = genes

        self.information(1)
        reference = None
        if self.useReference and self.refData:
            reference = self.GeneNamesFromData(self.refData)
            if not self.useAttrNames \
                    and any("," in gene for gene in reference):
                reference = reduce(add, (split_and_strip(gene, ",")
                                         for gene in reference),
                                   [])
                self.information(1,
                                 "Separators detected in reference gene "
                                 "names. Assuming multiple genes per "
                                 "instance.")

        org_code = self.SelectedOrganismCode()

        from orangecontrib.bioinformatics.ncbi.gene import GeneMatcher
        gm = GeneMatcher(kegg.to_taxid(org_code))
        gm.genes = genes
        gm.run_matcher()
        mapped_genes = {gene: str(ncbi_id) for gene, ncbi_id in gm.map_input_to_ncbi().items()}

        def run_enrichment(org_code, genes, reference=None, progress=None):
            org = kegg.KEGGOrganism(org_code)
            if reference is None:
                reference = org.get_ncbi_ids()

            # This is here just to keep widget working without any major changes.
            # map not needed, geneMatcher will not work on widget level.
            unique_genes = genes
            unique_ref_genes = dict([(gene, gene) for gene in set(reference)])

            taxid = kegg.to_taxid(org.org_code)
            # Map the taxid back to standard 'common' taxids
            # (as used by 'geneset') if applicable
            r_tax_map = dict((v, k) for k, v in
                             kegg.KEGGGenome.TAXID_MAP.items())
            if taxid in r_tax_map:
                taxid = r_tax_map[taxid]

            # We use the kegg pathway gene sets provided by 'geneset' for
            # the enrichment calculation.

            kegg_api = kegg.api.CachedKeggApi()
            linkmap = kegg_api.link(org.org_code, "pathway")
            converted_ids = kegg_api.conv(org.org_code, 'ncbi-geneid')
            kegg_sets = relation_list_to_multimap(linkmap, dict((gene.upper(), ncbi.split(':')[-1])
                                                                for ncbi, gene in converted_ids))

            kegg_sets = geneset.GeneSets(input=kegg_sets)

            pathways = pathway_enrichment(
                kegg_sets, unique_genes.values(),
                unique_ref_genes.keys(),
                callback=progress
            )
            # Ensure that pathway entries are pre-cached for later use in the
            # list/tree view
            kegg_pathways = kegg.KEGGPathways()
            kegg_pathways.pre_cache(
                pathways.keys(), progress_callback=progress
            )

            return pathways, org, unique_genes, unique_ref_genes

        self.progressBarInit()
        self.setEnabled(False)
        self.infoLabel.setText("Retrieving...\n")

        progress = concurrent.methodinvoke(self, "setProgress", (float,))

        self._enrichTask = concurrent.Task(
            function=lambda:
                run_enrichment(org_code, mapped_genes, reference, progress)
        )
        self._enrichTask.finished.connect(self._onEnrichTaskFinished)
        self._executor.submit(self._enrichTask)

    def _onEnrichTaskFinished(self):
        self.setEnabled(True)
        self.setBlocking(False)
        try:
            pathways, org, unique_genes, unique_ref_genes = \
                self._enrichTask.result()
        except Exception:
            raise

        self.progressBarFinished()

        self.org = org
        self.genes = unique_genes.keys()
        self.uniqueGenesDict = {ncbi_id: input_name for input_name, ncbi_id in unique_genes.items()}
        self.revUniqueGenesDict = dict([(val, key) for key, val in
                                        self.uniqueGenesDict.items()])
        self.referenceGenes = unique_ref_genes.keys()
        self.pathways = pathways

        if not self.pathways:
            self.warning(0, "No enriched pathways found.")
        else:
            self.warning(0)

        count = len(set(self.queryGenes))
        self.infoLabel.setText(
            "%i unique gene names on input\n"
            "%i (%.1f%%) genes names matched" %
            (count, len(unique_genes),
             100.0 * len(unique_genes) / count if count else 0.0)
        )

        self.UpdateListView()

    @Slot(float)
    def setProgress(self, value):
        if self.__in_setProgress:
            return

        self.__in_setProgress = True
        self.progressBarSet(value)
        self.__in_setProgress = False

    def GeneNamesFromData(self, data):
        """
        Extract and return gene names from `data`.
        """
        if self.useAttrNames:
            genes = [str(v.name).strip() for v in data.domain.attributes]
        elif self.geneAttrCandidates:
            assert 0 <= self.geneAttrIndex < len(self.geneAttrCandidates)
            geneAttr = self.geneAttrCandidates[self.geneAttrIndex]
            genes = [str(e[geneAttr]) for e in data
                     if not numpy.isnan(e[geneAttr])]
        else:
            raise ValueError("No gene names in data.")
        return genes

    def SelectedOrganismCode(self):
        """
        Return the selected organism code.
        """
        return self.organismCodes[min(self.organismIndex,
                                      len(self.organismCodes) - 1)]

    def selectAll(self):
        """
        Select all items in the pathway view.
        """
        changed = False
        scene = self.pathwayView.scene()
        with disconnected(scene.selectionChanged, self._onSelectionChanged):
            for item in scene.items():
                if item.flags() & QGraphicsItem.ItemIsSelectable and \
                        not item.isSelected():
                    item.setSelected(True)
                    changed = True
        if changed:
            self._onSelectionChanged()

    def _onSelectionChanged(self):
        # Item selection in the pathwayView/scene has changed
        self.commit()

    def commit(self):
        if self.data:
            selectedItems = self.pathwayView.scene().selectedItems()
            selectedGenes = reduce(set.union, [item.marked_objects
                                               for item in selectedItems],
                                   set())

            if self.useAttrNames:
                selected = [self.data.domain[self.uniqueGenesDict[gene]]
                            for gene in selectedGenes]
#                 newDomain = Orange.data.Domain(selectedVars, 0)
                data = self.data[:, selected]
#                 data = Orange.data.Table(newDomain, self.data)
                self.send("Selected Data", data)
            elif self.geneAttrCandidates:
                assert 0 <= self.geneAttrIndex < len(self.geneAttrCandidates)
                geneAttr = self.geneAttrCandidates[self.geneAttrIndex]
                selectedIndices = []
                otherIndices = []
                for i, ex in enumerate(self.data):
                    names = [self.revUniqueGenesDict.get(name, None)
                             for name in split_and_strip(str(ex[geneAttr]), ",")]
                    if any(name and name in selectedGenes for name in names):
                        selectedIndices.append(i)
                    else:
                        otherIndices.append(i)

                if selectedIndices:
                    selected = self.data[selectedIndices]
                else:
                    selected = None

                if otherIndices:
                    other = self.data[otherIndices]
                else:
                    other = None

                self.send("Selected Data", selected)
                self.send("Unselected Data", other)
        else:
            self.send("Selected Data", None)
            self.send("Unselected Data", None)

    def ClearCache(self):
        kegg.caching.clear_cache()

    def onDeleteWidget(self):
        """
        Called before the widget is removed from the canvas.
        """
        super().onDeleteWidget()

        self.org = None
        self._executor.shutdown(wait=False)
        gc.collect()  # Force collection (WHY?)

    def sizeHint(self):
        return QSize(1024, 720)
class OWGEODatasets(OWWidget, ConcurrentWidgetMixin):
    name = "GEO Data Sets"
    description = "Access to Gene Expression Omnibus data sets."
    icon = "icons/OWGEODatasets.svg"
    priority = 2

    class Outputs:
        gds_data = Output("Expression Data", Table)

    search_pattern = Setting('')
    auto_commit = Setting(True)
    genes_as_rows = Setting(False)
    mergeSpots = Setting(True)
    selected_gds = Setting(None)
    gdsSelectionStates = Setting({})
    splitter_settings = Setting((
        b'\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x01\xea\x00\x00\x00\xd7\x01\x00\x00\x00\x07\x01\x00\x00\x00\x02',
        b'\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x01\xb5\x00\x00\x02\x10\x01\x00\x00\x00\x07\x01\x00\x00\x00\x01',
    ))

    def __init__(self):
        OWWidget.__init__(self)
        ConcurrentWidgetMixin.__init__(self)

        self.gds_info: Optional[GDSInfo] = GDSInfo(
        )  # TODO: handle possible exceptions
        self.gds_data: Optional[Table] = None

        # Control area
        box = widgetBox(self.controlArea, 'Info', addSpace=True)
        self.infoBox = widgetLabel(box, 'Initializing\n\n')

        box = widgetBox(self.controlArea, 'Output', addSpace=True)
        radioButtonsInBox(box,
                          self,
                          'genes_as_rows',
                          ['Samples in rows', 'Genes in rows'],
                          callback=self._run)
        separator(box)

        rubber(self.controlArea)
        auto_commit(self.controlArea,
                    self,
                    'auto_commit',
                    '&Commit',
                    box=False)

        # Main Area

        # Filter widget
        self.filter = lineEdit(self.mainArea,
                               self,
                               'search_pattern',
                               'Filter:',
                               callbackOnType=True,
                               callback=self._apply_filter)
        self.mainArea.layout().addWidget(self.filter)

        splitter_vertical = QSplitter(Qt.Vertical, self.mainArea)

        self.mainArea.layout().addWidget(splitter_vertical)

        # set table view
        self.table_view = QTableView(splitter_vertical)
        self.table_view.setShowGrid(False)
        self.table_view.setSortingEnabled(True)
        self.table_view.sortByColumn(1, Qt.AscendingOrder)
        self.table_view.setAlternatingRowColors(True)
        self.table_view.verticalHeader().setVisible(False)
        self.table_view.setSelectionBehavior(QAbstractItemView.SelectRows)
        self.table_view.setSelectionMode(QAbstractItemView.SingleSelection)
        self.table_view.viewport().setMouseTracking(True)
        self.table_view.setSizeAdjustPolicy(
            QAbstractScrollArea.AdjustToContents)

        self.table_model = GEODatasetsModel()
        self.table_model.initialize(self.gds_info)
        self.table_view.setModel(self.table_model)
        self.table_model.show_table()

        self.table_view.horizontalHeader().setStretchLastSection(True)
        self.table_view.resizeColumnsToContents()

        v_header = self.table_view.verticalHeader()
        option = self.table_view.viewOptions()
        size = self.table_view.style().sizeFromContents(
            QStyle.CT_ItemViewItem, option, QSize(20, 20), self.table_view)

        v_header.setDefaultSectionSize(size.height() + 2)
        v_header.setMinimumSectionSize(5)

        # set item delegates
        self.table_view.setItemDelegateForColumn(
            self.table_model.pubmedid_col,
            LinkStyledItemDelegate(self.table_view))
        self.table_view.setItemDelegateForColumn(
            self.table_model.gds_id_col,
            LinkStyledItemDelegate(self.table_view))
        self.table_view.setItemDelegateForColumn(
            self.table_model.indicator_col,
            IndicatorItemDelegate(self.table_view, role=Qt.DisplayRole),
        )

        splitter_horizontal = QSplitter(Qt.Horizontal, splitter_vertical)

        # Description Widget
        box = widgetBox(splitter_horizontal, 'Description')
        self.description_widget = widgetLabel(box, '')
        self.description_widget.setWordWrap(True)
        rubber(box)

        # Sample Annotations Widget
        box = widgetBox(splitter_horizontal, 'Sample Annotations')
        self.annotations_widget = QTreeWidget(box)
        self.annotations_widget.setHeaderLabels(
            ['Type (Sample annotations)', 'Sample count'])
        self.annotations_widget.setRootIsDecorated(True)
        box.layout().addWidget(self.annotations_widget)
        self._annotations_updating = False
        self.annotations_widget.itemChanged.connect(
            self.on_annotation_selection_changed)
        self.splitters = splitter_vertical, splitter_horizontal

        for sp, setting in zip(self.splitters, self.splitter_settings):
            sp.splitterMoved.connect(self._splitter_moved)
            sp.restoreState(setting)

        self.table_view.selectionModel().selectionChanged.connect(
            self.on_gds_selection_changed)
        self._apply_filter()

        self._run()

    def _splitter_moved(self, *args):
        self.splitter_settings = [
            bytes(sp.saveState()) for sp in self.splitters
        ]

    def _set_description_widget(self):
        self.description_widget.setText(
            self.selected_gds.get('description', 'Description not available.'))

    def _set_annotations_widget(self, gds):
        self._annotations_updating = True
        self.annotations_widget.clear()

        annotations = defaultdict(set)
        subsets_count = {}

        for desc in gds['subsets']:
            annotations[desc['type']].add(desc['description'])
            subsets_count[desc['description']] = str(len(desc['sample_id']))

        for _type, subsets in annotations.items():
            key = (gds["name"], _type)
            parent = QTreeWidgetItem(self.annotations_widget, [_type])
            parent.key = key
            for subset in subsets:
                key = (gds['name'], _type, subset)
                item = QTreeWidgetItem(
                    parent, [subset, subsets_count.get(subset, '')])
                item.setFlags(item.flags() | Qt.ItemIsUserCheckable)
                item.setCheckState(
                    0, self.gdsSelectionStates.get(key, Qt.Checked))
                item.key = key

        self._annotations_updating = False
        self.annotations_widget.expandAll()
        for i in range(self.annotations_widget.columnCount()):
            self.annotations_widget.resizeColumnToContents(i)

    def _set_selection(self):
        if self.selected_gds is not None:
            index = self.table_model.get_row_index(
                self.selected_gds.get('name'))
            if index is not None:
                self.table_view.selectionModel().blockSignals(True)
                self.table_view.selectRow(index)
                self._handle_selection_changed()
                self.table_view.selectionModel().blockSignals(False)

    def _handle_selection_changed(self):
        if self.table_model.table is not None:
            selection = self.table_view.selectionModel().selectedRows(
                self.table_model.gds_id_col)
            selected_gds_name = selection[0].data(
            ) if len(selection) > 0 else None

            if selected_gds_name:
                self.selected_gds = self.table_model.info.get(
                    selected_gds_name)
                self._set_annotations_widget(self.selected_gds)
                self._set_description_widget()
            else:
                self.annotations_widget.clear()
                self.description_widget.clear()

            self.update_info()

    def _apply_filter(self):
        if self.table_model.table is not None:
            self.table_model.show_table(
                filter_pattern=str(self.search_pattern))
            self._set_selection()
            self.update_info()

    def _run(self):
        if self.selected_gds is not None:
            self.gds_data = None
            self.start(run_download_task, self.selected_gds.get('name'),
                       self.get_selected_samples(), self.genes_as_rows)

    def on_gds_selection_changed(self):
        self._handle_selection_changed()
        self._run()

    def on_annotation_selection_changed(self):
        if self._annotations_updating:
            return
        for i in range(self.annotations_widget.topLevelItemCount()):
            item = self.annotations_widget.topLevelItem(i)
            if 'key' in item.__dict__:
                self.gdsSelectionStates[item.key] = item.checkState(0)
            for j in range(item.childCount()):
                child = item.child(j)
                if 'key' in child.__dict__:
                    self.gdsSelectionStates[child.key] = child.checkState(0)

        self._run()

    def update_info(self):
        all_gds = len(self.table_model.info)
        text = "{} datasets\n{} datasets cached\n".format(
            all_gds, len(local_files.listfiles()))
        filtered = self.table_view.model().rowCount()
        if all_gds != filtered:
            text += "{} after filtering".format(filtered)
        self.infoBox.setText(text)

    def get_selected_samples(self):
        """
        Return the currently selected sample annotations.

        The return value is a list of selected (sample type, sample value)
        tuples.

        .. note:: if some Sample annotation type has no selected values.
                  this method will return all values for it.

        TODO: this could probably be simplified.

        """
        def childiter(item):
            """ Iterate over the children of an QTreeWidgetItem instance.
            """
            for i in range(item.childCount()):
                yield item.child(i)

        samples = []
        unused_types = []
        used_types = []

        for stype in childiter(self.annotations_widget.invisibleRootItem()):
            selected_values = []
            all_values = []
            for sval in childiter(stype):
                value = (str(stype.text(0)), str(sval.text(0)))
                if self.gdsSelectionStates.get(sval.key, True):
                    selected_values.append(value)
                all_values.append(value)
            if selected_values:
                samples.extend(selected_values)
                used_types.append(str(stype.text(0)))
            else:
                # If no sample of sample type is selected we don't filter on it.
                samples.extend(all_values)
                unused_types.append(str(stype.text(0)))

        _samples = defaultdict(list)
        for sample, sample_type in samples:
            _samples[sample].append(sample_type)
        return _samples

    def commit(self):
        self.Outputs.gds_data.send(self.gds_data)

    def on_done(self, result: Result):
        assert isinstance(result.gds_dataset, Table)
        self.gds_data = result.gds_dataset
        self.commit()

        if self.gds_info:
            self.table_model.initialize(self.gds_info)
            self._apply_filter()

    def on_partial_result(self, result: Any) -> None:
        pass

    def onDeleteWidget(self):
        self.shutdown()
        super().onDeleteWidget()

    def send_report(self):
        self.report_items(
            "GEO Dataset",
            [
                ("ID", self.selected_gds['name']),
                ("Title", self.selected_gds['title']),
                ("Organism", self.selected_gds['sample_organism']),
            ],
        )
        self.report_items(
            "Data",
            [
                ("Samples", self.selected_gds['sample_count']),
                ("Features", self.selected_gds['variables']),
                ("Genes", self.selected_gds['genes']),
            ],
        )
        self.report_name("Sample annotations")
        subsets = defaultdict(list)
        for subset in self.selected_gds['subsets']:
            subsets[subset['type']].append(
                (subset['description'], len(subset['sample_id'])))
        self.report_html += "<ul>"
        for _type in subsets:
            self.report_html += "<b>" + _type + ":</b></br>"
            for desc, count in subsets[_type]:
                self.report_html += 9 * "&nbsp" + "<b>{}:</b> {}</br>".format(
                    desc, count)
        self.report_html += "</ul>"
Beispiel #13
0
class OWPIPAx(widget.OWWidget):
    name = "PIPAx"
    description = "Access data from PIPA RNA-Seq database."
    icon = "../widgets/icons/PIPA.svg"
    priority = 35

    inputs = []
    outputs = [("Data", Orange.data.Table)]

    username = settings.Setting("")
    password = settings.Setting("")

    log2 = settings.Setting(False)
    rtypei = settings.Setting(5)  # hardcoded rpkm mapability polya
    excludeconstant = settings.Setting(False)
    joinreplicates = settings.Setting(False)
    #: The stored current selection (in experiments view)
    #: SelectionByKey | None
    currentSelection = settings.Setting(None)
    #: Stored selections (presets)
    #: list of SelectionByKey
    storedSelections = settings.Setting([])
    #: Stored column sort keys (from Sort view)
    #: list of strings
    storedSortingOrder = settings.Setting(
        ["Strain", "Experiment", "Genotype", "Timepoint"])

    experimentsHeaderState = settings.Setting(
        {name: False
         for _, name in HEADER[:ID_INDEX + 1]})

    def __init__(self, parent=None, signalManager=None, name="PIPAx"):
        super().__init__(parent)

        self.selectedExperiments = []
        self.buffer = dicty.CacheSQLite(bufferfile)

        self.searchString = ""

        self.result_types = []
        self.mappings = {}

        self.controlArea.setMaximumWidth(250)
        self.controlArea.setMinimumWidth(250)

        gui.button(self.controlArea, self, "Reload", callback=self.Reload)
        gui.button(self.controlArea,
                   self,
                   "Clear cache",
                   callback=self.clear_cache)

        b = gui.widgetBox(self.controlArea, "Experiment Sets")
        self.selectionSetsWidget = SelectionSetsWidget(self)
        self.selectionSetsWidget.setSizePolicy(QSizePolicy.Preferred,
                                               QSizePolicy.Maximum)

        def store_selections(modified):
            if not modified:
                self.storedSelections = self.selectionSetsWidget.selections

        self.selectionSetsWidget.selectionModified.connect(store_selections)
        b.layout().addWidget(self.selectionSetsWidget)

        gui.separator(self.controlArea)

        b = gui.widgetBox(self.controlArea, "Sort output columns")
        self.columnsSortingWidget = SortedListWidget(self)
        self.columnsSortingWidget.setSizePolicy(QSizePolicy.Preferred,
                                                QSizePolicy.Maximum)

        def store_sort_order():
            self.storedSortingOrder = self.columnsSortingWidget.sortingOrder

        self.columnsSortingWidget.sortingOrderChanged.connect(store_sort_order)
        b.layout().addWidget(self.columnsSortingWidget)
        sorting_model = QStringListModel(SORTING_MODEL_LIST)
        self.columnsSortingWidget.setModel(sorting_model)

        gui.separator(self.controlArea)

        box = gui.widgetBox(self.controlArea, 'Expression Type')
        self.expressionTypesCB = gui.comboBox(box,
                                              self,
                                              "rtypei",
                                              items=[],
                                              callback=self.UpdateResultsList)

        gui.checkBox(self.controlArea, self, "excludeconstant",
                     "Exclude labels with constant values")

        gui.checkBox(self.controlArea, self, "joinreplicates",
                     "Average replicates (use median)")

        gui.checkBox(self.controlArea, self, "log2",
                     "Logarithmic (base 2) transformation")

        self.commit_button = gui.button(self.controlArea,
                                        self,
                                        "&Commit",
                                        callback=self.Commit)
        self.commit_button.setDisabled(True)

        gui.rubber(self.controlArea)

        box = gui.widgetBox(self.controlArea, "Authentication")

        gui.lineEdit(box,
                     self,
                     "username",
                     "Username:"******"password",
                                  "Password:"******"searchString",
                     "Search",
                     callbackOnType=True,
                     callback=self.SearchUpdate)

        self.headerLabels = [t[1] for t in HEADER]

        self.experimentsWidget = QTreeWidget()
        self.experimentsWidget.setHeaderLabels(self.headerLabels)
        self.experimentsWidget.setSelectionMode(QTreeWidget.ExtendedSelection)
        self.experimentsWidget.setRootIsDecorated(False)
        self.experimentsWidget.setSortingEnabled(True)

        contextEventFilter = gui.VisibleHeaderSectionContextEventFilter(
            self.experimentsWidget, self.experimentsWidget)

        self.experimentsWidget.header().installEventFilter(contextEventFilter)
        self.experimentsWidget.setItemDelegateForColumn(
            0, gui.IndicatorItemDelegate(self, role=Qt.DisplayRole))

        self.experimentsWidget.setAlternatingRowColors(True)

        self.experimentsWidget.selectionModel().selectionChanged.connect(
            self.onSelectionChanged)

        self.selectionSetsWidget.setSelectionModel(
            self.experimentsWidget.selectionModel())

        self.mainArea.layout().addWidget(self.experimentsWidget)

        # Restore the selection states from the stored settings
        self.selectionSetsWidget.selections = self.storedSelections
        self.columnsSortingWidget.sortingOrder = self.storedSortingOrder

        self.restoreHeaderState()

        self.experimentsWidget.header().geometriesChanged.connect(
            self.saveHeaderState)

        self.dbc = None

        self.AuthSet()

        QTimer.singleShot(100, self.UpdateExperiments)

    def sizeHint(self):
        return QSize(800, 600)

    def AuthSet(self):
        if len(self.username):
            self.passf.setDisabled(False)
        else:
            self.passf.setDisabled(True)

    def AuthChanged(self):
        self.AuthSet()
        self.ConnectAndUpdate()

    def ConnectAndUpdate(self):
        self.Connect()
        self.UpdateExperiments(reload=True)

    def Connect(self):
        self.error(1)
        self.warning(1)

        def en(x):
            return x if len(x) else None

        self.dbc = dicty.PIPAx(cache=self.buffer,
                               username=en(self.username),
                               password=self.password)

        # check password
        if en(self.username) != None:
            try:
                self.dbc.mappings(reload=True)
            except dicty.AuthenticationError:
                self.error(1, "Wrong username or password")
                self.dbc = None
            except Exception as ex:
                print("Error when contacting the PIPA database", ex)
                sys.excepthook(*sys.exc_info())
                try:  # maybe cached?
                    self.dbc.mappings()
                    self.warning(
                        1, "Can not access database - using cached data.")
                except Exception as ex:
                    self.dbc = None
                    self.error(1, "Can not access database.")

    def Reload(self):
        self.UpdateExperiments(reload=True)

    def clear_cache(self):
        self.buffer.clear()
        self.Reload()

    def rtype(self):
        """Return selected result template type """
        if self.result_types:
            return self.result_types[self.rtypei][0]
        else:
            return "-1"

    def UpdateExperimentTypes(self):
        self.expressionTypesCB.clear()
        items = [desc for _, desc in self.result_types]
        self.expressionTypesCB.addItems(items)
        self.rtypei = max(0, min(self.rtypei, len(self.result_types) - 1))

    def UpdateExperiments(self, reload=False):
        self.experimentsWidget.clear()
        self.items = []

        self.progressBarInit()

        if not self.dbc:
            self.Connect()

        mappings = {}
        result_types = []
        sucind = False  # success indicator for database index

        try:
            mappings = self.dbc.mappings(reload=reload)
            result_types = self.dbc.result_types(reload=reload)
            sucind = True
        except Exception as ex:
            try:
                mappings = self.dbc.mappings()
                result_types = self.dbc.result_types()
                self.warning(0, "Can not access database - using cached data.")
                sucind = True
            except Exception as ex:
                self.error(0, "Can not access database.")

        if sucind:
            self.warning(0)
            self.error(0)

        self.mappings = mappings
        self.result_types = result_types

        self.UpdateExperimentTypes()
        self.UpdateResultsList(reload=reload)

        self.progressBarFinished()

        if self.currentSelection:
            self.currentSelection.select(
                self.experimentsWidget.selectionModel())

        self.handle_commit_button()

    def UpdateResultsList(self, reload=False):

        results_list = {}
        try:
            results_list = self.dbc.results_list(self.rtype(), reload=reload)
        except Exception as ex:
            try:
                results_list = self.dbc.results_list(self.rtype())
            except Exception as ex:
                self.error(0, "Can not access database.")

        self.results_list = results_list
        mappings_key_dict = dict(((m["data_id"], m["id"]), key) \
                                 for key, m in self.mappings.items())

        def mapping_unique_id(annot):
            """Map annotations dict from results_list to unique
            `mappings` ids.
            """
            data_id, mappings_id = annot["data_id"], annot["mappings_id"]
            return mappings_key_dict[data_id, mappings_id]

        elements = []

        # softly change the view so that the selection stays the same

        items_shown = {}
        for i, item in enumerate(self.items):
            c = str(item.text(10))
            items_shown[c] = i

        items_to_show = dict((mapping_unique_id(annot), annot)
                             for annot in self.results_list.values())

        add_items = set(items_to_show) - set(items_shown)
        delete_items = set(items_shown) - set(items_to_show)

        i = 0
        while i < self.experimentsWidget.topLevelItemCount():
            it = self.experimentsWidget.topLevelItem(i)
            if str(it.text(10)) in delete_items:
                self.experimentsWidget.takeTopLevelItem(i)
            else:
                i += 1

        delete_ind = set([items_shown[i] for i in delete_items])
        self.items = [
            it for i, it in enumerate(self.items) if i not in delete_ind
        ]

        for r_annot in [items_to_show[i] for i in add_items]:
            d = defaultdict(lambda: "?", r_annot)
            row_items = [""] + [d.get(key, "?") for key, _ in HEADER[1:]]
            try:
                time_dict = literal_eval(row_items[DATE_INDEX])
                date_rna = date(
                    time_dict["fullYearUTC"],
                    time_dict["monthUTC"] + 1,  # Why is month 0 based?
                    time_dict["dateUTC"])
                row_items[DATE_INDEX] = date_rna.strftime("%x")
            except Exception:
                row_items[DATE_INDEX] = ''

            row_items[ID_INDEX] = mapping_unique_id(r_annot)
            elements.append(row_items)

            ci = MyTreeWidgetItem(self.experimentsWidget, row_items)

            self.items.append(ci)

        for i in range(len(self.headerLabels)):
            self.experimentsWidget.resizeColumnToContents(i)

        # which is the ok buffer version
        # FIXME: what attribute to use for version?
        self.wantbufver = \
            lambda x, ad=self.results_list: \
            defaultdict(lambda: "?", ad[x])["date"]

        self.wantbufver = lambda x: "0"

        self.UpdateCached()

    def UpdateCached(self):
        if self.wantbufver and self.dbc:
            fn = self.dbc.download_key_function()
            result_id_key = dict(((m["data_id"], m["mappings_id"]), key) \
                                 for key, m in self.results_list.items())

            for item in self.items:
                c = str(item.text(10))
                mapping = self.mappings[c]
                data_id, mappings_id = mapping["data_id"], mapping["id"]
                r_id = result_id_key[data_id, mappings_id]
                # Get the buffered version
                buffered = self.dbc.inBuffer(fn(r_id))
                value = " " if buffered == self.wantbufver(r_id) else ""
                item.setData(0, Qt.DisplayRole, value)

    def SearchUpdate(self, string=""):
        for item in self.items:
            item.setHidden(not all(s in item \
                                   for s in self.searchString.split())
                           )

    def Commit(self):
        if not self.dbc:
            self.Connect()

        pb = gui.ProgressBar(self, iterations=100)

        table = None

        ids = []
        for item in self.experimentsWidget.selectedItems():
            unique_id = str(item.text(10))
            annots = self.mappings[unique_id]
            ids.append((annots["data_id"], annots["id"]))

        transfn = None
        if self.log2:
            transfn = lambda x: math.log(x + 1.0, 2)

        reverse_header_dict = dict((name, key) for key, name in HEADER)

        hview = self.experimentsWidget.header()
        shownHeaders = [label for i, label in \
                        list(enumerate(self.headerLabels))[1:] \
                        if not hview.isSectionHidden(i)
                        ]

        allowed_labels = [reverse_header_dict.get(label, label) \
                          for label in shownHeaders]

        if self.joinreplicates and "id" not in allowed_labels:
            # need 'id' labels in join_replicates for attribute names
            allowed_labels.append("id")

        if len(ids):
            table = self.dbc.get_data(
                ids=ids,
                result_type=self.rtype(),
                callback=pb.advance,
                exclude_constant_labels=self.excludeconstant,
                #                          bufver=self.wantbufver,
                transform=transfn,
                allowed_labels=allowed_labels)

            if self.joinreplicates:
                table = dicty.join_replicates(table,
                                              ignorenames=[
                                                  "replicate", "data_id",
                                                  "mappings_id", "data_name",
                                                  "id", "unique_id"
                                              ],
                                              namefn=None,
                                              avg=dicty.median)

            # Sort attributes
            sortOrder = self.columnsSortingWidget.sortingOrder

            all_values = defaultdict(set)
            for at in table.domain.attributes:
                atts = at.attributes
                for name in sortOrder:
                    all_values[name].add(
                        atts.get(reverse_header_dict[name], ""))

            isnum = {}
            for at, vals in all_values.items():
                vals = filter(None, vals)
                try:
                    for a in vals:
                        float(a)
                    isnum[at] = True
                except:
                    isnum[at] = False

            def optfloat(x, at):
                if x == "":
                    return ""
                else:
                    return float(x) if isnum[at] else x

            def sorting_key(attr):
                atts = attr.attributes
                return tuple([optfloat(atts.get(reverse_header_dict[name], ""), name) \
                              for name in sortOrder])

            attributes = sorted(table.domain.attributes, key=sorting_key)

            domain = Orange.data.Domain(attributes, table.domain.class_var,
                                        table.domain.metas)
            table = table.from_table(domain, table)

            data_hints.set_hint(table, "taxid", "352472")
            data_hints.set_hint(table, "genesinrows", False)

            self.send("Data", table)

            self.UpdateCached()

        pb.finish()

    def onSelectionChanged(self, selected, deselected):
        self.handle_commit_button()

    def handle_commit_button(self):
        self.currentSelection = \
            SelectionByKey(self.experimentsWidget.selectionModel().selection(),
                           key=(1, 2, 3, 10))
        self.commit_button.setDisabled(not len(self.currentSelection))

    def saveHeaderState(self):
        hview = self.experimentsWidget.header()
        for i, label in enumerate(self.headerLabels):
            self.experimentsHeaderState[label] = hview.isSectionHidden(i)

    def restoreHeaderState(self):
        hview = self.experimentsWidget.header()
        state = self.experimentsHeaderState
        for i, label in enumerate(self.headerLabels):
            hview.setSectionHidden(i, state.get(label, True))
            self.experimentsWidget.resizeColumnToContents(i)