Ejemplo n.º 1
0
class OWDataSets(widget.OWWidget):
    name = "Data Sets"
    description = "Load a data set from an online repository"
    icon = "icons/DataSets.svg"
    priority = 20
    replaces = ["orangecontrib.prototypes.widgets.owdatasets.OWDataSets"]

    # The following constants can be overridden in a subclass
    # to reuse this widget for a different repository
    # Take care when refactoring! (used in e.g. single-cell)
    INDEX_URL = "http://datasets.orange.biolab.si/"
    DATASET_DIR = "datasets"

    class Error(widget.OWWidget.Error):
        no_remote_datasets = Msg("Could not fetch data set list")

    class Warning(widget.OWWidget.Warning):
        only_local_datasets = Msg("Could not fetch data sets list, only local "
                                  "cached data sets are shown")

    class Outputs:
        data = Output("Data", Orange.data.Table)

    #: Selected data set id
    selected_id = settings.Setting(None)   # type: Optional[str]

    auto_commit = settings.Setting(False)  # type: bool

    #: main area splitter state
    splitter_state = settings.Setting(b'')  # type: bytes
    header_state = settings.Setting(b'')    # type: bytes

    def __init__(self):
        super().__init__()
        self.local_cache_path = os.path.join(data_dir(), self.DATASET_DIR)

        self.__awaiting_state = None  # type: Optional[_FetchState]

        box = gui.widgetBox(self.controlArea, "Info")

        self.infolabel = QLabel(text="Initializing...\n\n")
        box.layout().addWidget(self.infolabel)

        gui.widgetLabel(self.mainArea, "Filter")
        self.filterLineEdit = QLineEdit(
            textChanged=self.filter
        )
        self.mainArea.layout().addWidget(self.filterLineEdit)

        self.splitter = QSplitter(orientation=Qt.Vertical)

        self.view = QTreeView(
            sortingEnabled=True,
            selectionMode=QTreeView.SingleSelection,
            alternatingRowColors=True,
            rootIsDecorated=False,
            editTriggers=QTreeView.NoEditTriggers,
        )

        box = gui.widgetBox(self.splitter, "Description", addToLayout=False)
        self.descriptionlabel = QLabel(
            wordWrap=True,
            textFormat=Qt.RichText,
        )
        self.descriptionlabel = QTextBrowser(
            openExternalLinks=True,
            textInteractionFlags=(Qt.TextSelectableByMouse |
                                  Qt.LinksAccessibleByMouse)
        )
        self.descriptionlabel.setFrameStyle(QTextBrowser.NoFrame)
        # no (white) text background
        self.descriptionlabel.viewport().setAutoFillBackground(False)

        box.layout().addWidget(self.descriptionlabel)
        self.splitter.addWidget(self.view)
        self.splitter.addWidget(box)

        self.splitter.setSizes([300, 200])
        self.splitter.splitterMoved.connect(
            lambda:
            setattr(self, "splitter_state", bytes(self.splitter.saveState()))
        )
        self.mainArea.layout().addWidget(self.splitter)
        self.controlArea.layout().addStretch(10)
        gui.auto_commit(self.controlArea, self, "auto_commit", "Send Data")

        model = QStandardItemModel(self)
        model.setHorizontalHeaderLabels(HEADER)
        proxy = QSortFilterProxyModel()
        proxy.setSourceModel(model)
        proxy.setFilterKeyColumn(-1)
        proxy.setFilterCaseSensitivity(False)
        self.view.setModel(proxy)

        if self.splitter_state:
            self.splitter.restoreState(self.splitter_state)

        self.view.setItemDelegateForColumn(
            Header.Size, SizeDelegate(self))
        self.view.setItemDelegateForColumn(
            Header.Local, gui.IndicatorItemDelegate(self, role=Qt.DisplayRole))
        self.view.setItemDelegateForColumn(
            Header.Instances, NumericalDelegate(self))
        self.view.setItemDelegateForColumn(
            Header.Variables, NumericalDelegate(self))

        self.view.resizeColumnToContents(Header.Local)

        if self.header_state:
            self.view.header().restoreState(self.header_state)

        self.setBlocking(True)
        self.setStatusMessage("Initializing")

        self._executor = ThreadPoolExecutor(max_workers=1)
        f = self._executor.submit(self.list_remote)
        w = FutureWatcher(f, parent=self)
        w.done.connect(self.__set_index)

    @Slot(object)
    def __set_index(self, f):
        # type: (Future) -> None
        # set results from `list_remote` query.
        assert QThread.currentThread() is self.thread()
        assert f.done()
        self.setBlocking(False)
        self.setStatusMessage("")
        allinfolocal = self.list_local()
        try:
            res = f.result()
        except Exception:
            log.exception("Error while fetching updated index")
            if not allinfolocal:
                self.Error.no_remote_datasets()
            else:
                self.Warning.only_local_datasets()
            res = {}

        allinforemote = res  # type: Dict[Tuple[str, str], dict]
        allkeys = set(allinfolocal)
        if allinforemote is not None:
            allkeys = allkeys | set(allinforemote)
        allkeys = sorted(allkeys)

        def info(file_path):
            if file_path in allinforemote:
                info = allinforemote[file_path]
            else:
                info = allinfolocal[file_path]
            islocal = file_path in allinfolocal
            isremote = file_path in allinforemote
            outdated = islocal and isremote and (
                allinforemote[file_path].get('version', '') !=
                allinfolocal[file_path].get('version', ''))
            islocal &= not outdated
            prefix = os.path.join('', *file_path[:-1])
            filename = file_path[-1]

            return namespace(
                prefix=prefix, filename=filename,
                title=info.get("title", filename),
                datetime=info.get("datetime", None),
                description=info.get("description", None),
                references=info.get("references", []),
                seealso=info.get("seealso", []),
                source=info.get("source", None),
                year=info.get("year", None),
                instances=info.get("instances", None),
                variables=info.get("variables", None),
                target=info.get("target", None),
                missing=info.get("missing", None),
                tags=info.get("tags", []),
                size=info.get("size", None),
                islocal=islocal,
                outdated=outdated
            )

        model = QStandardItemModel(self)
        model.setHorizontalHeaderLabels(HEADER)

        current_index = -1
        for i, file_path in enumerate(allkeys):
            datainfo = info(file_path)
            item1 = QStandardItem()
            item1.setData(" " if datainfo.islocal else "", Qt.DisplayRole)
            item1.setData(datainfo, Qt.UserRole)
            item2 = QStandardItem(datainfo.title)
            item3 = QStandardItem()
            item3.setData(datainfo.size, Qt.DisplayRole)
            item4 = QStandardItem()
            item4.setData(datainfo.instances, Qt.DisplayRole)
            item5 = QStandardItem()
            item5.setData(datainfo.variables, Qt.DisplayRole)
            item6 = QStandardItem()
            item6.setData(datainfo.target, Qt.DisplayRole)
            if datainfo.target:
                item6.setIcon(variable_icon(datainfo.target))
            item7 = QStandardItem()
            item7.setData(", ".join(datainfo.tags) if datainfo.tags else "",
                          Qt.DisplayRole)
            row = [item1, item2, item3, item4, item5, item6, item7]
            model.appendRow(row)

            if os.path.join(*file_path) == self.selected_id:
                current_index = i

        hs = self.view.header().saveState()
        model_ = self.view.model().sourceModel()
        self.view.model().setSourceModel(model)
        self.view.header().restoreState(hs)
        model_.deleteLater()
        model_.setParent(None)
        self.view.selectionModel().selectionChanged.connect(
            self.__on_selection
        )
        # Update the info text
        self.infolabel.setText(format_info(model.rowCount(), len(allinfolocal)))

        if current_index != -1:
            selmodel = self.view.selectionModel()
            selmodel.select(
                self.view.model().mapFromSource(model.index(current_index, 0)),
                QItemSelectionModel.ClearAndSelect | QItemSelectionModel.Rows)

    def __update_cached_state(self):
        model = self.view.model().sourceModel()
        localinfo = self.list_local()
        assert isinstance(model, QStandardItemModel)
        allinfo = []
        for i in range(model.rowCount()):
            item = model.item(i, 0)
            info = item.data(Qt.UserRole)
            info.islocal = (info.prefix, info.filename) in localinfo
            item.setData(" " if info.islocal else "", Qt.DisplayRole)
            allinfo.append(info)

        self.infolabel.setText(format_info(
            model.rowCount(), sum(info.islocal for info in allinfo)))

    def selected_dataset(self):
        """
        Return the current selected data set info or None if not selected

        Returns
        -------
        info : Optional[namespace]
        """
        rows = self.view.selectionModel().selectedRows(0)
        assert 0 <= len(rows) <= 1
        current = rows[0] if rows else None  # type: Optional[QModelIndex]
        if current is not None:
            info = current.data(Qt.UserRole)
            assert isinstance(info, namespace)
        else:
            info = None
        return info

    def filter(self):
        filter_string = self.filterLineEdit.text().strip()
        proxyModel = self.view.model()
        if proxyModel:
            proxyModel.setFilterFixedString(filter_string)

    def __on_selection(self):
        # Main data sets view selection has changed
        rows = self.view.selectionModel().selectedRows(0)
        assert 0 <= len(rows) <= 1
        current = rows[0] if rows else None  # type: Optional[QModelIndex]
        if current is not None:
            current = self.view.model().mapToSource(current)
            di = current.data(Qt.UserRole)
            text = description_html(di)
            self.descriptionlabel.setText(text)
            self.selected_id = os.path.join(di.prefix, di.filename)
        else:
            self.descriptionlabel.setText("")
            self.selected_id = None

        self.commit()

    def commit(self):
        """
        Commit a dataset to the output immediately (if available locally) or
        schedule download background and an eventual send.

        During the download the widget is in blocking state
        (OWWidget.isBlocking)
        """
        di = self.selected_dataset()
        if di is not None:
            self.Error.clear()

            if self.__awaiting_state is not None:
                # disconnect from the __commit_complete
                self.__awaiting_state.watcher.done.disconnect(
                    self.__commit_complete)
                # .. and connect to update_cached_state
                # self.__awaiting_state.watcher.done.connect(
                #     self.__update_cached_state)
                # TODO: There are possible pending __progress_advance queued
                self.__awaiting_state.pb.advance.disconnect(
                    self.__progress_advance)
                self.progressBarFinished(processEvents=None)
                self.__awaiting_state = None

            if not di.islocal:
                pr = progress()
                callback = lambda pr=pr: pr.advance.emit()
                pr.advance.connect(self.__progress_advance, Qt.QueuedConnection)

                self.progressBarInit(processEvents=None)
                self.setStatusMessage("Fetching...")
                self.setBlocking(True)

                f = self._executor.submit(
                    ensure_local, self.INDEX_URL, di.prefix, di.filename,
                    self.local_cache_path, force=di.outdated,
                    progress_advance=callback)
                w = FutureWatcher(f, parent=self)
                w.done.connect(self.__commit_complete)
                self.__awaiting_state = _FetchState(f, w, pr)
            else:
                self.setStatusMessage("")
                self.setBlocking(False)
                self.commit_cached(di.prefix, di.filename)
        else:
            self.Outputs.data.send(None)

    @Slot(object)
    def __commit_complete(self, f):
        # complete the commit operation after the required file has been
        # downloaded
        assert QThread.currentThread() is self.thread()
        assert self.__awaiting_state is not None
        assert self.__awaiting_state.future is f

        if self.isBlocking():
            self.progressBarFinished(processEvents=None)
            self.setBlocking(False)
            self.setStatusMessage("")

        self.__awaiting_state = None

        try:
            path = f.result()
        except Exception as ex:
            log.exception("Error:")
            self.error(format_exception(ex))
            path = None

        self.__update_cached_state()

        if path is not None:
            data = Orange.data.Table(path)
        else:
            data = None
        self.Outputs.data.send(data)

    def commit_cached(self, prefix, filename):
        path = LocalFiles(self.local_cache_path).localpath(prefix, filename)
        self.Outputs.data.send(Orange.data.Table(path))

    @Slot()
    def __progress_advance(self):
        assert QThread.currentThread() is self.thread()
        self.progressBarAdvance(1, processEvents=None)

    def onDeleteWidget(self):
        super().onDeleteWidget()
        if self.__awaiting_state is not None:
            self.__awaiting_state.watcher.done.disconnect(self.__commit_complete)
            self.__awaiting_state.pb.advance.disconnect(self.__progress_advance)
            self.__awaiting_state = None

    def sizeHint(self):
        return QSize(900, 600)

    def closeEvent(self, event):
        self.splitter_state = bytes(self.splitter.saveState())
        self.header_state = bytes(self.view.header().saveState())
        super().closeEvent(event)

    def list_remote(self):
        # type: () -> Dict[Tuple[str, str], dict]
        client = ServerFiles(server=self.INDEX_URL)
        return client.allinfo()

    def list_local(self):
        # type: () -> Dict[Tuple[str, str], dict]
        return LocalFiles(self.local_cache_path).allinfo()
Ejemplo n.º 2
0
class OWDataSets(OWWidget):
    name = "数据集(Datasets)"
    description = "从联机存储库加载数据集"
    icon = "icons/DataSets.svg"
    priority = 20
    replaces = ["orangecontrib.prototypes.widgets.owdatasets.OWDataSets"]
    keywords = ["online"]

    want_control_area = False

    # The following constants can be overridden in a subclass
    # to reuse this widget for a different repository
    # Take care when refactoring! (used in e.g. single-cell)
    INDEX_URL = "https://datasets.biolab.si/"
    DATASET_DIR = "datasets"

    # override HEADER_SCHEMA to define new columns
    # if schema is changed override methods: self.assign_delegates and
    # self.create_model
    HEADER_SCHEMA = [
        ['islocal', {'label': ''}],
        ['title', {'label': '标题'}],
        ['size', {'label': '大小'}],
        ['instances', {'label': '样本数目'}],
        ['variables', {'label': '变量数目'}],
        ['target', {'label': '目标'}],
        ['tags', {'label': '标签'}]
    ]  # type: List[str, dict]

    IndicatorBrushes = (QBrush(Qt.darkGray), QBrush(QColor(0, 192, 0)))

    class Error(OWWidget.Error):
        no_remote_datasets = Msg("Could not fetch dataset list")

    class Warning(OWWidget.Warning):
        only_local_datasets = Msg("Could not fetch datasets list, only local "
                                  "cached datasets are shown")

    class Outputs:
        data = Output("数据(Data)", Orange.data.Table, replaces=['Data'])

    #: Selected dataset id
    selected_id = settings.Setting(None)   # type: Optional[str]

    #: main area splitter state
    splitter_state = settings.Setting(b'')  # type: bytes
    header_state = settings.Setting(b'')    # type: bytes

    def __init__(self):
        super().__init__()
        self.allinfo_local = {}
        self.allinfo_remote = {}

        self.local_cache_path = os.path.join(data_dir(), self.DATASET_DIR)
        # current_output does not equal selected_id when, for instance, the
        # data is still downloading
        self.current_output = None

        self._header_labels = [
            header['label'] for _, header in self.HEADER_SCHEMA]
        self._header_index = namedtuple(
            '_header_index', [info_tag for info_tag, _ in self.HEADER_SCHEMA])
        self.Header = self._header_index(
            *[index for index, _ in enumerate(self._header_labels)])

        self.__awaiting_state = None  # type: Optional[_FetchState]

        self.filterLineEdit = QLineEdit(
            textChanged=self.filter, placeholderText="Search for data set ..."
        )
        self.mainArea.layout().addWidget(self.filterLineEdit)

        self.splitter = QSplitter(orientation=Qt.Vertical)

        self.view = TreeViewWithReturn(
            sortingEnabled=True,
            selectionMode=QTreeView.SingleSelection,
            alternatingRowColors=True,
            rootIsDecorated=False,
            editTriggers=QTreeView.NoEditTriggers,
            uniformRowHeights=True,
            toolTip="Press Return or double-click to send"
        )
        # the method doesn't exists yet, pylint: disable=unnecessary-lambda
        self.view.doubleClicked.connect(self.commit)
        self.view.returnPressed.connect(self.commit)
        box = gui.widgetBox(self.splitter, "说明", addToLayout=False)
        self.descriptionlabel = QLabel(
            wordWrap=True,
            textFormat=Qt.RichText,
        )
        self.descriptionlabel = QTextBrowser(
            openExternalLinks=True,
            textInteractionFlags=(Qt.TextSelectableByMouse |
                                  Qt.LinksAccessibleByMouse)
        )
        self.descriptionlabel.setFrameStyle(QTextBrowser.NoFrame)
        # no (white) text background
        self.descriptionlabel.viewport().setAutoFillBackground(False)

        box.layout().addWidget(self.descriptionlabel)
        self.splitter.addWidget(self.view)
        self.splitter.addWidget(box)

        self.splitter.setSizes([300, 200])
        self.splitter.splitterMoved.connect(
            lambda:
            setattr(self, "splitter_state", bytes(self.splitter.saveState()))
        )
        self.mainArea.layout().addWidget(self.splitter)

        proxy = QSortFilterProxyModel()
        proxy.setFilterKeyColumn(-1)
        proxy.setFilterCaseSensitivity(False)
        self.view.setModel(proxy)

        if self.splitter_state:
            self.splitter.restoreState(self.splitter_state)

        self.assign_delegates()

        self.setBlocking(True)
        self.setStatusMessage("Initializing")

        self._executor = ThreadPoolExecutor(max_workers=1)
        f = self._executor.submit(self.list_remote)
        w = FutureWatcher(f, parent=self)
        w.done.connect(self.__set_index)

    def assign_delegates(self):
        # NOTE: All columns must have size hinting delegates.
        # QTreeView queries only the columns displayed in the viewport so
        # the layout would be different depending in the horizontal scroll
        # position
        self.view.setItemDelegate(UniformHeightDelegate(self))
        self.view.setItemDelegateForColumn(
            self.Header.islocal,
            UniformHeightIndicatorDelegate(self, role=Qt.DisplayRole, indicatorSize=4)
        )
        self.view.setItemDelegateForColumn(
            self.Header.size,
            SizeDelegate(self)
        )
        self.view.setItemDelegateForColumn(
            self.Header.instances,
            NumericalDelegate(self)
        )
        self.view.setItemDelegateForColumn(
            self.Header.variables,
            NumericalDelegate(self)
        )
        self.view.resizeColumnToContents(self.Header.islocal)

    def _parse_info(self, file_path):
        if file_path in self.allinfo_remote:
            info = self.allinfo_remote[file_path]
        else:
            info = self.allinfo_local[file_path]

        islocal = file_path in self.allinfo_local
        isremote = file_path in self.allinfo_remote

        outdated = islocal and isremote and (
            self.allinfo_remote[file_path].get('version', '')
            != self.allinfo_local[file_path].get('version', '')
        )
        islocal &= not outdated

        prefix = os.path.join('', *file_path[:-1])
        filename = file_path[-1]

        return Namespace(file_path=file_path, prefix=prefix, filename=filename,
                         islocal=islocal, outdated=outdated, **info)

    def create_model(self):
        allkeys = set(self.allinfo_local) | set(self.allinfo_remote)
        allkeys = sorted(allkeys)

        model = QStandardItemModel(self)
        model.setHorizontalHeaderLabels(self._header_labels)

        current_index = -1
        for i, file_path in enumerate(allkeys):
            datainfo = self._parse_info(file_path)
            item1 = QStandardItem()
            item1.setData(" " if datainfo.islocal else "", Qt.DisplayRole)
            item1.setData(self.IndicatorBrushes[0], Qt.ForegroundRole)
            item1.setData(datainfo, Qt.UserRole)
            item2 = QStandardItem(datainfo.title)
            item3 = QStandardItem()
            item3.setData(datainfo.size, Qt.DisplayRole)
            item4 = QStandardItem()
            item4.setData(datainfo.instances, Qt.DisplayRole)
            item5 = QStandardItem()
            item5.setData(datainfo.variables, Qt.DisplayRole)
            item6 = QStandardItem()
            item6.setData(datainfo.target, Qt.DisplayRole)
            if datainfo.target:
                item6.setIcon(variable_icon(datainfo.target))
            item7 = QStandardItem()
            item7.setData(", ".join(datainfo.tags) if datainfo.tags else "",
                          Qt.DisplayRole)
            row = [item1, item2, item3, item4, item5, item6, item7]
            model.appendRow(row)

            if os.path.join(*file_path) == self.selected_id:
                current_index = i

        return model, current_index

    @Slot(object)
    def __set_index(self, f):
        # type: (Future) -> None
        # set results from `list_remote` query.
        assert QThread.currentThread() is self.thread()
        assert f.done()
        self.setBlocking(False)
        self.setStatusMessage("")
        self.allinfo_local = self.list_local()

        try:
            self.allinfo_remote = f.result()
        except Exception:  # anytying can happen, pylint: disable=broad-except
            log.exception("Error while fetching updated index")
            if not self.allinfo_local:
                self.Error.no_remote_datasets()
            else:
                self.Warning.only_local_datasets()
            self.allinfo_remote = {}

        model, current_index = self.create_model()

        self.view.model().setSourceModel(model)
        self.view.selectionModel().selectionChanged.connect(
            self.__on_selection
        )

        self.view.resizeColumnToContents(0)
        self.view.setColumnWidth(
            1, min(self.view.sizeHintForColumn(1),
                   self.view.fontMetrics().width("X" * 37)))

        header = self.view.header()
        header.restoreState(self.header_state)

        if current_index != -1:
            selmodel = self.view.selectionModel()
            selmodel.select(
                self.view.model().mapFromSource(model.index(current_index, 0)),
                QItemSelectionModel.ClearAndSelect | QItemSelectionModel.Rows)
            self.commit()

    def __update_cached_state(self):
        model = self.view.model().sourceModel()
        localinfo = self.list_local()
        assert isinstance(model, QStandardItemModel)
        allinfo = []
        for i in range(model.rowCount()):
            item = model.item(i, 0)
            info = item.data(Qt.UserRole)
            is_local = info.file_path in localinfo
            is_current = (is_local and
                          os.path.join(self.local_cache_path, *info.file_path)
                          == self.current_output)
            item.setData(" " * (is_local + is_current), Qt.DisplayRole)
            item.setData(self.IndicatorBrushes[is_current], Qt.ForegroundRole)
            allinfo.append(info)

    def selected_dataset(self):
        """
        Return the current selected dataset info or None if not selected

        Returns
        -------
        info : Optional[Namespace]
        """
        rows = self.view.selectionModel().selectedRows(0)
        assert 0 <= len(rows) <= 1
        current = rows[0] if rows else None  # type: Optional[QModelIndex]
        if current is not None:
            info = current.data(Qt.UserRole)
            assert isinstance(info, Namespace)
        else:
            info = None
        return info

    def filter(self):
        filter_string = self.filterLineEdit.text().strip()
        proxyModel = self.view.model()
        if proxyModel:
            proxyModel.setFilterFixedString(filter_string)

    def __on_selection(self):
        # Main datasets view selection has changed
        rows = self.view.selectionModel().selectedRows(0)
        assert 0 <= len(rows) <= 1
        current = rows[0] if rows else None  # type: Optional[QModelIndex]
        if current is not None:
            current = self.view.model().mapToSource(current)
            di = current.data(Qt.UserRole)
            text = description_html(di)
            self.descriptionlabel.setText(text)
            self.selected_id = os.path.join(di.prefix, di.filename)
        else:
            self.descriptionlabel.setText("")
            self.selected_id = None

    def commit(self):
        """
        Commit a dataset to the output immediately (if available locally) or
        schedule download background and an eventual send.

        During the download the widget is in blocking state
        (OWWidget.isBlocking)
        """
        di = self.selected_dataset()
        if di is not None:
            self.Error.clear()

            if self.__awaiting_state is not None:
                # disconnect from the __commit_complete
                self.__awaiting_state.watcher.done.disconnect(
                    self.__commit_complete)
                # .. and connect to update_cached_state
                # self.__awaiting_state.watcher.done.connect(
                #     self.__update_cached_state)
                # TODO: There are possible pending __progress_advance queued
                self.__awaiting_state.pb.advance.disconnect(
                    self.__progress_advance)
                self.progressBarFinished()
                self.__awaiting_state = None

            if not di.islocal:
                pr = progress()
                callback = lambda pr=pr: pr.advance.emit()
                pr.advance.connect(self.__progress_advance, Qt.QueuedConnection)

                self.progressBarInit()
                self.setStatusMessage("Fetching...")
                self.setBlocking(True)

                f = self._executor.submit(
                    ensure_local, self.INDEX_URL, di.file_path,
                    self.local_cache_path, force=di.outdated,
                    progress_advance=callback)
                w = FutureWatcher(f, parent=self)
                w.done.connect(self.__commit_complete)
                self.__awaiting_state = _FetchState(f, w, pr)
            else:
                self.setStatusMessage("")
                self.setBlocking(False)
                self.commit_cached(di.file_path)
        else:
            self.load_and_output(None)

    @Slot(object)
    def __commit_complete(self, f):
        # complete the commit operation after the required file has been
        # downloaded
        assert QThread.currentThread() is self.thread()
        assert self.__awaiting_state is not None
        assert self.__awaiting_state.future is f

        if self.isBlocking():
            self.progressBarFinished()
            self.setBlocking(False)
            self.setStatusMessage("")

        self.__awaiting_state = None

        try:
            path = f.result()
        # anything can happen here, pylint: disable=broad-except
        except Exception as ex:
            log.exception("Error:")
            self.error(format_exception(ex))
            path = None
        self.load_and_output(path)

    def commit_cached(self, file_path):
        path = LocalFiles(self.local_cache_path).localpath(*file_path)
        self.load_and_output(path)

    @Slot()
    def __progress_advance(self):
        assert QThread.currentThread() is self.thread()
        self.progressBarAdvance(1)

    def onDeleteWidget(self):
        super().onDeleteWidget()
        if self.__awaiting_state is not None:
            self.__awaiting_state.watcher.done.disconnect(self.__commit_complete)
            self.__awaiting_state.pb.advance.disconnect(self.__progress_advance)
            self.__awaiting_state = None

    @staticmethod
    def sizeHint():
        return QSize(1100, 500)

    def closeEvent(self, event):
        self.splitter_state = bytes(self.splitter.saveState())
        self.header_state = bytes(self.view.header().saveState())
        super().closeEvent(event)

    def load_and_output(self, path):
        if path is None:
            self.Outputs.data.send(None)
        else:
            data = self.load_data(path)
            self.Outputs.data.send(data)

        self.current_output = path
        self.__update_cached_state()

    @staticmethod
    def load_data(path):
        return Orange.data.Table(path)

    def list_remote(self):
        # type: () -> Dict[Tuple[str, ...], dict]
        client = ServerFiles(server=self.INDEX_URL)
        return client.allinfo()

    def list_local(self):
        # type: () -> Dict[Tuple[str, ...], dict]
        return LocalFiles(self.local_cache_path).allinfo()
Ejemplo n.º 3
0
class OWDataSets(OWWidget):
    name = "Datasets"
    description = "Load a dataset from an online repository"
    icon = "icons/DataSets.svg"
    priority = 20
    replaces = ["orangecontrib.prototypes.widgets.owdatasets.OWDataSets"]
    keywords = ["online"]

    # The following constants can be overridden in a subclass
    # to reuse this widget for a different repository
    # Take care when refactoring! (used in e.g. single-cell)
    INDEX_URL = "https://datasets.biolab.si/"
    DATASET_DIR = "datasets"

    # override HEADER_SCHEMA to define new columns
    # if schema is changed override methods: self.assign_delegates and
    # self.create_model
    HEADER_SCHEMA = [
        ['islocal', {'label': ''}],
        ['title', {'label': 'Title'}],
        ['size', {'label': 'Size'}],
        ['instances', {'label': 'Instances'}],
        ['variables', {'label': 'Variables'}],
        ['target', {'label': 'Target'}],
        ['tags', {'label': 'Tags'}]
    ]  # type: List[str, dict]

    class Error(OWWidget.Error):
        no_remote_datasets = Msg("Could not fetch dataset list")

    class Warning(OWWidget.Warning):
        only_local_datasets = Msg("Could not fetch datasets list, only local "
                                  "cached datasets are shown")

    class Outputs:
        data = Output("Data", Orange.data.Table)

    #: Selected dataset id
    selected_id = settings.Setting(None)   # type: Optional[str]

    auto_commit = settings.Setting(False)  # type: bool

    #: main area splitter state
    splitter_state = settings.Setting(b'')  # type: bytes
    header_state = settings.Setting(b'')    # type: bytes

    def __init__(self):
        super().__init__()
        self.allinfo_local = {}
        self.allinfo_remote = {}

        self.local_cache_path = os.path.join(data_dir(), self.DATASET_DIR)

        self._header_labels = [
            header['label'] for _, header in self.HEADER_SCHEMA]
        self._header_index = namedtuple(
            '_header_index', [info_tag for info_tag, _ in self.HEADER_SCHEMA])
        self.Header = self._header_index(
            *[index for index, _ in enumerate(self._header_labels)])

        self.__awaiting_state = None  # type: Optional[_FetchState]

        box = gui.widgetBox(self.controlArea, "Info")

        self.infolabel = QLabel(text="Initializing...\n\n")
        box.layout().addWidget(self.infolabel)

        gui.widgetLabel(self.mainArea, "Filter")
        self.filterLineEdit = QLineEdit(
            textChanged=self.filter
        )
        self.mainArea.layout().addWidget(self.filterLineEdit)

        self.splitter = QSplitter(orientation=Qt.Vertical)

        self.view = QTreeView(
            sortingEnabled=True,
            selectionMode=QTreeView.SingleSelection,
            alternatingRowColors=True,
            rootIsDecorated=False,
            editTriggers=QTreeView.NoEditTriggers,
            uniformRowHeights=True,
        )
        # the method doesn't exists yet, pylint: disable=unnecessary-lambda
        self.view.doubleClicked.connect(lambda: self.unconditional_commit())
        box = gui.widgetBox(self.splitter, "Description", addToLayout=False)
        self.descriptionlabel = QLabel(
            wordWrap=True,
            textFormat=Qt.RichText,
        )
        self.descriptionlabel = QTextBrowser(
            openExternalLinks=True,
            textInteractionFlags=(Qt.TextSelectableByMouse |
                                  Qt.LinksAccessibleByMouse)
        )
        self.descriptionlabel.setFrameStyle(QTextBrowser.NoFrame)
        # no (white) text background
        self.descriptionlabel.viewport().setAutoFillBackground(False)

        box.layout().addWidget(self.descriptionlabel)
        self.splitter.addWidget(self.view)
        self.splitter.addWidget(box)

        self.splitter.setSizes([300, 200])
        self.splitter.splitterMoved.connect(
            lambda:
            setattr(self, "splitter_state", bytes(self.splitter.saveState()))
        )
        self.mainArea.layout().addWidget(self.splitter)
        self.controlArea.layout().addStretch(10)
        gui.auto_commit(self.controlArea, self, "auto_commit", "Send Data")

        proxy = QSortFilterProxyModel()
        proxy.setFilterKeyColumn(-1)
        proxy.setFilterCaseSensitivity(False)
        self.view.setModel(proxy)

        if self.splitter_state:
            self.splitter.restoreState(self.splitter_state)

        self.assign_delegates()

        self.setBlocking(True)
        self.setStatusMessage("Initializing")

        self._executor = ThreadPoolExecutor(max_workers=1)
        f = self._executor.submit(self.list_remote)
        w = FutureWatcher(f, parent=self)
        w.done.connect(self.__set_index)

    def assign_delegates(self):
        # NOTE: All columns must have size hinting delegates.
        # QTreeView queries only the columns displayed in the viewport so
        # the layout would be different depending in the horizontal scroll
        # position
        self.view.setItemDelegate(UniformHeightDelegate(self))
        self.view.setItemDelegateForColumn(
            self.Header.islocal,
            UniformHeightIndicatorDelegate(self, role=Qt.DisplayRole)
        )
        self.view.setItemDelegateForColumn(
            self.Header.size,
            SizeDelegate(self)
        )
        self.view.setItemDelegateForColumn(
            self.Header.instances,
            NumericalDelegate(self)
        )
        self.view.setItemDelegateForColumn(
            self.Header.variables,
            NumericalDelegate(self)
        )
        self.view.resizeColumnToContents(self.Header.islocal)

    def _parse_info(self, file_path):
        if file_path in self.allinfo_remote:
            info = self.allinfo_remote[file_path]
        else:
            info = self.allinfo_local[file_path]

        islocal = file_path in self.allinfo_local
        isremote = file_path in self.allinfo_remote

        outdated = islocal and isremote and (
            self.allinfo_remote[file_path].get('version', '')
            != self.allinfo_local[file_path].get('version', '')
        )
        islocal &= not outdated

        prefix = os.path.join('', *file_path[:-1])
        filename = file_path[-1]

        return Namespace(file_path=file_path, prefix=prefix, filename=filename,
                         islocal=islocal, outdated=outdated, **info)

    def create_model(self):
        allkeys = set(self.allinfo_local) | set(self.allinfo_remote)
        allkeys = sorted(allkeys)

        model = QStandardItemModel(self)
        model.setHorizontalHeaderLabels(self._header_labels)

        current_index = -1
        for i, file_path in enumerate(allkeys):
            datainfo = self._parse_info(file_path)
            item1 = QStandardItem()
            item1.setData(" " if datainfo.islocal else "", Qt.DisplayRole)
            item1.setData(datainfo, Qt.UserRole)
            item2 = QStandardItem(datainfo.title)
            item3 = QStandardItem()
            item3.setData(datainfo.size, Qt.DisplayRole)
            item4 = QStandardItem()
            item4.setData(datainfo.instances, Qt.DisplayRole)
            item5 = QStandardItem()
            item5.setData(datainfo.variables, Qt.DisplayRole)
            item6 = QStandardItem()
            item6.setData(datainfo.target, Qt.DisplayRole)
            if datainfo.target:
                item6.setIcon(variable_icon(datainfo.target))
            item7 = QStandardItem()
            item7.setData(", ".join(datainfo.tags) if datainfo.tags else "",
                          Qt.DisplayRole)
            row = [item1, item2, item3, item4, item5, item6, item7]
            model.appendRow(row)

            if os.path.join(*file_path) == self.selected_id:
                current_index = i

        return model, current_index

    @Slot(object)
    def __set_index(self, f):
        # type: (Future) -> None
        # set results from `list_remote` query.
        assert QThread.currentThread() is self.thread()
        assert f.done()
        self.setBlocking(False)
        self.setStatusMessage("")
        self.allinfo_local = self.list_local()

        try:
            self.allinfo_remote = f.result()
        except Exception:  # anytying can happen, pylint: disable=broad-except
            log.exception("Error while fetching updated index")
            if not self.allinfo_local:
                self.Error.no_remote_datasets()
            else:
                self.Warning.only_local_datasets()
            self.allinfo_remote = {}

        model, current_index = self.create_model()

        self.view.model().setSourceModel(model)
        self.view.selectionModel().selectionChanged.connect(
            self.__on_selection
        )

        self.view.resizeColumnToContents(0)
        self.view.setColumnWidth(
            1, min(self.view.sizeHintForColumn(1),
                   self.view.fontMetrics().width("X" * 24)))

        header = self.view.header()
        header.restoreState(self.header_state)

        # Update the info text
        self.infolabel.setText(
            format_info(model.rowCount(), len(self.allinfo_local)))

        if current_index != -1:
            selmodel = self.view.selectionModel()
            selmodel.select(
                self.view.model().mapFromSource(model.index(current_index, 0)),
                QItemSelectionModel.ClearAndSelect | QItemSelectionModel.Rows)

    def __update_cached_state(self):
        model = self.view.model().sourceModel()
        localinfo = self.list_local()
        assert isinstance(model, QStandardItemModel)
        allinfo = []
        for i in range(model.rowCount()):
            item = model.item(i, 0)
            info = item.data(Qt.UserRole)
            info.islocal = info.file_path in localinfo
            item.setData(" " if info.islocal else "", Qt.DisplayRole)
            allinfo.append(info)

        self.infolabel.setText(format_info(
            model.rowCount(), sum(info.islocal for info in allinfo)))

    def selected_dataset(self):
        """
        Return the current selected dataset info or None if not selected

        Returns
        -------
        info : Optional[Namespace]
        """
        rows = self.view.selectionModel().selectedRows(0)
        assert 0 <= len(rows) <= 1
        current = rows[0] if rows else None  # type: Optional[QModelIndex]
        if current is not None:
            info = current.data(Qt.UserRole)
            assert isinstance(info, Namespace)
        else:
            info = None
        return info

    def filter(self):
        filter_string = self.filterLineEdit.text().strip()
        proxyModel = self.view.model()
        if proxyModel:
            proxyModel.setFilterFixedString(filter_string)

    def __on_selection(self):
        # Main datasets view selection has changed
        rows = self.view.selectionModel().selectedRows(0)
        assert 0 <= len(rows) <= 1
        current = rows[0] if rows else None  # type: Optional[QModelIndex]
        if current is not None:
            current = self.view.model().mapToSource(current)
            di = current.data(Qt.UserRole)
            text = description_html(di)
            self.descriptionlabel.setText(text)
            self.selected_id = os.path.join(di.prefix, di.filename)
        else:
            self.descriptionlabel.setText("")
            self.selected_id = None

        self.commit()

    def commit(self):
        """
        Commit a dataset to the output immediately (if available locally) or
        schedule download background and an eventual send.

        During the download the widget is in blocking state
        (OWWidget.isBlocking)
        """
        di = self.selected_dataset()
        if di is not None:
            self.Error.clear()

            if self.__awaiting_state is not None:
                # disconnect from the __commit_complete
                self.__awaiting_state.watcher.done.disconnect(
                    self.__commit_complete)
                # .. and connect to update_cached_state
                # self.__awaiting_state.watcher.done.connect(
                #     self.__update_cached_state)
                # TODO: There are possible pending __progress_advance queued
                self.__awaiting_state.pb.advance.disconnect(
                    self.__progress_advance)
                self.progressBarFinished(processEvents=None)
                self.__awaiting_state = None

            if not di.islocal:
                pr = progress()
                callback = lambda pr=pr: pr.advance.emit()
                pr.advance.connect(self.__progress_advance, Qt.QueuedConnection)

                self.progressBarInit(processEvents=None)
                self.setStatusMessage("Fetching...")
                self.setBlocking(True)

                f = self._executor.submit(
                    ensure_local, self.INDEX_URL, di.file_path,
                    self.local_cache_path, force=di.outdated,
                    progress_advance=callback)
                w = FutureWatcher(f, parent=self)
                w.done.connect(self.__commit_complete)
                self.__awaiting_state = _FetchState(f, w, pr)
            else:
                self.setStatusMessage("")
                self.setBlocking(False)
                self.commit_cached(di.file_path)
        else:
            self.Outputs.data.send(None)

    @Slot(object)
    def __commit_complete(self, f):
        # complete the commit operation after the required file has been
        # downloaded
        assert QThread.currentThread() is self.thread()
        assert self.__awaiting_state is not None
        assert self.__awaiting_state.future is f

        if self.isBlocking():
            self.progressBarFinished(processEvents=None)
            self.setBlocking(False)
            self.setStatusMessage("")

        self.__awaiting_state = None

        try:
            path = f.result()
        # anything can happen here, pylint: disable=broad-except
        except Exception as ex:
            log.exception("Error:")
            self.error(format_exception(ex))
            path = None

        self.__update_cached_state()

        if path is not None:
            data = self.load_data(path)
        else:
            data = None
        self.Outputs.data.send(data)

    def commit_cached(self, file_path):
        path = LocalFiles(self.local_cache_path).localpath(*file_path)
        self.Outputs.data.send(self.load_data(path))

    @Slot()
    def __progress_advance(self):
        assert QThread.currentThread() is self.thread()
        self.progressBarAdvance(1, processEvents=None)

    def onDeleteWidget(self):
        super().onDeleteWidget()
        if self.__awaiting_state is not None:
            self.__awaiting_state.watcher.done.disconnect(self.__commit_complete)
            self.__awaiting_state.pb.advance.disconnect(self.__progress_advance)
            self.__awaiting_state = None

    @staticmethod
    def sizeHint():
        return QSize(900, 600)

    def closeEvent(self, event):
        self.splitter_state = bytes(self.splitter.saveState())
        self.header_state = bytes(self.view.header().saveState())
        super().closeEvent(event)

    def load_data(self, path):  # pylint: disable=no-self-use
        return Orange.data.Table(path)

    def list_remote(self):
        # type: () -> Dict[Tuple[str, ...], dict]
        client = ServerFiles(server=self.INDEX_URL)
        return client.allinfo()

    def list_local(self):
        # type: () -> Dict[Tuple[str, ...], dict]
        return LocalFiles(self.local_cache_path).allinfo()
class OWMarkerGenes(widget.OWWidget):
    name = "Marker Genes"
    icon = 'icons/OWMarkerGenes.svg'
    priority = 130

    replaces = ['orangecontrib.single_cell.widgets.owmarkergenes.OWMarkerGenes']

    class Warning(widget.OWWidget.Warning):
        using_local_files = widget.Msg("Can't connect to serverfiles. Using cached files.")

    class Outputs:
        genes = widget.Output("Genes", Table)

    want_main_area = True
    want_control_area = True

    auto_commit = Setting(True)
    selected_source = Setting("")
    selected_organism = Setting("")
    selected_root_attribute = Setting(0)

    settingsHandler = MarkerGroupContextHandler()  # noqa: N815
    selected_genes = settings.ContextSetting([])

    settings_version = 2

    _data = None
    _available_sources = None

    def __init__(self) -> None:
        super().__init__()

        # define the layout
        main_area = QWidget(self.mainArea)
        self.mainArea.layout().addWidget(main_area)
        layout = QGridLayout()
        main_area.setLayout(layout)
        layout.setContentsMargins(4, 4, 4, 4)

        # filter line edit
        self.filter_line_edit = QLineEdit()
        self.filter_line_edit.setPlaceholderText("Filter marker genes")
        layout.addWidget(self.filter_line_edit, 0, 0, 1, 3)

        # define available markers view
        self.available_markers_view = TreeView()
        box = gui.vBox(self.mainArea, "Available markers", addToLayout=False)
        box.layout().addWidget(self.available_markers_view)
        layout.addWidget(box, 1, 0, 2, 1)

        # create selected markers view
        self.selected_markers_view = TreeView()
        box = gui.vBox(self.mainArea, "Selected markers", addToLayout=False)
        box.layout().addWidget(self.selected_markers_view)
        layout.addWidget(box, 1, 2, 2, 1)

        self.available_markers_view.otherView = self.selected_markers_view
        self.selected_markers_view.otherView = self.available_markers_view

        # buttons
        box = gui.vBox(self.mainArea, addToLayout=False, margin=0)
        layout.addWidget(box, 1, 1, 1, 1)
        self.move_button = gui.button(box, self, ">", callback=self._move_selected)

        self._init_description_area(layout)
        self._init_control_area()
        self._load_data()

    def _init_description_area(self, layout: QLayout) -> None:
        """
        Function define an info area with description of the genes and add it to the layout.
        """
        box = gui.widgetBox(self.mainArea, "Description", addToLayout=False)
        self.descriptionlabel = QTextBrowser(
            openExternalLinks=True, textInteractionFlags=(Qt.TextSelectableByMouse | Qt.LinksAccessibleByMouse)
        )
        box.setMaximumHeight(self.descriptionlabel.fontMetrics().height() * (NUM_LINES_TEXT + 3))

        # description filed
        self.descriptionlabel.setText("Select a gene to see information.")
        self.descriptionlabel.setFrameStyle(QTextBrowser.NoFrame)
        # no (white) text background
        self.descriptionlabel.viewport().setAutoFillBackground(False)
        box.layout().addWidget(self.descriptionlabel)
        layout.addWidget(box, 3, 0, 1, 3)

    def _init_control_area(self) -> None:
        """
        Function defines dropdowns and the button in the control area.
        """
        box = gui.widgetBox(self.controlArea, 'Database', margin=0)
        self.source_index = -1
        self.db_source_cb = gui.comboBox(box, self, 'source_index')
        self.db_source_cb.activated[int].connect(self._set_db_source_index)

        box = gui.widgetBox(self.controlArea, 'Organism', margin=0)
        self.organism_index = -1
        self.group_cb = gui.comboBox(box, self, 'organism_index')
        self.group_cb.activated[int].connect(self._set_group_index)

        box = gui.widgetBox(self.controlArea, 'Group by', margin=0)
        self.group_by_cb = gui.comboBox(
            box, self, 'selected_root_attribute', items=GROUP_BY_ITEMS, callback=self._setup
        )

        gui.rubber(self.controlArea)
        gui.auto_commit(self.controlArea, self, "auto_commit", "Commit")

    def sizeHint(self):
        return super().sizeHint().expandedTo(QSize(900, 500))

    @property
    def available_sources(self) -> dict:
        return self._available_sources

    @available_sources.setter
    def available_sources(self, value: dict) -> None:
        """
        Set _available_sources variable, add them to dropdown, and select the source that was previously.
        """
        self._available_sources = value

        items = sorted(list(value.keys()), reverse=True)  # panglao first
        try:
            idx = items.index(self.selected_source)
        except ValueError:
            idx = -1

        self.db_source_cb.clear()
        self.db_source_cb.addItems(items)

        if idx != -1:
            self.source_index = idx
            self.selected_source = items[idx]
        elif items:
            self.source_index = min(max(self.source_index, 0), len(items) - 1)

        self._set_db_source_index(self.source_index)

    @property
    def data(self) -> Table:
        return self._data

    @data.setter
    def data(self, value: Table):
        """
        Set the source data. The data is then filtered on the first meta column (group).
        Select set dropdown with the groups and select the one that was selected previously.
        """
        self._data = value
        domain = value.domain

        if domain.metas:
            group = domain.metas[0]
            groupcol, _ = value.get_column_view(group)

            if group.is_string:
                group_values = list(set(groupcol))
            elif group.is_discrete:
                group_values = group.values
            else:
                raise TypeError("Invalid column type")
            group_values = sorted(group_values)  # human first

            try:
                idx = group_values.index(self.selected_organism)
            except ValueError:
                idx = -1

            self.group_cb.clear()
            self.group_cb.addItems(group_values)

            if idx != -1:
                self.organism_index = idx
                self.selected_organism = group_values[idx]
            elif group_values:
                self.organism_index = min(max(self.organism_index, 0), len(group_values) - 1)

            self._set_group_index(self.organism_index)

    def _load_data(self) -> None:
        """
        Collect available data sources (marker genes data sets).
        """
        self.Warning.using_local_files.clear()

        found_sources = {}
        try:
            found_sources.update(serverfiles.ServerFiles().allinfo(SERVER_FILES_DOMAIN))
        except requests.exceptions.ConnectionError:
            found_sources.update(serverfiles.allinfo(SERVER_FILES_DOMAIN))
            self.Warning.using_local_files()

        self.available_sources = {item.get('title').split(': ')[-1]: item for item in found_sources.values()}

    def _source_changed(self) -> None:
        """
        Respond on change of the source and download the data.
        """
        if self.available_sources:
            file_name = self.available_sources[self.selected_source]['filename']

            try:
                serverfiles.update(SERVER_FILES_DOMAIN, file_name)
            except requests.exceptions.ConnectionError:
                # try to update file. Ignore network errors.
                pass

            try:
                file_path = serverfiles.localpath_download(SERVER_FILES_DOMAIN, file_name)
            except requests.exceptions.ConnectionError as err:
                # Unexpected error.
                raise err
            self.data = Table.from_file(file_path)

    def _setup(self) -> None:
        """
        Setup the views with data.
        """
        self.closeContext()
        self.selected_genes = []
        self.openContext((self.selected_organism, self.selected_source))
        data_not_selected, data_selected = self._filter_data_group(self.data)

        # add model to available markers view
        group_by = GROUP_BY_ITEMS[self.selected_root_attribute]
        tree_model = TreeModel(data_not_selected, group_by)
        proxy_model = FilterProxyModel(self.filter_line_edit)
        proxy_model.setSourceModel(tree_model)

        self.available_markers_view.setModel(proxy_model)
        self.available_markers_view.selectionModel().selectionChanged.connect(
            partial(self._on_selection_changed, self.available_markers_view)
        )

        tree_model = TreeModel(data_selected, group_by)
        proxy_model = FilterProxyModel(self.filter_line_edit)
        proxy_model.setSourceModel(tree_model)
        self.selected_markers_view.setModel(proxy_model)

        self.selected_markers_view.selectionModel().selectionChanged.connect(
            partial(self._on_selection_changed, self.selected_markers_view)
        )
        self.selected_markers_view.model().sourceModel().data_added.connect(self._selected_markers_changed)
        self.selected_markers_view.model().sourceModel().data_removed.connect(self._selected_markers_changed)

        # update output and messages
        self._selected_markers_changed()

    def _filter_data_group(self, data: Table) -> Tuple[Table, Tuple]:
        """
        Function filter the table based on the selected group (Mouse, Human) and divide them in two groups based on
        selected_data variable.

        Parameters
        ----------
        data
            Table to be filtered

        Returns
        -------
        data_not_selected
            Data that will initially be in available markers view.
        data_selected
            Data that will initially be in selected markers view.
        """
        group = data.domain.metas[0]
        gvec = data.get_column_view(group)[0]

        if group.is_string:
            mask = gvec == self.selected_organism
        else:
            mask = gvec == self.organism_index
        data = data[mask]

        # divide data based on selected_genes variable (context)
        unique_gene_names = np.core.defchararray.add(
            data.get_column_view("Entrez ID")[0].astype(str), data.get_column_view("Cell Type")[0].astype(str)
        )
        mask = np.isin(unique_gene_names, self.selected_genes)
        data_not_selected = data[~mask]
        data_selected = data[mask]
        return data_not_selected, data_selected

    def commit(self) -> None:
        rows = self.selected_markers_view.model().sourceModel().rootItem.get_data_rows()
        if len(rows) > 0:
            metas = [r.metas for r in rows]
            data = Table.from_numpy(self.data.domain, np.empty((len(metas), 0)), metas=np.array(metas))
            # always false for marker genes data tables in single cell
            data.attributes[GENE_AS_ATTRIBUTE_NAME] = False
            # set taxonomy id in data.attributes
            data.attributes[TAX_ID] = MAP_GROUP_TO_TAX_ID.get(self.selected_organism, '')
            # set column id flag
            data.attributes[GENE_ID_COLUMN] = "Entrez ID"
            data.name = 'Marker Genes'
        else:
            data = None
        self.Outputs.genes.send(data)

    def _update_description(self, view: TreeView) -> None:
        """
        Upate the description about the gene. Only in case when one gene is selected.
        """
        selection = self._selected_rows(view)
        qmodel = view.model().sourceModel()

        if len(selection) > 1 or len(selection) == 0 or qmodel.node_from_index(selection[0]).data_row is None:
            self.descriptionlabel.setText("Select a gene to see information.")
        else:
            data_row = qmodel.node_from_index(selection[0]).data_row
            self.descriptionlabel.setHtml(
                f"<b>Gene name:</b> {data_row['Name']}<br/>"
                f"<b>Entrez ID:</b> {data_row['Entrez ID']}<br/>"
                f"<b>Cell Type:</b> {data_row['Cell Type']}<br/>"
                f"<b>Function:</b> {data_row['Function']}<br/>"
                f"<b>Reference:</b> <a href='{data_row['URL']}'>{data_row['Reference']}</a>"
            )

    def _update_data_info(self) -> None:
        """
        Updates output info in the control area.
        """
        sel_model = self.selected_markers_view.model().sourceModel()
        self.info.set_output_summary(f"Selected: {str(len(sel_model))}")

    # callback functions

    def _selected_markers_changed(self) -> None:
        """
        This function is called when markers in the selected view are added or removed.
        """
        rows = self.selected_markers_view.model().sourceModel().rootItem.get_data_rows()
        self.selected_genes = [row["Entrez ID"].value + row["Cell Type"].value for row in rows]
        self._update_data_info()
        self.commit()

    def _on_selection_changed(self, view: TreeView) -> None:
        """
        When selection in one of the view changes in a view button should change a sign in the correct direction and
        other view should reset the selection. Also gene description is updated.
        """
        self.move_button.setText(">" if view is self.available_markers_view else "<")
        if view is self.available_markers_view:
            self.selected_markers_view.clearSelection()
        else:
            self.available_markers_view.clearSelection()
        self._update_description(view)

    def _set_db_source_index(self, source_index: int) -> None:
        """
        Set the index of selected database source - index in a dropdown.
        """
        self.source_index = source_index
        self.selected_source = self.db_source_cb.itemText(source_index)
        self._source_changed()

    def _set_group_index(self, group_index: int) -> None:
        """
        Set the index of organism - index in a dropdown.
        """
        self.organism_index = group_index
        self.selected_organism = self.group_cb.itemText(group_index)
        self._setup()

    def _move_selected(self) -> None:
        """
        Move selected genes when button clicked.
        """
        if self._selected_rows(self.selected_markers_view):
            self._move_selected_from_to(self.selected_markers_view, self.available_markers_view)
        elif self._selected_rows(self.available_markers_view):
            self._move_selected_from_to(self.available_markers_view, self.selected_markers_view)

    # support functions for callbacks

    def _move_selected_from_to(self, src: TreeView, dst: TreeView) -> None:
        """
        Function moves items from src model to dst model.
        """
        selected_items = self._selected_rows(src)

        src_model = src.model().sourceModel()
        dst_model = dst.model().sourceModel()

        # move data as mimeData from source to destination tree view
        mime_data = src_model.mimeData(selected_items)
        # remove nodes from the source view
        src_model.remove_node_list(selected_items)
        dst_model.dropMimeData(mime_data, Qt.MoveAction, -1, -1)

    @staticmethod
    def _selected_rows(view: TreeView) -> List[QModelIndex]:
        """
        Return the selected rows in the view.
        """
        rows = view.selectionModel().selectedRows()
        return list(map(view.model().mapToSource, rows))

    @classmethod
    def migrate_settings(cls, settings, version=0):
        def migrate_to_version_2():
            settings["selected_source"] = settings.pop("selected_db_source", "")
            settings["selected_organism"] = settings.pop("selected_group", "")
            if "context_settings" in settings:
                for co in settings["context_settings"]:
                    co.values["selected_genes"] = [g[0] + g[1] for g in co.values["selected_genes"]]

        if version < 2:
            migrate_to_version_2()
Ejemplo n.º 5
0
class OWBatchImport(widget.OWWidget):
    name = '批量导入'
    description = "数据批量导入"
    icon = 'icons/batchimport.svg'
    priority = 90

    # 定义输出信号
    class Outputs:
        data = Output("Data", Orange.data.Table, doc="Table",
                      default=True)  #输出Orange Table格式的数据
        dataFrame = Output("Data Frame", pd.DataFrame,
                           doc="DataFrame")  #输出Pandas DataFrame格式的数据

    # 不绘制内容区域
    want_main_area = False

    def __init__(self):
        grid = QGridLayout()
        self.dir_label = QLabel("目录:", self)
        self.browse_button = QPushButton(
            "…",
            icon=self.style().standardIcon(QStyle.SP_DirOpenIcon),
            toolTip="Browse filesystem",
            autoDefault=False,
        )
        self.browse_button.clicked.connect(self.browse)
        grid.addWidget(self.dir_label, 0, 1, 1, 1)
        grid.addWidget(self.browse_button, 0, 2, 1, 1)
        self.controlArea.layout().addLayout(grid)

        box = gui.widgetBox(self.controlArea, "Info", addSpace=False)
        self.summary_text = QTextBrowser(
            verticalScrollBarPolicy=Qt.ScrollBarAsNeeded,
            readOnly=True,
        )
        self.summary_text.viewport().setBackgroundRole(QPalette.NoRole)
        self.summary_text.setFrameStyle(QTextBrowser.NoFrame)
        self.summary_text.setMinimumHeight(self.fontMetrics().ascent() * 2 + 4)
        self.summary_text.viewport().setAutoFillBackground(False)
        box.layout().addWidget(self.summary_text)

    @Slot()
    def browse(self, prefixname=None, directory=None):
        """
        Open a file dialog and select a user specified file.
        """
        dir_path = QFileDialog.getExistingDirectory(self, "选择目录", "~/map")
        self.dir_label.setText(dir_path)

        # 将指定目录中的csv文件集合导入并转为pandas的dataframe对象
        df = self._get_data(dir_path)
        self.summary_text.setText("共载入{}条记录。".format(df.shape[0]))

        # 向数据通道发送数据
        self.Outputs.dataFrame.send(df)
        self.Outputs.data.send(pandas_to_table(df))

    def _get_data(self, dir_path):
        filecsv_list = []
        for root, dirs, files in os.walk(dir_path):
            for file in files:
                if os.path.splitext(file)[1] == '.csv':
                    filecsv_list.append(os.path.join(root, file))
        data = pd.DataFrame()
        for csv in filecsv_list:
            df_tmp = pd.read_csv(csv, header=0, encoding='utf-8')
            data = data.append(df_tmp, ignore_index=True)

        return data