Ejemplo n.º 1
0
class FileBrowser(QWidget):

    fileSelectedSignal = pyqtSignal(QString)
    directorySelectedSignal = pyqtSignal(QString)

    def __init__(self, *args):
        super(FileBrowser, self).__init__(*args)

        layout = QVBoxLayout()
        model = QFileSystemModel()

        filters = ["*.jpg", "*.JPG", "*.jpeg", "*.JPEG","*.png","*.PNG"]
        model.setNameFilters(filters)
        
        self.directoryTree = QTreeView()
	self.directoryTree.setModel(model)
        self.directoryTree.currentChanged = self.currentChanged
        self.directoryTree.setSortingEnabled(True)
        self.directoryTree.sortByColumn(0, Qt.AscendingOrder)

        self.fileList = QListWidget()

        layout.addWidget(self.directoryTree)
        self.setLayout(layout)
        
        
        root = model.setRootPath(QDir.homePath())
        self.directoryTree.setRootIndex(root)

    def currentChanged(self, current, previous):
        QTreeView.currentChanged(self.directoryTree, current, previous)
        currentIndex = self.directoryTree.selectionModel().currentIndex()
        path = str ( self.directoryTree.model().filePath(currentIndex) )
        
        if self.directoryTree.model().isDir(currentIndex):
            self.directorySelectedSignal.emit(path)
        else:
            self.directorySelectedSignal.emit(os.path.dirname(path))
            self.fileSelectedSignal.emit(path)
Ejemplo n.º 2
0
class FileBrowser(QWidget):

    fileSelectedSignal = pyqtSignal(QString)
    directorySelectedSignal = pyqtSignal(QString)

    def __init__(self, *args):
        super(FileBrowser, self).__init__(*args)

        layout = QVBoxLayout()
        model = QFileSystemModel()

        filters = ["*.jpg", "*.JPG", "*.jpeg", "*.JPEG", "*.png", "*.PNG"]
        model.setNameFilters(filters)

        self.directoryTree = QTreeView()
        self.directoryTree.setModel(model)
        self.directoryTree.currentChanged = self.currentChanged
        self.directoryTree.setSortingEnabled(True)
        self.directoryTree.sortByColumn(0, Qt.AscendingOrder)

        self.fileList = QListWidget()

        layout.addWidget(self.directoryTree)
        self.setLayout(layout)

        root = model.setRootPath(QDir.homePath())
        self.directoryTree.setRootIndex(root)

    def currentChanged(self, current, previous):
        QTreeView.currentChanged(self.directoryTree, current, previous)
        currentIndex = self.directoryTree.selectionModel().currentIndex()
        path = str(self.directoryTree.model().filePath(currentIndex))

        if self.directoryTree.model().isDir(currentIndex):
            self.directorySelectedSignal.emit(path)
        else:
            self.directorySelectedSignal.emit(os.path.dirname(path))
            self.fileSelectedSignal.emit(path)
Ejemplo n.º 3
0
class OWGeneInfo(widget.OWWidget):
    name = "Gene Info"
    description = "Displays gene information from NCBI and other sources."
    icon = "../widgets/icons/GeneInfo.svg"
    priority = 2010

    inputs = [("Data", Orange.data.Table, "setData")]
    outputs = [("Data Subset", Orange.data.Table)]

    settingsHandler = settings.DomainContextHandler()

    organism_index = settings.ContextSetting(0)
    taxid = settings.ContextSetting("9606")

    gene_attr = settings.ContextSetting(0)

    auto_commit = settings.Setting(False)
    search_string = settings.Setting("")

    useAttr = settings.ContextSetting(False)
    useAltSource = settings.ContextSetting(False)

    def __init__(self, parent=None, ):
        super().__init__(self, parent)

        self.selectionChangedFlag = False

        self.__initialized = False
        self.initfuture = None
        self.itemsfuture = None

        self.infoLabel = gui.widgetLabel(
            gui.widgetBox(self.controlArea, "Info", addSpace=True),
            "Initializing\n"
        )

        self.organisms = None
        self.organismBox = gui.widgetBox(
            self.controlArea, "Organism", addSpace=True)

        self.organismComboBox = gui.comboBox(
            self.organismBox, self, "organism_index",
            callback=self._onSelectedOrganismChanged)

        # For now only support one alt source, with a checkbox
        # In the future this can be extended to multiple selections
        self.altSourceCheck = gui.checkBox(
            self.organismBox, self, "useAltSource",
            "Show information from dictyBase",
            callback=self.onAltSourceChange)

        self.altSourceCheck.hide()

        box = gui.widgetBox(self.controlArea, "Gene names", addSpace=True)
        self.geneAttrComboBox = gui.comboBox(
            box, self, "gene_attr",
            "Gene atttibute", callback=self.updateInfoItems
        )
        self.geneAttrComboBox.setEnabled(not self.useAttr)
        cb = gui.checkBox(box, self, "useAttr", "Use attribute names",
                          callback=self.updateInfoItems)
        cb.toggled[bool].connect(self.geneAttrComboBox.setDisabled)

        gui.auto_commit(self.controlArea, self, "auto_commit", "Commit")

        # A label for dictyExpress link (Why oh god why???)
        self.dictyExpressBox = gui.widgetBox(
            self.controlArea, "Dicty Express")
        self.linkLabel = gui.widgetLabel(self.dictyExpressBox, "")
        self.linkLabel.setOpenExternalLinks(False)
        self.linkLabel.linkActivated.connect(self.onDictyExpressLink)

        self.dictyExpressBox.hide()

        gui.rubber(self.controlArea)

        gui.lineEdit(self.mainArea, self, "search_string", "Filter",
                     callbackOnType=True, callback=self.searchUpdate)

        self.treeWidget = QTreeView(
            self.mainArea,
            selectionMode=QTreeView.ExtendedSelection,
            rootIsDecorated=False,
            uniformRowHeights=True,
            sortingEnabled=True)

        self.treeWidget.setItemDelegate(
            gui.LinkStyledItemDelegate(self.treeWidget))
        self.treeWidget.viewport().setMouseTracking(True)
        self.mainArea.layout().addWidget(self.treeWidget)

        box = gui.widgetBox(self.mainArea, "", orientation="horizontal")
        gui.button(box, self, "Select Filtered", callback=self.selectFiltered)
        gui.button(box, self, "Clear Selection",
                   callback=self.treeWidget.clearSelection)

        self.geneinfo = []
        self.cells = []
        self.row2geneinfo = {}
        self.data = None

        # : (# input genes, # matches genes)
        self.matchedInfo = 0, 0

        self.setBlocking(True)
        self.executor = ThreadExecutor(self)

        self.progressBarInit()

        task = Task(
            function=partial(
                taxonomy.ensure_downloaded,
                callback=methodinvoke(self, "advance", ())
            )
        )

        task.resultReady.connect(self.initialize)
        task.exceptionReady.connect(self._onInitializeError)

        self.initfuture = self.executor.submit(task)

    def sizeHint(self):
        return QSize(1024, 720)

    @Slot()
    def advance(self):
        assert self.thread() is QThread.currentThread()
        self.progressBarSet(self.progressBarValue + 1,
                            processEvents=None)

    def initialize(self):
        if self.__initialized:
            # Already initialized
            return
        self.__initialized = True

        self.organisms = sorted(
            set([name.split(".")[-2] for name in
                 serverfiles.listfiles("NCBI_geneinfo")] +
                gene.NCBIGeneInfo.common_taxids())
        )

        self.organismComboBox.addItems(
            [taxonomy.name(tax_id) for tax_id in self.organisms]
        )
        if self.taxid in self.organisms:
            self.organism_index = self.organisms.index(self.taxid)
        else:
            self.organism_index = 0
            self.taxid = self.organisms[self.organism_index]

        self.altSourceCheck.setVisible(self.taxid == DICTY_TAXID)
        self.dictyExpressBox.setVisible(self.taxid == DICTY_TAXID)

        self.infoLabel.setText("No data on input\n")
        self.initfuture = None

        self.setBlocking(False)
        self.progressBarFinished(processEvents=None)

    def _onInitializeError(self, exc):
        sys.excepthook(type(exc), exc.args, None)
        self.error(0, "Could not download the necessary files.")

    def _onSelectedOrganismChanged(self):
        assert 0 <= self.organism_index <= len(self.organisms)
        self.taxid = self.organisms[self.organism_index]
        self.altSourceCheck.setVisible(self.taxid == DICTY_TAXID)
        self.dictyExpressBox.setVisible(self.taxid == DICTY_TAXID)

        if self.data is not None:
            self.updateInfoItems()

    def setData(self, data=None):
        if not self.__initialized:
            self.initfuture.result()
            self.initialize()

        if self.itemsfuture is not None:
            raise Exception("Already processing")

        self.closeContext()
        self.data = data

        if data is not None:
            self.geneAttrComboBox.clear()
            self.attributes = \
                [attr for attr in data.domain.variables + data.domain.metas
                 if isinstance(attr, (Orange.data.StringVariable,
                                      Orange.data.DiscreteVariable))]

            for var in self.attributes:
                self.geneAttrComboBox.addItem(*gui.attributeItem(var))

            self.taxid = data_hints.get_hint(self.data, "taxid", self.taxid)
            self.useAttr = data_hints.get_hint(
                self.data, "genesinrows", self.useAttr)

            self.openContext(data)
            self.gene_attr = min(self.gene_attr, len(self.attributes) - 1)

            if self.taxid in self.organisms:
                self.organism_index = self.organisms.index(self.taxid)
            else:
                self.organism_index = 0
                self.taxid = self.organisms[self.organism_index]

            self.updateInfoItems()
        else:
            self.clear()

    def infoSource(self):
        """ Return the current selected info source getter function from
        INFO_SOURCES
        """
        org = self.organisms[min(self.organism_index, len(self.organisms) - 1)]
        if org not in INFO_SOURCES:
            org = "default"
        sources = INFO_SOURCES[org]
        name, func = sources[min(self.useAltSource, len(sources) - 1)]
        return name, func

    def inputGenes(self):
        if self.useAttr:
            genes = [attr.name for attr in self.data.domain.attributes]
        elif self.attributes:
            attr = self.attributes[self.gene_attr]
            genes = [str(ex[attr]) for ex in self.data
                     if not math.isnan(ex[attr])]
        else:
            genes = []
        return genes

    def updateInfoItems(self):
        self.warning(0)
        if self.data is None:
            return

        genes = self.inputGenes()
        if self.useAttr:
            genes = [attr.name for attr in self.data.domain.attributes]
        elif self.attributes:
            attr = self.attributes[self.gene_attr]
            genes = [str(ex[attr]) for ex in self.data
                     if not math.isnan(ex[attr])]
        else:
            genes = []
        if not genes:
            self.warning(0, "Could not extract genes from input dataset.")

        self.warning(1)
        org = self.organisms[min(self.organism_index, len(self.organisms) - 1)]
        source_name, info_getter = self.infoSource()

        self.error(0)

        self.updateDictyExpressLink(genes, show=org == DICTY_TAXID)
        self.altSourceCheck.setVisible(org == DICTY_TAXID)

        self.progressBarInit()
        self.setBlocking(True)
        self.setEnabled(False)
        self.infoLabel.setText("Retrieving info records.\n")

        self.genes = genes

        task = Task(
            function=partial(
                info_getter, org, genes,
                advance=methodinvoke(self, "advance", ()))
        )
        self.itemsfuture = self.executor.submit(task)
        task.finished.connect(self._onItemsCompleted)

    def _onItemsCompleted(self):
        self.setBlocking(False)
        self.progressBarFinished()
        self.setEnabled(True)

        try:
            schema, geneinfo = self.itemsfuture.result()
        finally:
            self.itemsfuture = None

        self.geneinfo = geneinfo = list(zip(self.genes, geneinfo))
        self.cells = cells = []
        self.row2geneinfo = {}
        links = []
        for i, (_, gi) in enumerate(geneinfo):
            if gi:
                row = []
                for _, item in zip(schema, gi):
                    if isinstance(item, Link):
                        # TODO: This should be handled by delegates
                        row.append(item.text)
                        links.append(item.link)
                    else:
                        row.append(item)
                cells.append(row)
                self.row2geneinfo[len(cells) - 1] = i

        model = TreeModel(cells, [str(col) for col in schema], None)

        model.setColumnLinks(0, links)
        proxyModel = QSortFilterProxyModel(self)
        proxyModel.setSourceModel(model)
        self.treeWidget.setModel(proxyModel)
        self.treeWidget.selectionModel().selectionChanged.connect(self.commit)

        for i in range(7):
            self.treeWidget.resizeColumnToContents(i)
            self.treeWidget.setColumnWidth(
                i, min(self.treeWidget.columnWidth(i), 200)
            )

        self.infoLabel.setText("%i genes\n%i matched NCBI's IDs" %
                               (len(self.genes), len(cells)))
        self.matchedInfo = len(self.genes), len(cells)

    def clear(self):
        self.infoLabel.setText("No data on input\n")
        self.treeWidget.setModel(
            TreeModel([], ["NCBI ID", "Symbol", "Locus Tag",
                           "Chromosome", "Description", "Synonyms",
                           "Nomenclature"], self.treeWidget))

        self.geneAttrComboBox.clear()
        self.send("Data Subset", None)

    def commit(self):
        if self.data is None:
            self.send("Data Subset", None)
            return

        model = self.treeWidget.model()
        selection = self.treeWidget.selectionModel().selection()
        selection = model.mapSelectionToSource(selection)
        selectedRows = list(
            chain(*(range(r.top(), r.bottom() + 1) for r in selection))
        )

        model = model.sourceModel()

        selectedGeneids = [self.row2geneinfo[row] for row in selectedRows]
        selectedIds = [self.geneinfo[i][0] for i in selectedGeneids]
        selectedIds = set(selectedIds)
        gene2row = dict((self.geneinfo[self.row2geneinfo[row]][0], row)
                        for row in selectedRows)

        isselected = selectedIds.__contains__

        if self.useAttr:
            def is_selected(attr):
                return attr.name in selectedIds
            attrs = [attr for attr in self.data.domain.attributes
                     if isselected(attr.name)]
            domain = Orange.data.Domain(
                attrs, self.data.domain.class_vars, self.data.domain.metas)
            newdata = self.data.from_table(domain, self.data)
            self.send("Data Subset", newdata)

        elif self.attributes:
            attr = self.attributes[self.gene_attr]
            gene_col = [attr.str_val(v)
                        for v in self.data.get_column_view(attr)[0]]
            gene_col = [(i, name) for i, name in enumerate(gene_col)
                        if isselected(name)]
            indices = [i for i, _ in gene_col]

            # Add a gene info columns to the output
            headers = [str(model.headerData(i, Qt.Horizontal, Qt.DisplayRole))
                       for i in range(model.columnCount())]
            metas = [Orange.data.StringVariable(name) for name in headers]
            domain = Orange.data.Domain(
                self.data.domain.attributes, self.data.domain.class_vars,
                self.data.domain.metas + tuple(metas))

            newdata = self.data.from_table(domain, self.data)[indices]

            model_rows = [gene2row[gene] for _, gene in gene_col]
            for col, meta in zip(range(model.columnCount()), metas):
                col_data = [str(model.index(row, col).data(Qt.DisplayRole))
                            for row in model_rows]
                newdata[:, meta] = col_data

            if not len(newdata):
                newdata = None

            self.send("Data Subset", newdata)
        else:
            self.send("Data Subset", None)

    def rowFiltered(self, row):
        searchStrings = self.search_string.lower().split()
        row = " ".join(self.cells[row]).lower()
        return not all([s in row for s in searchStrings])

    def searchUpdate(self):
        if not self.data:
            return
        searchStrings = self.search_string.lower().split()
        index = self.treeWidget.model().sourceModel().index
        mapFromSource = self.treeWidget.model().mapFromSource
        for i, row in enumerate(self.cells):
            row = " ".join(row).lower()
            self.treeWidget.setRowHidden(
                mapFromSource(index(i, 0)).row(),
                QModelIndex(),
                not all([s in row for s in searchStrings]))

    def selectFiltered(self):
        if not self.data:
            return
        itemSelection = QItemSelection()

        index = self.treeWidget.model().sourceModel().index
        mapFromSource = self.treeWidget.model().mapFromSource
        for i, row in enumerate(self.cells):
            if not self.rowFiltered(i):
                itemSelection.select(mapFromSource(index(i, 0)),
                                     mapFromSource(index(i, 0)))
        self.treeWidget.selectionModel().select(
            itemSelection,
            QItemSelectionModel.Select | QItemSelectionModel.Rows)

    def updateDictyExpressLink(self, genes, show=False):
        def fix(ddb):
            if ddb.startswith("DDB"):
                if not ddb.startswith("DDB_G"):
                    ddb = ddb.replace("DDB", "DDB_G")
                return ddb
            return None
        if show:
            genes = [fix(gene) for gene in genes if fix(gene)]
            link1 = '<a href="http://dictyexpress.biolab.si/run/index.php?gene=%s">Microarray profile</a>'
            link2 = '<a href="http://dictyexpress.biolab.si/run/index.php?gene=%s&db=rnaseq">RNA-Seq profile</a>'
            self.linkLabel.setText(link1 + "<br/>" + link2)

            show = any(genes)

        if show:
            self.dictyExpressBox.show()
        else:
            self.dictyExpressBox.hide()

    def onDictyExpressLink(self, link):
        if not self.data:
            return

        selectedIndexes = self.treeWidget.selectedIndexes()
        if not len(selectedIndexes):
            QMessageBox.information(
                self, "No gene ids selected",
                "Please select some genes and try again."
            )
            return
        model = self.treeWidget.model()
        mapToSource = model.mapToSource
        selectedRows = self.treeWidget.selectedIndexes()
        selectedRows = [mapToSource(index).row() for index in selectedRows]
        model = model.sourceModel()

        selectedGeneids = [self.row2geneinfo[row] for row in selectedRows]
        selectedIds = [self.geneinfo[i][0] for i in selectedGeneids]
        selectedIds = set(selectedIds)

        def fix(ddb):
            if ddb.startswith("DDB"):
                if not ddb.startswith("DDB_G"):
                    ddb = ddb.replace("DDB", "DDB_G")
                return ddb
            return None

        genes = [fix(gene) for gene in selectedIds if fix(gene)]
        url = str(link) % " ".join(genes)
        QDesktopServices.openUrl(QUrl(url))

    def onAltSourceChange(self):
        self.updateInfoItems()

    def onDeleteWidget(self):
        # try to cancel pending tasks
        if self.initfuture:
            self.initfuture.cancel()
        if self.itemsfuture:
            self.itemsfuture.cancel()

        self.executor.shutdown(wait=False)
        super().onDeleteWidget()
Ejemplo n.º 4
0
class OWGEODatasets(OWWidget):
    name = "GEO Data Sets"
    description = DESCRIPTION
    icon = "../widgets/icons/GEODataSets.svg"
    priority = PRIORITY

    inputs = []
    outputs = [("Expression Data", Orange.data.Table)]

    settingsList = [
        "outputRows", "mergeSpots", "gdsSelectionStates", "splitterSettings",
        "currentGds", "autoCommit", "datasetNames"
    ]

    outputRows = Setting(True)
    mergeSpots = Setting(True)
    gdsSelectionStates = Setting({})
    currentGds = Setting(None)
    datasetNames = Setting({})
    splitterSettings = Setting((
        b'\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x01\xea\x00\x00\x00\xd7\x01\x00\x00\x00\x07\x01\x00\x00\x00\x02',
        b'\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x01\xb5\x00\x00\x02\x10\x01\x00\x00\x00\x07\x01\x00\x00\x00\x01'
    ))

    autoCommit = Setting(False)

    def __init__(self, parent=None, signalManager=None, name=" GEO Data Sets"):
        OWWidget.__init__(self, parent, signalManager, name)

        self.selectionChanged = False
        self.filterString = ""
        self.datasetName = ""

        ## GUI
        box = gui.widgetBox(self.controlArea, "Info", addSpace=True)
        self.infoBox = gui.widgetLabel(box, "Initializing\n\n")

        box = gui.widgetBox(self.controlArea, "Output", addSpace=True)
        gui.radioButtonsInBox(box,
                              self,
                              "outputRows",
                              ["Genes in rows", "Samples in rows"],
                              "Rows",
                              callback=self.commitIf)

        gui.checkBox(box,
                     self,
                     "mergeSpots",
                     "Merge spots of same gene",
                     callback=self.commitIf)

        gui.separator(box)
        self.nameEdit = gui.lineEdit(
            box,
            self,
            "datasetName",
            "Data set name",
            tooltip="Override the default output data set name",
            callback=self.onNameEdited)
        self.nameEdit.setPlaceholderText("")

        if sys.version_info < (3, ):
            box = gui.widgetBox(self.controlArea, "Commit", addSpace=True)
            self.commitButton = gui.button(box,
                                           self,
                                           "Commit",
                                           callback=self.commit)
            cb = gui.checkBox(box, self, "autoCommit", "Commit on any change")
            gui.setStopper(self, self.commitButton, cb, "selectionChanged",
                           self.commit)
        else:
            gui.auto_commit(self.controlArea,
                            self,
                            "autoCommit",
                            "Commit",
                            box="Commit")
            self.commitIf = self.commit

        gui.rubber(self.controlArea)

        gui.widgetLabel(self.mainArea, "Filter")
        self.filterLineEdit = QLineEdit(textChanged=self.filter)
        self.completer = TokenListCompleter(self,
                                            caseSensitivity=Qt.CaseInsensitive)
        self.filterLineEdit.setCompleter(self.completer)

        self.mainArea.layout().addWidget(self.filterLineEdit)

        splitter = QSplitter(Qt.Vertical, self.mainArea)
        self.mainArea.layout().addWidget(splitter)
        self.treeWidget = QTreeView(splitter)

        self.treeWidget.setSelectionMode(QTreeView.SingleSelection)
        self.treeWidget.setRootIsDecorated(False)
        self.treeWidget.setSortingEnabled(True)
        self.treeWidget.setAlternatingRowColors(True)
        self.treeWidget.setUniformRowHeights(True)
        self.treeWidget.setEditTriggers(QTreeView.NoEditTriggers)

        linkdelegate = LinkStyledItemDelegate(self.treeWidget)
        self.treeWidget.setItemDelegateForColumn(1, linkdelegate)
        self.treeWidget.setItemDelegateForColumn(8, linkdelegate)
        self.treeWidget.setItemDelegateForColumn(
            0, gui.IndicatorItemDelegate(self.treeWidget, role=Qt.DisplayRole))

        proxyModel = MySortFilterProxyModel(self.treeWidget)
        self.treeWidget.setModel(proxyModel)
        self.treeWidget.selectionModel().selectionChanged.connect(
            self.updateSelection)
        self.treeWidget.viewport().setMouseTracking(True)

        splitterH = QSplitter(Qt.Horizontal, splitter)

        box = gui.widgetBox(splitterH, "Description")
        self.infoGDS = gui.widgetLabel(box, "")
        self.infoGDS.setWordWrap(True)
        gui.rubber(box)

        box = gui.widgetBox(splitterH, "Sample Annotations")
        self.annotationsTree = QTreeWidget(box)
        self.annotationsTree.setHeaderLabels(
            ["Type (Sample annotations)", "Sample count"])
        self.annotationsTree.setRootIsDecorated(True)
        box.layout().addWidget(self.annotationsTree)
        self.annotationsTree.itemChanged.connect(
            self.annotationSelectionChanged)
        self._annotationsUpdating = False
        self.splitters = splitter, splitterH

        for sp, setting in zip(self.splitters, self.splitterSettings):
            sp.splitterMoved.connect(self.splitterMoved)
            sp.restoreState(setting)

        self.searchKeys = [
            "dataset_id", "title", "platform_organism", "description"
        ]

        self.gds = []
        self.gds_info = None

        self.resize(1000, 600)

        self.setBlocking(True)
        self.setEnabled(False)
        self.progressBarInit()

        self._executor = ThreadExecutor()

        func = partial(get_gds_model,
                       methodinvoke(self, "_setProgress", (float, )))
        self._inittask = Task(function=func)
        self._inittask.finished.connect(self._initializemodel)
        self._executor.submit(self._inittask)

        self._datatask = None

    @Slot(float)
    def _setProgress(self, value):
        self.progressBarValue = value

    def _initializemodel(self):
        assert self.thread() is QThread.currentThread()
        model, self.gds_info, self.gds = self._inittask.result()
        model.setParent(self)

        proxy = self.treeWidget.model()
        proxy.setFilterKeyColumn(0)
        proxy.setFilterRole(TextFilterRole)
        proxy.setFilterCaseSensitivity(False)
        proxy.setFilterFixedString(self.filterString)

        proxy.setSourceModel(model)
        proxy.sort(0, Qt.DescendingOrder)

        self.progressBarFinished()
        self.setBlocking(False)
        self.setEnabled(True)

        filter_items = " ".join(gds[key] for gds in self.gds
                                for key in self.searchKeys)
        tr_chars = ",.:;!?(){}[]_-+\\|/%#@$^&*<>~`"
        tr_table = str.maketrans(tr_chars, " " * len(tr_chars))
        filter_items = filter_items.translate(tr_table)

        filter_items = sorted(set(filter_items.split(" ")))
        filter_items = [item for item in filter_items if len(item) > 3]

        self.completer.setTokenList(filter_items)

        if self.currentGds:
            current_id = self.currentGds["dataset_id"]
            gdss = [(i, qunpack(proxy.data(proxy.index(i, 1), Qt.DisplayRole)))
                    for i in range(proxy.rowCount())]
            current = [i for i, data in gdss if data and data == current_id]
            if current:
                current_index = proxy.index(current[0], 0)
                self.treeWidget.selectionModel().select(
                    current_index,
                    QItemSelectionModel.Select | QItemSelectionModel.Rows)
                self.treeWidget.scrollTo(current_index,
                                         QTreeView.PositionAtCenter)

        for i in range(8):
            self.treeWidget.resizeColumnToContents(i)

        self.treeWidget.setColumnWidth(
            1, min(self.treeWidget.columnWidth(1), 300))
        self.treeWidget.setColumnWidth(
            2, min(self.treeWidget.columnWidth(2), 200))

        self.updateInfo()

    def updateInfo(self):
        gds_info = self.gds_info
        text = ("%i datasets\n%i datasets cached\n" %
                (len(gds_info),
                 len(glob.glob(serverfiles.localpath("GEO") + "/GDS*"))))
        filtered = self.treeWidget.model().rowCount()
        if len(self.gds) != filtered:
            text += ("%i after filtering") % filtered
        self.infoBox.setText(text)

    def updateSelection(self, *args):
        current = self.treeWidget.selectedIndexes()
        mapToSource = self.treeWidget.model().mapToSource
        current = [mapToSource(index).row() for index in current]
        if current:
            self.currentGds = self.gds[current[0]]
            self.setAnnotations(self.currentGds)
            self.infoGDS.setText(self.currentGds.get("description", ""))
            self.nameEdit.setPlaceholderText(self.currentGds["title"])
            self.datasetName = \
                self.datasetNames.get(self.currentGds["dataset_id"], "")
        else:
            self.currentGds = None
            self.nameEdit.setPlaceholderText("")
            self.datasetName = ""

        self.commitIf()

    def setAnnotations(self, gds):
        self._annotationsUpdating = True
        self.annotationsTree.clear()

        annotations = defaultdict(set)
        subsetscount = {}
        for desc in gds["subsets"]:
            annotations[desc["type"]].add(desc["description"])
            subsetscount[desc["description"]] = str(len(desc["sample_id"]))

        for type, subsets in annotations.items():
            key = (gds["dataset_id"], type)
            subsetItem = QTreeWidgetItem(self.annotationsTree, [type])
            subsetItem.setFlags(subsetItem.flags() | Qt.ItemIsUserCheckable
                                | Qt.ItemIsTristate)
            subsetItem.setCheckState(
                0, self.gdsSelectionStates.get(key, Qt.Checked))
            subsetItem.key = key
            for subset in subsets:
                key = (gds["dataset_id"], type, subset)
                item = QTreeWidgetItem(
                    subsetItem, [subset, subsetscount.get(subset, "")])
                item.setFlags(item.flags() | Qt.ItemIsUserCheckable)
                item.setCheckState(
                    0, self.gdsSelectionStates.get(key, Qt.Checked))
                item.key = key
        self._annotationsUpdating = False
        self.annotationsTree.expandAll()
        for i in range(self.annotationsTree.columnCount()):
            self.annotationsTree.resizeColumnToContents(i)

    def annotationSelectionChanged(self, item, column):
        if self._annotationsUpdating:
            return
        for i in range(self.annotationsTree.topLevelItemCount()):
            item = self.annotationsTree.topLevelItem(i)
            self.gdsSelectionStates[item.key] = item.checkState(0)
            for j in range(item.childCount()):
                child = item.child(j)
                self.gdsSelectionStates[child.key] = child.checkState(0)

    def filter(self):
        filter_string = unicode(self.filterLineEdit.text())
        proxyModel = self.treeWidget.model()
        if proxyModel:
            strings = filter_string.lower().strip().split()
            proxyModel.setFilterFixedStrings(strings)
            self.updateInfo()

    def selectedSamples(self):
        """
        Return the currently selected sample annotations.

        The return value is a list of selected (sample type, sample value)
        tuples.

        .. note:: if some Sample annotation type has no selected values.
                  this method will return all values for it.

        """
        samples = []
        unused_types = []
        used_types = []
        for stype in childiter(self.annotationsTree.invisibleRootItem()):
            selected_values = []
            all_values = []
            for sval in childiter(stype):
                value = (str(stype.text(0)), str(sval.text(0)))
                if self.gdsSelectionStates.get(sval.key, True):
                    selected_values.append(value)
                all_values.append(value)
            if selected_values:
                samples.extend(selected_values)
                used_types.append(str(stype.text(0)))
            else:
                # If no sample of sample type is selected we don't filter
                # on it.
                samples.extend(all_values)
                unused_types.append(str(stype.text(0)))

        return samples, used_types

    def commitIf(self):
        if self.autoCommit:
            self.commit()
        else:
            self.selectionChanged = True

    @Slot(int, int)
    def progressCompleted(self, value, total):
        if total > 0:
            self.progressBarSet(100. * value / total, processEvents=False)
        else:
            pass
            # TODO: report 'indeterminate progress'

    def commit(self):
        if self.currentGds:
            self.error(0)
            sample_type = None
            self.progressBarInit(processEvents=None)

            _, groups = self.selectedSamples()
            if len(groups) == 1 and self.outputRows:
                sample_type = groups[0]

            self.setEnabled(False)
            self.setBlocking(True)

            progress = methodinvoke(self, "progressCompleted", (int, int))

            def get_data(gds_id, report_genes, transpose, sample_type, title):
                gds_ensure_downloaded(gds_id, progress)
                gds = geo.GDS(gds_id)
                data = gds.getdata(report_genes=report_genes,
                                   transpose=transpose,
                                   sample_type=sample_type)
                data.name = title
                return data

            get_data = partial(get_data,
                               self.currentGds["dataset_id"],
                               report_genes=self.mergeSpots,
                               transpose=self.outputRows,
                               sample_type=sample_type,
                               title=self.datasetName
                               or self.currentGds["title"])
            self._datatask = Task(function=get_data)
            self._datatask.finished.connect(self._on_dataready)
            self._executor.submit(self._datatask)

    def _on_dataready(self):
        self.setEnabled(True)
        self.setBlocking(False)
        self.progressBarFinished(processEvents=False)

        try:
            data = self._datatask.result()
        except urlrequest.URLError as error:
            self.error(0, ("Error while connecting to the NCBI ftp server! "
                           "'%s'" % error))
            sys.excepthook(type(error), error, getattr(error, "__traceback__"))
            return
        finally:
            self._datatask = None

        data_name = data.name
        samples, _ = self.selectedSamples()

        self.warning(0)
        message = None
        if self.outputRows:

            def samplesinst(ex):
                out = []
                for meta in data.domain.metas:
                    out.append((meta.name, ex[meta].value))

                if data.domain.class_var.name != 'class':
                    out.append((data.domain.class_var.name,
                                ex[data.domain.class_var].value))

                return out

            samples = set(samples)
            mask = [samples.issuperset(samplesinst(ex)) for ex in data]
            data = data[numpy.array(mask, dtype=bool)]
            if len(data) == 0:
                message = "No samples with selected sample annotations."
        else:
            samples = set(samples)
            domain = Orange.data.Domain([
                attr for attr in data.domain.attributes
                if samples.issuperset(attr.attributes.items())
            ], data.domain.class_var, data.domain.metas)
            #             domain.addmetas(data.domain.getmetas())

            if len(domain.attributes) == 0:
                message = "No samples with selected sample annotations."
            stypes = set(s[0] for s in samples)
            for attr in domain.attributes:
                attr.attributes = dict(
                    (key, value) for key, value in attr.attributes.items()
                    if key in stypes)
            data = Orange.data.Table(domain, data)

        if message is not None:
            self.warning(0, message)

        data_hints.set_hint(data, "taxid", self.currentGds.get("taxid", ""),
                            10.0)
        data_hints.set_hint(data, "genesinrows", self.outputRows, 10.0)

        data.name = data_name
        self.send("Expression Data", data)

        model = self.treeWidget.model().sourceModel()
        row = self.gds.index(self.currentGds)

        model.setData(model.index(row, 0), " ", Qt.DisplayRole)

        self.updateInfo()
        self.selectionChanged = False

    def splitterMoved(self, *args):
        self.splitterSettings = [
            bytes(sp.saveState()) for sp in self.splitters
        ]

    def send_report(self):
        self.report_items("GEO Dataset",
                          [("ID", self.currentGds['dataset_id']),
                           ("Title", self.currentGds['title']),
                           ("Organism", self.currentGds['sample_organism'])])
        self.report_items("Data",
                          [("Samples", self.currentGds['sample_count']),
                           ("Features", self.currentGds['feature_count']),
                           ("Genes", self.currentGds['gene_count'])])
        self.report_name("Sample annotations")
        subsets = defaultdict(list)
        for subset in self.currentGds['subsets']:
            subsets[subset['type']].append(
                (subset['description'], len(subset['sample_id'])))
        self.report_html += "<ul>"
        for type in subsets:
            self.report_html += "<b>" + type + ":</b></br>"
            for desc, count in subsets[type]:
                self.report_html += 9 * "&nbsp" + "<b>{}:</b> {}</br>".format(
                    desc, count)
        self.report_html += "</ul>"

    def onDeleteWidget(self):
        if self._inittask:
            self._inittask.future().cancel()
            self._inittask.finished.disconnect(self._initializemodel)
        if self._datatask:
            self._datatask.future().cancel()
            self._datatask.finished.disconnect(self._on_dataready)
        self._executor.shutdown(wait=False)

        super(OWGEODatasets, self).onDeleteWidget()

    def onNameEdited(self):
        if self.currentGds:
            gds_id = self.currentGds["dataset_id"]
            self.datasetNames[gds_id] = unicode(self.nameEdit.text())
            self.commitIf()
Ejemplo n.º 5
0
class OWTestLearners(widget.OWWidget):
    name = "Test Learners"
    description = ""
    icon = "icons/TestLearners1.svg"
    priority = 100

    inputs = [("Learner", Orange.classification.Learner,
               "set_learner", widget.Multiple),
              ("Data", Orange.data.Table, "set_train_data", widget.Default),
              ("Test Data", Orange.data.Table, "set_test_data")]

    outputs = [("Evaluation Results", testing.Results)]

    #: Resampling/testing types
    KFold, LeaveOneOut, Bootstrap, TestOnTrain, TestOnTest = 0, 1, 2, 3, 4

    #: Selected resampling type
    resampling = settings.Setting(0)
    #: Number of folds for K-fold cross validation
    k_folds = settings.Setting(10)
    #: Number of repeats for bootstrap sampling
    n_repeat = settings.Setting(10)
    #: Bootstrap sampling p
    sample_p = settings.Setting(75)

    def __init__(self, parent=None):
        super().__init__(parent)

        self.train_data = None
        self.test_data = None

        #: An Ordered dictionary with current inputs and their testing
        #: results.
        self.learners = OrderedDict()

        sbox = gui.widgetBox(self.controlArea, "Sampling")
        rbox = gui.radioButtons(
            sbox, self, "resampling", callback=self._param_changed
        )
        gui.appendRadioButton(rbox, "Cross validation")
        ibox = gui.indentedBox(rbox)
        gui.spin(ibox, self, "k_folds", 2, 50, label="Number of folds:",
                 callback=self.kfold_changed)
        gui.appendRadioButton(rbox, "Leave one out")
        gui.appendRadioButton(rbox, "Random sampling")
        ibox = gui.indentedBox(rbox)
        gui.spin(ibox, self, "n_repeat", 2, 50, label="Repeat train/test",
                 callback=self.bootstrap_changed)
        gui.widgetLabel(ibox, "Relative training set size:")
        gui.hSlider(ibox, self, "sample_p", minValue=1, maxValue=100,
                    ticks=20, vertical=False, labelFormat="%d %%",
                    callback=self.bootstrap_changed)

        gui.appendRadioButton(rbox, "Test on train data")
        gui.appendRadioButton(rbox, "Test on test data")

        rbox.layout().addSpacing(5)
        gui.button(rbox, self, "Apply", callback=self.apply)

        gui.rubber(self.controlArea)

        self.view = QTreeView(
            rootIsDecorated=False,
            uniformRowHeights=True,
            wordWrap=True,
            editTriggers=QTreeView.NoEditTriggers
        )
        header = self.view.header()
        header.setResizeMode(QHeaderView.ResizeToContents)
        header.setDefaultAlignment(Qt.AlignCenter)
        header.setStretchLastSection(False)

        self.result_model = QStandardItemModel()
        self.view.setModel(self.result_model)
        self.view.setItemDelegate(ItemDelegate())
        self._update_header()
        box = gui.widgetBox(self.mainArea, "Evaluation Results")
        box.layout().addWidget(self.view)

    def set_learner(self, learner, key):
        if key in self.learners and learner is None:
            del self.learners[key]
        else:
            self.learners[key] = Input(learner, None, ())
        self._update_stats_model()

    def set_train_data(self, data):
        self.error(0)
        if data is not None:
            if data.domain.class_var is None:
                self.error(0, "Train data input requires a class variable")
                data = None

        self.train_data = data
        self._update_header()
        self._invalidate()

    def set_test_data(self, data):
        self.error(1)
        if data is not None:
            if data.domain.class_var is None:
                self.error(1, "Test data input requires a class variable")
                data = None

        self.test_data = data
        if self.resampling == OWTestLearners.TestOnTest:
            self._invalidate()

    def handleNewSignals(self):
        self.update_results()
        self.commit()

    def kfold_changed(self):
        self.resampling = OWTestLearners.KFold
        self._param_changed()

    def bootstrap_changed(self):
        self.resampling = OWTestLearners.Bootstrap
        self._param_changed()

    def _param_changed(self):
        self._invalidate()

    def update_results(self):
        self.warning([1, 2])
        self.error(2)

        if self.train_data is None:
            return

        if self.resampling == OWTestLearners.TestOnTest:
            if self.test_data is None:
                self.warning(2, "Missing separate test data input")
                return

            elif self.test_data.domain.class_var != \
                    self.train_data.domain.class_var:
                self.error(2, ("Inconsistent class variable between test " +
                               "and train data sets"))
                return

        # items in need of an update
        items = [(key, input) for key, input in self.learners.items()
                 if input.results is None]
        learners = [input.learner for _, input in items]

        self.setStatusMessage("Running")
        if self.test_data is not None and \
                self.resampling != OWTestLearners.TestOnTest:
            self.warning(1, "Test data is present but unused. "
                            "Select 'Test on test data' to use it.")

        # TODO: Test each learner individually

        if self.resampling == OWTestLearners.KFold:
            results = testing.CrossValidation(
                self.train_data, learners, k=self.k_folds, store_data=True
            )
        elif self.resampling == OWTestLearners.LeaveOneOut:
            results = testing.LeaveOneOut(
                self.train_data, learners, store_data=True
            )
        elif self.resampling == OWTestLearners.Bootstrap:
            p = self.sample_p / 100.0
            results = testing.Bootstrap(
                self.train_data, learners, n_resamples=self.n_repeat, p=p,
                store_data=True
            )
        elif self.resampling == OWTestLearners.TestOnTrain:
            results = testing.TestOnTrainingData(
                self.train_data, learners, store_data=True
            )
        elif self.resampling == OWTestLearners.TestOnTest:
            if self.test_data is None:
                return
            results = testing.TestOnTestData(
                self.train_data, self.test_data, learners, store_data=True
            )
        else:
            assert False

        results = list(split_by_model(results))
        class_var = self.train_data.domain.class_var
        
        if is_discrete(class_var):
            test_stats = classification_stats
        else:
            test_stats = regression_stats
        
        self._update_header()
        
        stats = [test_stats(res) for res in results]
        for (key, input), res, stat in zip(items, results, stats):
            self.learners[key] = input._replace(results=res, stats=stat)

        self.setStatusMessage("")
        
        self._update_stats_model()

    def _update_header(self):
        headers = ["Method"]
        if self.train_data is not None:
            if is_discrete(self.train_data.domain.class_var):
                headers.extend(classification_stats.headers)
            else:
                headers.extend(regression_stats.headers)
        for i in reversed(range(len(headers),
                                self.result_model.columnCount())):
            self.result_model.takeColumn(i)

        self.result_model.setHorizontalHeaderLabels(headers)

    def _update_stats_model(self):
        model = self.view.model()

        for r in reversed(range(model.rowCount())):
            model.takeRow(r)

        for input in self.learners.values():
            name = learner_name(input.learner)
            row = []
            head = QStandardItem()
            head.setData(name, Qt.DisplayRole)
            row.append(head)
            for stat in input.stats:
                item = QStandardItem()
                item.setData(" {:.3f} ".format(stat[0]), Qt.DisplayRole)
                row.append(item)
            model.appendRow(row)

    def _invalidate(self, which=None):
        if which is None:
            which = self.learners.keys()

        all_keys = list(self.learners.keys())
        model = self.view.model()

        for key in which:
            self.learners[key] = \
                self.learners[key]._replace(results=None, stats=None)

            if key in self.learners:
                row = all_keys.index(key)
                for c in range(1, model.columnCount()):
                    item = model.item(row, c)
                    if item is not None:
                        item.setData(None, Qt.DisplayRole)

    def apply(self):
        self.update_results()
        self.commit()

    def commit(self):
        results = [val.results for val in self.learners.values()
                   if val.results is not None]
        if results:
            combined = results_merge(results)
            combined.learner_names = [learner_name(val.learner)
                                     for val in self.learners.values()]
        else:
            combined = None
        self.send("Evaluation Results", combined)
Ejemplo n.º 6
0
class OWTestLearners(widget.OWWidget):
    name = "Test & Score"
    description = "Cross-validation accuracy estimation."
    icon = "icons/TestLearners1.svg"
    priority = 100

    inputs = [("Learner", Learner,
               "set_learner", widget.Multiple),
              ("Data", Table, "set_train_data", widget.Default),
              ("Test Data", Table, "set_test_data"),
              ("Preprocessor", Preprocess, "set_preprocessor")]

    outputs = [("Predictions", Orange.data.Table),
               ("Evaluation Results", Orange.evaluation.Results)]

    settingsHandler = settings.ClassValuesContextHandler()

    #: Resampling/testing types
    KFold, LeaveOneOut, ShuffleSplit, TestOnTrain, TestOnTest = 0, 1, 2, 3, 4

    #: Selected resampling type
    resampling = settings.Setting(0)
    #: Number of folds for K-fold cross validation
    k_folds = settings.Setting(10)
    #: Number of repeats for ShuffleSplit sampling
    n_repeat = settings.Setting(10)
    #: ShuffleSplit sampling p
    sample_p = settings.Setting(75)

    TARGET_AVERAGE = "(Average over classes)"
    class_selection = settings.ContextSetting(TARGET_AVERAGE)

    def __init__(self):
        super().__init__()

        self.data = None
        self.test_data = None
        self.preprocessor = None
        self.train_data_missing_vals = False
        self.test_data_missing_vals = False

        #: An Ordered dictionary with current inputs and their testing
        #: results.
        self.learners = OrderedDict()

        sbox = gui.widgetBox(self.controlArea, "Sampling")
        rbox = gui.radioButtons(
            sbox, self, "resampling", callback=self._param_changed
        )
        gui.appendRadioButton(rbox, "Cross validation")
        ibox = gui.indentedBox(rbox)
        gui.spin(ibox, self, "k_folds", 2, 50, label="Number of folds:",
                 callback=self.kfold_changed)
        gui.appendRadioButton(rbox, "Leave one out")
        gui.appendRadioButton(rbox, "Random sampling")
        ibox = gui.indentedBox(rbox)
        gui.spin(ibox, self, "n_repeat", 2, 50, label="Repeat train/test",
                 callback=self.shuffle_split_changed)
        gui.widgetLabel(ibox, "Relative training set size:")
        gui.hSlider(ibox, self, "sample_p", minValue=1, maxValue=99,
                    ticks=20, vertical=False, labelFormat="%d %%",
                    callback=self.shuffle_split_changed)

        gui.appendRadioButton(rbox, "Test on train data")
        gui.appendRadioButton(rbox, "Test on test data")

        rbox.layout().addSpacing(5)
        self.apply_button = gui.button(
            rbox, self, "Apply", callback=self.apply, default=True)

        self.cbox = gui.widgetBox(self.controlArea, "Target class")
        self.class_selection_combo = gui.comboBox(
            self.cbox, self, "class_selection", items=[],
            sendSelectedValue=True, valueType=str,
            callback=self._on_target_class_changed,
            contentsLength=8)

        gui.rubber(self.controlArea)

        self.view = QTreeView(
            rootIsDecorated=False,
            uniformRowHeights=True,
            wordWrap=True,
            editTriggers=QTreeView.NoEditTriggers
        )
        header = self.view.header()
        header.setResizeMode(QHeaderView.ResizeToContents)
        header.setDefaultAlignment(Qt.AlignCenter)
        header.setStretchLastSection(False)

        self.result_model = QStandardItemModel(self)
        self.result_model.setHorizontalHeaderLabels(["Method"])
        self.view.setModel(self.result_model)
        self.view.setItemDelegate(ItemDelegate())

        box = gui.widgetBox(self.mainArea, "Evaluation Results")
        box.layout().addWidget(self.view)

    def sizeHint(self):
        return QSize(780, 1)

    def set_learner(self, learner, key):
        """
        Set the input `learner` for `key`.
        """
        if key in self.learners and learner is None:
            # Removed
            del self.learners[key]
        else:
            self.learners[key] = Input(learner, None, None)
            self._invalidate([key])

    def set_train_data(self, data):
        """
        Set the input training dataset.
        """
        self.error(0)
        self.information(0)
        if data and not data.domain.class_var:
            self.error(0, "Train data input requires a class variable")
            data = None

        if isinstance(data, SqlTable):
            if data.approx_len() < AUTO_DL_LIMIT:
                data = Table(data)
            else:
                self.information(0, "Train data has been sampled")
                data_sample = data.sample_time(1, no_cache=True)
                data_sample.download_data(AUTO_DL_LIMIT, partial=True)
                data = Table(data_sample)

        self.warning(4)
        self.train_data_missing_vals = data is not None and \
                                       np.isnan(data.Y).any()
        if self.train_data_missing_vals or self.test_data_missing_vals:
            self.warning(4, self._get_missing_data_warning(
                self.train_data_missing_vals, self.test_data_missing_vals
            ))
            if data:
                data = RemoveNaNClasses(data)

        self.data = data
        self.closeContext()
        if data is not None:
            self._update_class_selection()
            self.openContext(data.domain.class_var)
        self._invalidate()

    def set_test_data(self, data):
        """
        Set the input separate testing dataset.
        """
        self.error(1)
        self.information(1)
        if data and not data.domain.class_var:
            self.error(1, "Test data input requires a class variable")
            data = None

        if isinstance(data, SqlTable):
            if data.approx_len() < AUTO_DL_LIMIT:
                data = Table(data)
            else:
                self.information(1, "Test data has been sampled")
                data_sample = data.sample_time(1, no_cache=True)
                data_sample.download_data(AUTO_DL_LIMIT, partial=True)
                data = Table(data_sample)

        self.warning(4)
        self.test_data_missing_vals = data is not None and \
                                      np.isnan(data.Y).any()
        if self.train_data_missing_vals or self.test_data_missing_vals:
            self.warning(4, self._get_missing_data_warning(
                self.train_data_missing_vals, self.test_data_missing_vals
            ))
            if data:
                data = RemoveNaNClasses(data)

        self.test_data = data
        if self.resampling == OWTestLearners.TestOnTest:
            self._invalidate()

    def _get_missing_data_warning(self, train_missing, test_missing):
        return "Instances with unknown target values were removed from{}data"\
            .format(train_missing * test_missing * " "
                    or train_missing * " train " or test_missing * " test ")

    def set_preprocessor(self, preproc):
        """
        Set the input preprocessor to apply on the training data.
        """
        self.preprocessor = preproc
        self._invalidate()

    def handleNewSignals(self):
        """Reimplemented from OWWidget.handleNewSignals."""
        self._update_class_selection()
        self.apply()

    def kfold_changed(self):
        self.resampling = OWTestLearners.KFold
        self._param_changed()

    def shuffle_split_changed(self):
        self.resampling = OWTestLearners.ShuffleSplit
        self._param_changed()

    def _param_changed(self):
        self._invalidate()

    def _update_results(self):
        """
        Run/evaluate the learners.
        """
        self.warning([1, 2])
        self.error(2)

        if self.data is None:
            return

        class_var = self.data.domain.class_var

        if self.resampling == OWTestLearners.TestOnTest:
            if self.test_data is None:
                self.warning(2, "Missing separate test data input")
                return
            elif self.test_data.domain.class_var != class_var:
                self.error(2, ("Inconsistent class variable between test " +
                               "and train data sets"))
                return

        # items in need of an update
        items = [(key, slot) for key, slot in self.learners.items()
                 if slot.results is None]
        learners = [slot.learner for _, slot in items]
        if len(items) == 0:
            return

        if self.test_data is not None and \
                self.resampling != OWTestLearners.TestOnTest:
            self.warning(1, "Test data is present but unused. "
                            "Select 'Test on test data' to use it.")

        rstate = 42
        def update_progress(finished):
            self.progressBarSet(100 * finished)
        common_args = dict(
            store_data=True,
            preprocessor=self.preprocessor,
            callback=update_progress)
        self.setStatusMessage("Running")
        self.progressBarInit()

        try:
            if self.resampling == OWTestLearners.KFold:
                warnings = []
                results = Orange.evaluation.CrossValidation(
                    self.data, learners, k=self.k_folds, random_state=rstate,
                    warnings=warnings, **common_args)
                if warnings:
                    self.warning(2, warnings[0])
            elif self.resampling == OWTestLearners.LeaveOneOut:
                results = Orange.evaluation.LeaveOneOut(
                    self.data, learners, **common_args)
            elif self.resampling == OWTestLearners.ShuffleSplit:
                train_size = self.sample_p / 100
                results = Orange.evaluation.ShuffleSplit(
                    self.data, learners, n_resamples=self.n_repeat,
                    train_size=train_size, test_size=None,
                    random_state=rstate, **common_args)
            elif self.resampling == OWTestLearners.TestOnTrain:
                results = Orange.evaluation.TestOnTrainingData(
                    self.data, learners, **common_args)
            elif self.resampling == OWTestLearners.TestOnTest:
                results = Orange.evaluation.TestOnTestData(
                    self.data, self.test_data, learners, **common_args)
            else:
                assert False
        except RuntimeError as e:
            self.error(2, str(e))
            self.setStatusMessage("")
            self.progressBarFinished()
            return

        learner_key = {slot.learner: key for key, slot in self.learners.items()}
        for learner, result in zip(learners, split_by_model(results)):
            stats = None
            if class_var.is_discrete:
                scorers = classification_stats.scores
            elif class_var.is_continuous:
                scorers = regression_stats.scores
            else:
                scorers = None
            if scorers:
                ex = result.failed[0]
                if ex:
                    stats = [Try.Fail(ex)] * len(scorers)
                    result = Try.Fail(ex)
                else:
                    stats = [Try(lambda: score(result)) for score in scorers]
                    result = Try.Success(result)
            key = learner_key[learner]
            self.learners[key] = \
                self.learners[key]._replace(results=result, stats=stats)

        self.setStatusMessage("")
        self.progressBarFinished()

    def _update_header(self):
        # Set the correct horizontal header labels on the results_model.
        headers = ["Method"]
        if self.data is not None:
            if self.data.domain.has_discrete_class:
                headers.extend(classification_stats.headers)
            else:
                headers.extend(regression_stats.headers)

        # remove possible extra columns from the model.
        for i in reversed(range(len(headers),
                                self.result_model.columnCount())):
            self.result_model.takeColumn(i)

        self.result_model.setHorizontalHeaderLabels(headers)

    def _update_stats_model(self):
        # Update the results_model with up to date scores.
        # Note: The target class specific scores (if requested) are
        # computed as needed in this method.
        model = self.view.model()
        # clear the table model, but preserving the header labels
        for r in reversed(range(model.rowCount())):
            model.takeRow(r)

        target_index = None
        if self.data is not None:
            class_var = self.data.domain.class_var
            if class_var.is_discrete and \
                    self.class_selection != self.TARGET_AVERAGE:
                target_index = class_var.values.index(self.class_selection)
        else:
            class_var = None

        errors = []
        has_missing_scores = False

        for key, slot in self.learners.items():
            name = learner_name(slot.learner)
            head = QStandardItem(name)
            head.setData(key, Qt.UserRole)
            if isinstance(slot.results, Try.Fail):
                head.setToolTip(str(slot.results.exception))
                head.setText("{} (error)".format(name))
                head.setForeground(QtGui.QBrush(Qt.red))
                errors.append("{name} failed with error:\n"
                              "{exc.__class__.__name__}: {exc!s}"
                              .format(name=name, exc=slot.results.exception))

            row = [head]

            if class_var is not None and class_var.is_discrete and \
                    target_index is not None:
                if slot.results is not None and slot.results.success:
                    ovr_results = results_one_vs_rest(
                        slot.results.value, target_index)

                    stats = [Try(lambda: score(ovr_results))
                             for score in classification_stats.scores]
                else:
                    stats = None
            else:
                stats = slot.stats

            if stats is not None:
                for stat in stats:
                    item = QStandardItem()
                    if stat.success:
                        item.setText("{:.3f}".format(stat.value[0]))
                    else:
                        item.setToolTip(str(stat.exception))
                        has_missing_scores = True
                    row.append(item)

            model.appendRow(row)

        if errors:
            self.error(3, "\n".join(errors))
        else:
            self.error(3)

        if has_missing_scores:
            self.warning(3, "Some scores could not be computed")
        else:
            self.warning(3)

    def _update_class_selection(self):
        self.class_selection_combo.setCurrentIndex(-1)
        self.class_selection_combo.clear()
        if not self.data:
            return

        if self.data.domain.has_discrete_class:
            self.cbox.setVisible(True)
            class_var = self.data.domain.class_var
            items = [self.TARGET_AVERAGE] + class_var.values
            self.class_selection_combo.addItems(items)

            class_index = 0
            if self.class_selection in class_var.values:
                class_index = class_var.values.index(self.class_selection) + 1

            self.class_selection_combo.setCurrentIndex(class_index)
            self.class_selection = items[class_index]
        else:
            self.cbox.setVisible(False)

    def _on_target_class_changed(self):
        self._update_stats_model()

    def _invalidate(self, which=None):
        # Invalidate learner results for `which` input keys
        # (if None then all learner results are invalidated)
        if which is None:
            which = self.learners.keys()

        model = self.view.model()
        statmodelkeys = [model.item(row, 0).data(Qt.UserRole)
                         for row in range(model.rowCount())]

        for key in which:
            self.learners[key] = \
                self.learners[key]._replace(results=None, stats=None)

            if key in statmodelkeys:
                row = statmodelkeys.index(key)
                for c in range(1, model.columnCount()):
                    item = model.item(row, c)
                    if item is not None:
                        item.setData(None, Qt.DisplayRole)
                        item.setData(None, Qt.ToolTipRole)

        self.apply_button.setEnabled(True)

    def apply(self):
        self.apply_button.setEnabled(False)
        self._update_header()
        # Update the view to display the model names
        self._update_stats_model()
        self._update_results()
        self._update_stats_model()
        self.commit()

    def commit(self):
        valid = [slot for slot in self.learners.values()
                 if slot.results is not None and slot.results.success]
        if valid:
            # Evaluation results
            combined = results_merge([slot.results.value for slot in valid])
            combined.learner_names = [learner_name(slot.learner)
                                      for slot in valid]

            # Predictions & Probabilities
            predictions = combined.get_augmented_data(combined.learner_names)
        else:
            combined = None
            predictions = None
        self.send("Evaluation Results", combined)
        self.send("Predictions", predictions)
Ejemplo n.º 7
0
class AddToProject(QDialog):
    """Dialog to let the user choose one of the folders from the opened proj"""
    def __init__(self, projects, parent=None):
        super(AddToProject, self).__init__(parent)
        # pathProjects must be a list
        self._projects = projects
        self.setWindowTitle("XX")
        self.pathSelected = ''
        vbox = QVBoxLayout(self)

        hbox = QHBoxLayout()
        self._list = QListWidget()
        for project in self._projects:
            self._list.addItem(project['name'])

        # self._list.setCurrentRow(0)
        self._tree = QTreeView()
        # self._tree.header().setHidden(True)
        # self._tree.setSelectionMode(QTreeView.SingleSelection)
        # self._tree.setAnimated(True)
        self.load_tree(self._projects[0])
        hbox.addWidget(self._list)
        hbox.addWidget(self._tree)
        vbox.addLayout(hbox)

        hbox2 = QHBoxLayout()
        btnAdd = QPushButton("XX")
        btnCancel = QPushButton("XX")
        hbox2.addWidget(btnCancel)
        hbox2.addWidget(btnAdd)
        vbox.addLayout(hbox2)

        self.connect(btnCancel, SIGNAL("clicked()"), self.close)
        self.connect(btnAdd, SIGNAL("clicked()"), self._select_path)
        self.connect(
            self._list,
            SIGNAL("currentItemChanged(QTreeWidgetItem*, QTreeWidgetItem*)"),
            self._project_changed)

    def _project_changed(self, item, previous):
        # FIXME, this is not being called, at least in osx
        for each_project in self._projects:
            if each_project.name == item.text():
                self.load_tree(each_project)

    def load_tree(self, project):
        """Load the tree view on the right based on the project selected."""
        qfsm = QFileSystemModel()
        qfsm.setRootPath(project['path'])
        load_index = qfsm.index(qfsm.rootPath())
        # qfsm.setFilter(QDir.AllDirs | QDir.NoDotAndDotDot)
        # qfsm.setNameFilterDisables(False)
        # pext = ["*{0}".format(x) for x in project['extensions']]
        # qfsm.setNameFilters(pext)

        self._tree.setModel(qfsm)
        self._tree.setRootIndex(load_index)

        t_header = self._tree.header()
        # t_header.setHorizontalScrollMode(QAbstractItemView.ScrollPerPixel)
        # t_header.setResizeMode(0, QHeaderView.Stretch)
        # t_header.setStretchLastSection(False)
        # t_header.setClickable(True)
        #

        # self._tree.hideColumn(1)  # Size
        # self._tree.hideColumn(2)  # Type
        # self._tree.hideColumn(3)  # Modification date

        # FIXME: Changing the name column's title requires some magic
        # Please look at the project tree

    def _select_path(self):
        """Set pathSelected to the folder selected in the tree."""
        path = self._tree.model().filePath(self._tree.currentIndex())
        if path:
            self.pathSelected = path
            self.close()
Ejemplo n.º 8
0
class OWGEODatasets(OWWidget):
    name = "GEO Data Sets"
    description = DESCRIPTION
    icon = "../widgets/icons/GEODataSets.svg"
    priority = PRIORITY

    inputs = []
    outputs = [("Expression Data", Orange.data.Table)]

    settingsList = ["outputRows", "mergeSpots", "gdsSelectionStates",
                    "splitterSettings", "currentGds", "autoCommit",
                    "datasetNames"]

    outputRows = Setting(False)
    mergeSpots = Setting(True)
    gdsSelectionStates = Setting({})
    currentGds = Setting(None)
    datasetNames = Setting({})
    splitterSettings = Setting(
        (b'\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x01\xea\x00\x00\x00\xd7\x01\x00\x00\x00\x07\x01\x00\x00\x00\x02',
         b'\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x01\xb5\x00\x00\x02\x10\x01\x00\x00\x00\x07\x01\x00\x00\x00\x01')
    )

    autoCommit = Setting(True)

    def __init__(self, parent=None, signalManager=None, name=" GEO Data Sets"):
        OWWidget.__init__(self, parent, signalManager, name)

        self.selectionChanged = False
        self.filterString = ""
        self.datasetName = ""
        ## GUI
        box = gui.widgetBox(self.controlArea, "Info", addSpace=True)
        self.infoBox = gui.widgetLabel(box, "Initializing\n\n")

        box = gui.widgetBox(self.controlArea, "Output", addSpace=True)
        gui.radioButtonsInBox(box, self, "outputRows",
                              ["Genes or spots", "Samples"], "Rows",
                              callback=self.commitIf)

        gui.checkBox(box, self, "mergeSpots", "Merge spots of same gene",
                     callback=self.commitIf)

        gui.separator(box)
        self.nameEdit = gui.lineEdit(
            box, self, "datasetName", "Data set name",
            tooltip="Override the default output data set name",
            callback=self.onNameEdited
        )
        self.nameEdit.setPlaceholderText("")

        if sys.version_info < (3, ):
            box = gui.widgetBox(self.controlArea, "Commit", addSpace=True)
            self.commitButton = gui.button(
                box, self, "Commit", callback=self.commit)
            cb = gui.checkBox(box, self, "autoCommit", "Commit on any change")
            gui.setStopper(self, self.commitButton, cb, "selectionChanged",
                           self.commit)
        else:
            gui.auto_commit(self.controlArea, self, "autoCommit", "Commit",
                            box="Commit")

        gui.rubber(self.controlArea)

#         self.filterLineEdit = OWGUIEx.lineEditHint(
#             self.mainArea, self, "filterString", "Filter",
#             caseSensitive=False, matchAnywhere=True,
#             callback=self.filter,  delimiters=" ")

        gui.widgetLabel(self.mainArea, "Filter")
        self.filterLineEdit = QLineEdit(
            textChanged=self.filter
        )
        self.completer = QCompleter(
            caseSensitivity=Qt.CaseInsensitive
        )
        self.completer.setModel(QStringListModel(self))
        self.filterLineEdit.setCompleter(self.completer)

        self.mainArea.layout().addWidget(self.filterLineEdit)

        splitter = QSplitter(Qt.Vertical, self.mainArea)
        self.mainArea.layout().addWidget(splitter)
        self.treeWidget = QTreeView(splitter)

        self.treeWidget.setSelectionMode(QTreeView.SingleSelection)
        self.treeWidget.setRootIsDecorated(False)
        self.treeWidget.setSortingEnabled(True)
        self.treeWidget.setAlternatingRowColors(True)
        self.treeWidget.setUniformRowHeights(True)
        self.treeWidget.setEditTriggers(QTreeView.NoEditTriggers)

        linkdelegate = LinkStyledItemDelegate(self.treeWidget)
        self.treeWidget.setItemDelegateForColumn(1, linkdelegate)
        self.treeWidget.setItemDelegateForColumn(8, linkdelegate)
        self.treeWidget.setItemDelegateForColumn(
            0, gui.IndicatorItemDelegate(self.treeWidget,
                                         role=Qt.DisplayRole))

        proxyModel = MySortFilterProxyModel(self.treeWidget)
        self.treeWidget.setModel(proxyModel)
        self.treeWidget.selectionModel().selectionChanged.connect(
            self.updateSelection
        )
        self.treeWidget.viewport().setMouseTracking(True)

        splitterH = QSplitter(Qt.Horizontal, splitter)

        box = gui.widgetBox(splitterH, "Description")
        self.infoGDS = gui.widgetLabel(box, "")
        self.infoGDS.setWordWrap(True)
        gui.rubber(box)

        box = gui.widgetBox(splitterH, "Sample Annotations")
        self.annotationsTree = QTreeWidget(box)
        self.annotationsTree.setHeaderLabels(
            ["Type (Sample annotations)", "Sample count"]
        )
        self.annotationsTree.setRootIsDecorated(True)
        box.layout().addWidget(self.annotationsTree)
        self.annotationsTree.itemChanged.connect(
            self.annotationSelectionChanged
        )
        self._annotationsUpdating = False
        self.splitters = splitter, splitterH

        for sp, setting in zip(self.splitters, self.splitterSettings):
            sp.splitterMoved.connect(self.splitterMoved)
            sp.restoreState(setting)

        self.searchKeys = ["dataset_id", "title", "platform_organism",
                           "description"]

        self.gds = []
        self.gds_info = None

        self.resize(1000, 600)

        self.setBlocking(True)
        self.setEnabled(False)
        self.progressBarInit()

        self._executor = ThreadExecutor()

        func = partial(get_gds_model,
                       methodinvoke(self, "_setProgress", (float,)))
        self._inittask = Task(function=func)
        self._inittask.finished.connect(self._initializemodel)
        self._executor.submit(self._inittask)

        self._datatask = None

    @Slot(float)
    def _setProgress(self, value):
        self.progressBarValue = value

    def _initializemodel(self):
        assert self.thread() is QThread.currentThread()
        model, self.gds_info, self.gds = self._inittask.result()
        model.setParent(self)

        proxy = self.treeWidget.model()
        proxy.setFilterKeyColumn(0)
        proxy.setFilterRole(TextFilterRole)
        proxy.setFilterCaseSensitivity(False)
        proxy.setFilterFixedString(self.filterString)

        proxy.setSourceModel(model)
        proxy.sort(0, Qt.DescendingOrder)

        self.progressBarFinished()
        self.setBlocking(False)
        self.setEnabled(True)

        filter_items = " ".join(
            gds[key] for gds in self.gds for key in self.searchKeys
        )
        tr_chars = ",.:;!?(){}[]_-+\\|/%#@$^&*<>~`"
        tr_table = str.maketrans(tr_chars, " " * len(tr_chars))
        filter_items = filter_items.translate(tr_table)

        filter_items = sorted(set(filter_items.split(" ")))
        filter_items = [item for item in filter_items if len(item) > 3]

        self.completer.model().setStringList(filter_items)
#         self.filterLineEdit.setItems(filter_items)

        if self.currentGds:
            current_id = self.currentGds["dataset_id"]
            gdss = [(i, qunpack(proxy.data(proxy.index(i, 1), Qt.DisplayRole)))
                    for i in range(proxy.rowCount())]
            current = [i for i, data in gdss if data and data == current_id]
            if current:
                current_index = proxy.index(current[0], 0)
                self.treeWidget.selectionModel().select(
                    current_index,
                    QItemSelectionModel.Select | QItemSelectionModel.Rows
                )
                self.treeWidget.scrollTo(
                    current_index, QTreeView.PositionAtCenter)

        for i in range(8):
            self.treeWidget.resizeColumnToContents(i)

        self.treeWidget.setColumnWidth(
            1, min(self.treeWidget.columnWidth(1), 300))
        self.treeWidget.setColumnWidth(
            2, min(self.treeWidget.columnWidth(2), 200))

        self.updateInfo()

    def updateInfo(self):
        gds_info = self.gds_info
        text = ("%i datasets\n%i datasets cached\n" %
                (len(gds_info),
                 len(glob.glob(serverfiles.localpath("GEO") + "/GDS*"))))
        filtered = self.treeWidget.model().rowCount()
        if len(self.gds) != filtered:
            text += ("%i after filtering") % filtered
        self.infoBox.setText(text)

    def updateSelection(self, *args):
        current = self.treeWidget.selectedIndexes()
        mapToSource = self.treeWidget.model().mapToSource
        current = [mapToSource(index).row() for index in current]
        if current:
            self.currentGds = self.gds[current[0]]
            self.setAnnotations(self.currentGds)
            self.infoGDS.setText(self.currentGds.get("description", ""))
            self.nameEdit.setPlaceholderText(self.currentGds["title"])
            self.datasetName = \
                self.datasetNames.get(self.currentGds["dataset_id"], "")
        else:
            self.currentGds = None
            self.nameEdit.setPlaceholderText("")
            self.datasetName = ""

        self.commitIf()

    def setAnnotations(self, gds):
        self._annotationsUpdating = True
        self.annotationsTree.clear()

        annotations = defaultdict(set)
        subsetscount = {}
        for desc in gds["subsets"]:
            annotations[desc["type"]].add(desc["description"])
            subsetscount[desc["description"]] = str(len(desc["sample_id"]))

        for type, subsets in annotations.items():
            key = (gds["dataset_id"], type)
            subsetItem = QTreeWidgetItem(self.annotationsTree, [type])
            subsetItem.setFlags(subsetItem.flags() | Qt.ItemIsUserCheckable |
                                Qt.ItemIsTristate)
            subsetItem.setCheckState(
                0, self.gdsSelectionStates.get(key, Qt.Checked)
            )
            subsetItem.key = key
            for subset in subsets:
                key = (gds["dataset_id"], type, subset)
                item = QTreeWidgetItem(
                    subsetItem, [subset, subsetscount.get(subset, "")]
                )
                item.setFlags(item.flags() | Qt.ItemIsUserCheckable)
                item.setCheckState(
                    0, self.gdsSelectionStates.get(key, Qt.Checked)
                )
                item.key = key
        self._annotationsUpdating = False
        self.annotationsTree.expandAll()
        for i in range(self.annotationsTree.columnCount()):
            self.annotationsTree.resizeColumnToContents(i)

    def annotationSelectionChanged(self, item, column):
        if self._annotationsUpdating:
            return
        for i in range(self.annotationsTree.topLevelItemCount()):
            item = self.annotationsTree.topLevelItem(i)
            self.gdsSelectionStates[item.key] = item.checkState(0)
            for j in range(item.childCount()):
                child = item.child(j)
                self.gdsSelectionStates[child.key] = child.checkState(0)

    def filter(self):
        filter_string = unicode(self.filterLineEdit.text())
        proxyModel = self.treeWidget.model()
        if proxyModel:
            strings = filter_string.lower().strip().split()
            proxyModel.setFilterFixedStrings(strings)
            self.updateInfo()

    def selectedSamples(self):
        """
        Return the currently selected sample annotations.

        The return value is a list of selected (sample type, sample value)
        tuples.

        .. note:: if some Sample annotation type has no selected values.
                  this method will return all values for it.

        """
        samples = []
        unused_types = []
        used_types = []
        for stype in childiter(self.annotationsTree.invisibleRootItem()):
            selected_values = []
            all_values = []
            for sval in childiter(stype):
                value = (str(stype.text(0)), str(sval.text(0)))
                if self.gdsSelectionStates.get(sval.key, True):
                    selected_values.append(value)
                all_values.append(value)
            if selected_values:
                samples.extend(selected_values)
                used_types.append(str(stype.text(0)))
            else:
                # If no sample of sample type is selected we don't filter
                # on it.
                samples.extend(all_values)
                unused_types.append(str(stype.text(0)))

        return samples, used_types

    def commitIf(self):
        if self.autoCommit:
            self.commit()
        else:
            self.selectionChanged = True

    def commit(self):
        if self.currentGds:
            self.error(0)
            sample_type = None
            self.progressBarInit()
            self.progressBarSet(10)

            _, groups = self.selectedSamples()
            if len(groups) == 1 and self.outputRows:
                sample_type = groups[0]

            self.setEnabled(False)
            self.setBlocking(True)

            def get_data(gds_id, report_genes, transpose, sample_type, title):
                gds = geo.GDS(gds_id)
                data = gds.getdata(
                    report_genes=report_genes, transpose=transpose,
                    sample_type=sample_type
                )
                data.name = title
                return data

            get_data = partial(
                get_data, self.currentGds["dataset_id"],
                report_genes=self.mergeSpots,
                transpose=self.outputRows,
                sample_type=sample_type,
                title=self.datasetName or self.currentGds["title"]
            )
            self._datatask = Task(function=get_data)
            self._datatask.finished.connect(self._on_dataready)
            self._executor.submit(self._datatask)

    def _on_dataready(self):
        self.setEnabled(True)
        self.setBlocking(False)

        self.progressBarSet(50)

        try:
            data = self._datatask.result()
        except urlrequest.URLError as error:
            self.error(0, "Error while connecting to the NCBI ftp server! %r" %
                       error)
            self._datatask = None
            self.progressBarFinished()
            return

        self._datatask = None

        data_name = data.name
        samples, _ = self.selectedSamples()

        self.warning(0)
        message = None
        if self.outputRows:
            def samplesinst(ex):
                out = []
                for meta in data.domain.metas:
                    out.append((meta.name, ex[meta].value))

                if data.domain.class_var.name != 'class':
                    out.append((data.domain.class_var.name,
                                ex[data.domain.class_var].value))

                return out
            samples = set(samples)
            mask = [samples.issuperset(samplesinst(ex)) for ex in data]
            data = data[numpy.array(mask, dtype=bool)]
            if len(data) == 0:
                message = "No samples with selected sample annotations."
        else:
            samples = set(samples)
            domain = Orange.data.Domain(
                [attr for attr in data.domain.attributes
                 if samples.issuperset(attr.attributes.items())],
                data.domain.class_var,
                data.domain.metas
            )
#             domain.addmetas(data.domain.getmetas())

            if len(domain.attributes) == 0:
                message = "No samples with selected sample annotations."
            stypes = set(s[0] for s in samples)
            for attr in domain.attributes:
                attr.attributes = dict(
                    (key, value) for key, value in attr.attributes.items()
                    if key in stypes
                )
            data = Orange.data.Table(domain, data)

        if message is not None:
            self.warning(0, message)

        data_hints.set_hint(data, "taxid", self.currentGds.get("taxid", ""),
                            10.0)
        data_hints.set_hint(data, "genesinrows", self.outputRows, 10.0)

        self.progressBarFinished()
        data.name = data_name
        self.send("Expression Data", data)

        model = self.treeWidget.model().sourceModel()
        row = self.gds.index(self.currentGds)

        model.setData(model.index(row, 0),  " ", Qt.DisplayRole)

        self.updateInfo()
        self.selectionChanged = False

    def splitterMoved(self, *args):
        self.splitterSettings = [str(sp.saveState()) for sp in self.splitters]

    def onDeleteWidget(self):
        if self._inittask:
            self._inittask.future().cancel()
            self._inittask.finished.disconnect(self._initializemodel)
        if self._datatask:
            self._datatask.future().cancel()
            self._datatask.finished.disconnect(self._on_dataready)
        self._executor.shutdown(wait=False)

        super(OWGEODatasets, self).onDeleteWidget()

    def onNameEdited(self):
        if self.currentGds:
            gds_id = self.currentGds["dataset_id"]
            self.datasetNames[gds_id] = unicode(self.nameEdit.text())
            self.commitIf()
Ejemplo n.º 9
0
class OWTestLearners(widget.OWWidget):
    name = "Test & Score"
    description = "Cross-validation accuracy estimation."
    icon = "icons/TestLearners1.svg"
    priority = 100

    inputs = [("Learner", Learner, "set_learner", widget.Multiple),
              ("Data", Table, "set_train_data", widget.Default),
              ("Test Data", Table, "set_test_data"),
              ("Preprocessor", Preprocess, "set_preprocessor")]

    outputs = [("Predictions", Orange.data.Table),
               ("Evaluation Results", Orange.evaluation.Results)]

    settingsHandler = settings.ClassValuesContextHandler()

    #: Resampling/testing types
    KFold, ShuffleSplit, LeaveOneOut, TestOnTrain, TestOnTest = 0, 1, 2, 3, 4

    #: Selected resampling type
    resampling = settings.Setting(0)
    #: Number of folds for K-fold cross validation
    k_folds = settings.Setting(10)
    #: Stratified sampling for K-fold
    cv_stratified = settings.Setting(True)
    #: Number of repeats for ShuffleSplit sampling
    n_repeat = settings.Setting(10)
    #: ShuffleSplit sampling p
    sample_p = settings.Setting(75)
    #: Stratified sampling for Random Sampling
    shuffle_stratified = settings.Setting(True)

    TARGET_AVERAGE = "(Average over classes)"
    class_selection = settings.ContextSetting(TARGET_AVERAGE)

    def __init__(self):
        super().__init__()

        self.data = None
        self.test_data = None
        self.preprocessor = None
        self.train_data_missing_vals = False
        self.test_data_missing_vals = False

        #: An Ordered dictionary with current inputs and their testing
        #: results.
        self.learners = OrderedDict()

        sbox = gui.widgetBox(self.controlArea, "Sampling")
        rbox = gui.radioButtons(sbox,
                                self,
                                "resampling",
                                callback=self._param_changed)

        gui.appendRadioButton(rbox, "Cross validation")
        ibox = gui.indentedBox(rbox)
        gui.spin(ibox,
                 self,
                 "k_folds",
                 2,
                 50,
                 label="Number of folds:",
                 callback=self.kfold_changed)
        gui.checkBox(ibox,
                     self,
                     "cv_stratified",
                     "Stratified",
                     callback=self.kfold_changed)

        gui.appendRadioButton(rbox, "Random sampling")
        ibox = gui.indentedBox(rbox)
        gui.spin(ibox,
                 self,
                 "n_repeat",
                 2,
                 50,
                 label="Repeat train/test",
                 callback=self.shuffle_split_changed)
        gui.widgetLabel(ibox, "Relative training set size:")
        gui.hSlider(ibox,
                    self,
                    "sample_p",
                    minValue=1,
                    maxValue=99,
                    ticks=20,
                    vertical=False,
                    labelFormat="%d %%",
                    callback=self.shuffle_split_changed)
        gui.checkBox(ibox,
                     self,
                     "shuffle_stratified",
                     "Stratified",
                     callback=self.shuffle_split_changed)

        gui.appendRadioButton(rbox, "Leave one out")

        gui.appendRadioButton(rbox, "Test on train data")
        gui.appendRadioButton(rbox, "Test on test data")

        rbox.layout().addSpacing(5)
        self.apply_button = gui.button(rbox,
                                       self,
                                       "Apply",
                                       callback=self.apply,
                                       default=True)

        self.cbox = gui.widgetBox(self.controlArea, "Target class")
        self.class_selection_combo = gui.comboBox(
            self.cbox,
            self,
            "class_selection",
            items=[],
            sendSelectedValue=True,
            valueType=str,
            callback=self._on_target_class_changed,
            contentsLength=8)

        gui.rubber(self.controlArea)

        self.view = QTreeView(rootIsDecorated=False,
                              uniformRowHeights=True,
                              wordWrap=True,
                              editTriggers=QTreeView.NoEditTriggers)
        header = self.view.header()
        header.setResizeMode(QHeaderView.ResizeToContents)
        header.setDefaultAlignment(Qt.AlignCenter)
        header.setStretchLastSection(False)

        self.result_model = QStandardItemModel(self)
        self.result_model.setHorizontalHeaderLabels(["Method"])
        self.view.setModel(self.result_model)
        self.view.setItemDelegate(ItemDelegate())

        box = gui.widgetBox(self.mainArea, "Evaluation Results")
        box.layout().addWidget(self.view)

    def sizeHint(self):
        return QSize(780, 1)

    def set_learner(self, learner, key):
        """
        Set the input `learner` for `key`.
        """
        if key in self.learners and learner is None:
            # Removed
            del self.learners[key]
        else:
            self.learners[key] = Input(learner, None, None)
            self._invalidate([key])

    def set_train_data(self, data):
        """
        Set the input training dataset.
        """
        self.error(0)
        self.information(0)
        if data and not data.domain.class_var:
            self.error(0, "Train data input requires a class variable")
            data = None

        if isinstance(data, SqlTable):
            if data.approx_len() < AUTO_DL_LIMIT:
                data = Table(data)
            else:
                self.information(0, "Train data has been sampled")
                data_sample = data.sample_time(1, no_cache=True)
                data_sample.download_data(AUTO_DL_LIMIT, partial=True)
                data = Table(data_sample)

        self.warning(4)
        self.train_data_missing_vals = data is not None and \
                                       np.isnan(data.Y).any()
        if self.train_data_missing_vals or self.test_data_missing_vals:
            self.warning(
                4,
                self._get_missing_data_warning(self.train_data_missing_vals,
                                               self.test_data_missing_vals))
            if data:
                data = RemoveNaNClasses(data)

        self.data = data
        self.closeContext()
        if data is not None:
            self._update_class_selection()
            self.openContext(data.domain.class_var)
        self._invalidate()

    def set_test_data(self, data):
        """
        Set the input separate testing dataset.
        """
        self.error(1)
        self.information(1)
        if data and not data.domain.class_var:
            self.error(1, "Test data input requires a class variable")
            data = None

        if isinstance(data, SqlTable):
            if data.approx_len() < AUTO_DL_LIMIT:
                data = Table(data)
            else:
                self.information(1, "Test data has been sampled")
                data_sample = data.sample_time(1, no_cache=True)
                data_sample.download_data(AUTO_DL_LIMIT, partial=True)
                data = Table(data_sample)

        self.warning(4)
        self.test_data_missing_vals = data is not None and \
                                      np.isnan(data.Y).any()
        if self.train_data_missing_vals or self.test_data_missing_vals:
            self.warning(
                4,
                self._get_missing_data_warning(self.train_data_missing_vals,
                                               self.test_data_missing_vals))
            if data:
                data = RemoveNaNClasses(data)

        self.test_data = data
        if self.resampling == OWTestLearners.TestOnTest:
            self._invalidate()

    def _get_missing_data_warning(self, train_missing, test_missing):
        return "Instances with unknown target values were removed from{}data"\
            .format(train_missing * test_missing * " "
                    or train_missing * " train " or test_missing * " test ")

    def set_preprocessor(self, preproc):
        """
        Set the input preprocessor to apply on the training data.
        """
        self.preprocessor = preproc
        self._invalidate()

    def handleNewSignals(self):
        """Reimplemented from OWWidget.handleNewSignals."""
        self._update_class_selection()
        self.apply()

    def kfold_changed(self):
        self.resampling = OWTestLearners.KFold
        self._param_changed()

    def shuffle_split_changed(self):
        self.resampling = OWTestLearners.ShuffleSplit
        self._param_changed()

    def _param_changed(self):
        self._invalidate()

    def _update_results(self):
        """
        Run/evaluate the learners.
        """
        self.warning([1, 2])
        self.error([2, 4])
        if self.data is None:
            return

        class_var = self.data.domain.class_var

        if self.resampling == OWTestLearners.TestOnTest:
            if self.test_data is None:
                self.warning(2, "Missing separate test data input")
                return
            elif self.test_data.domain.class_var != class_var:
                self.error(2, ("Inconsistent class variable between test " +
                               "and train data sets"))
                return

        # items in need of an update
        items = [(key, slot) for key, slot in self.learners.items()
                 if slot.results is None]
        learners = [slot.learner for _, slot in items]
        if len(items) == 0:
            return

        if self.test_data is not None and \
                self.resampling != OWTestLearners.TestOnTest:
            self.warning(
                1, "Test data is present but unused. "
                "Select 'Test on test data' to use it.")

        rstate = 42

        def update_progress(finished):
            self.progressBarSet(100 * finished)

        common_args = dict(store_data=True,
                           preprocessor=self.preprocessor,
                           callback=update_progress)
        self.setStatusMessage("Running")

        with self.progressBar():
            try:
                if self.resampling == OWTestLearners.KFold:
                    if len(self.data) < self.k_folds:
                        self.error(4, "Number of folds exceeds the data size")
                        return

                    warnings = []
                    results = Orange.evaluation.CrossValidation(
                        self.data,
                        learners,
                        k=self.k_folds,
                        random_state=rstate,
                        warnings=warnings,
                        **common_args)
                    if warnings:
                        self.warning(2, warnings[0])
                elif self.resampling == OWTestLearners.LeaveOneOut:
                    results = Orange.evaluation.LeaveOneOut(
                        self.data, learners, **common_args)
                elif self.resampling == OWTestLearners.ShuffleSplit:
                    train_size = self.sample_p / 100
                    results = Orange.evaluation.ShuffleSplit(
                        self.data,
                        learners,
                        n_resamples=self.n_repeat,
                        train_size=train_size,
                        test_size=None,
                        stratified=self.shuffle_stratified,
                        random_state=rstate,
                        **common_args)
                elif self.resampling == OWTestLearners.TestOnTrain:
                    results = Orange.evaluation.TestOnTrainingData(
                        self.data, learners, **common_args)
                elif self.resampling == OWTestLearners.TestOnTest:
                    results = Orange.evaluation.TestOnTestData(
                        self.data, self.test_data, learners, **common_args)
                else:
                    assert False
            except RuntimeError as e:
                self.error(2, str(e))
                self.setStatusMessage("")
                return

        learner_key = {
            slot.learner: key
            for key, slot in self.learners.items()
        }
        for learner, result in zip(learners, split_by_model(results)):
            stats = None
            if class_var.is_discrete:
                scorers = classification_stats.scores
            elif class_var.is_continuous:
                scorers = regression_stats.scores
            else:
                scorers = None
            if scorers:
                ex = result.failed[0]
                if ex:
                    stats = [Try.Fail(ex)] * len(scorers)
                    result = Try.Fail(ex)
                else:
                    stats = [Try(lambda: score(result)) for score in scorers]
                    result = Try.Success(result)
            key = learner_key[learner]
            self.learners[key] = \
                self.learners[key]._replace(results=result, stats=stats)

        self.setStatusMessage("")

    def _update_header(self):
        # Set the correct horizontal header labels on the results_model.
        headers = ["Method"]
        if self.data is not None:
            if self.data.domain.has_discrete_class:
                headers.extend(classification_stats.headers)
            else:
                headers.extend(regression_stats.headers)

        # remove possible extra columns from the model.
        for i in reversed(range(len(headers),
                                self.result_model.columnCount())):
            self.result_model.takeColumn(i)

        self.result_model.setHorizontalHeaderLabels(headers)

    def _update_stats_model(self):
        # Update the results_model with up to date scores.
        # Note: The target class specific scores (if requested) are
        # computed as needed in this method.
        model = self.view.model()
        # clear the table model, but preserving the header labels
        for r in reversed(range(model.rowCount())):
            model.takeRow(r)

        target_index = None
        if self.data is not None:
            class_var = self.data.domain.class_var
            if self.data.domain.has_discrete_class and \
                            self.class_selection != self.TARGET_AVERAGE:
                target_index = class_var.values.index(self.class_selection)
        else:
            class_var = None

        errors = []
        has_missing_scores = False

        for key, slot in self.learners.items():
            name = learner_name(slot.learner)
            head = QStandardItem(name)
            head.setData(key, Qt.UserRole)
            if isinstance(slot.results, Try.Fail):
                head.setToolTip(str(slot.results.exception))
                head.setText("{} (error)".format(name))
                head.setForeground(QtGui.QBrush(Qt.red))
                errors.append("{name} failed with error:\n"
                              "{exc.__class__.__name__}: {exc!s}".format(
                                  name=name, exc=slot.results.exception))

            row = [head]

            if class_var is not None and class_var.is_discrete and \
                    target_index is not None:
                if slot.results is not None and slot.results.success:
                    ovr_results = results_one_vs_rest(slot.results.value,
                                                      target_index)

                    stats = [
                        Try(lambda: score(ovr_results))
                        for score in classification_stats.scores
                    ]
                else:
                    stats = None
            else:
                stats = slot.stats

            if stats is not None:
                for stat in stats:
                    item = QStandardItem()
                    if stat.success:
                        item.setText("{:.3f}".format(stat.value[0]))
                    else:
                        item.setToolTip(str(stat.exception))
                        has_missing_scores = True
                    row.append(item)

            model.appendRow(row)

        if errors:
            self.error(3, "\n".join(errors))
        else:
            self.error(3)

        if has_missing_scores:
            self.warning(3, "Some scores could not be computed")
        else:
            self.warning(3)

    def _update_class_selection(self):
        self.class_selection_combo.setCurrentIndex(-1)
        self.class_selection_combo.clear()
        if not self.data:
            return

        if self.data.domain.has_discrete_class:
            self.cbox.setVisible(True)
            class_var = self.data.domain.class_var
            items = [self.TARGET_AVERAGE] + class_var.values
            self.class_selection_combo.addItems(items)

            class_index = 0
            if self.class_selection in class_var.values:
                class_index = class_var.values.index(self.class_selection) + 1

            self.class_selection_combo.setCurrentIndex(class_index)
            self.class_selection = items[class_index]
        else:
            self.cbox.setVisible(False)

    def _on_target_class_changed(self):
        self._update_stats_model()

    def _invalidate(self, which=None):
        # Invalidate learner results for `which` input keys
        # (if None then all learner results are invalidated)
        if which is None:
            which = self.learners.keys()

        model = self.view.model()
        statmodelkeys = [
            model.item(row, 0).data(Qt.UserRole)
            for row in range(model.rowCount())
        ]

        for key in which:
            self.learners[key] = \
                self.learners[key]._replace(results=None, stats=None)

            if key in statmodelkeys:
                row = statmodelkeys.index(key)
                for c in range(1, model.columnCount()):
                    item = model.item(row, c)
                    if item is not None:
                        item.setData(None, Qt.DisplayRole)
                        item.setData(None, Qt.ToolTipRole)

        self.apply_button.setEnabled(True)

    def apply(self):
        self.apply_button.setEnabled(False)
        self._update_header()
        # Update the view to display the model names
        self._update_stats_model()
        self._update_results()
        self._update_stats_model()
        self.commit()

    def commit(self):
        valid = [
            slot for slot in self.learners.values()
            if slot.results is not None and slot.results.success
        ]
        if valid:
            # Evaluation results
            combined = results_merge([slot.results.value for slot in valid])
            combined.learner_names = [
                learner_name(slot.learner) for slot in valid
            ]

            # Predictions & Probabilities
            predictions = combined.get_augmented_data(combined.learner_names)
        else:
            combined = None
            predictions = None
        self.send("Evaluation Results", combined)
        self.send("Predictions", predictions)

    def send_report(self):
        if not self.data or not self.learners:
            return
        if self.resampling == self.KFold:
            stratified = 'Stratified ' if self.cv_stratified else ''
            items = [
                ("Sampling type",
                 "{}{}-fold Cross validation".format(stratified, self.k_folds))
            ]
        elif self.resampling == self.LeaveOneOut:
            items = [("Sampling type", "Leave one out")]
        elif self.resampling == self.ShuffleSplit:
            stratified = 'Stratified ' if self.shuffle_stratified else ''
            items = [
                ("Sampling type",
                 "{}Shuffle split, {} random samples with {}% data ".format(
                     stratified, self.n_repeat, self.sample_p))
            ]
        elif self.resampling == self.TestOnTrain:
            items = [("Sampling type", "No sampling, test on training data")]
        elif self.resampling == self.TestOnTest:
            items = [("Sampling type", "No sampling, test on testing data")]
        else:
            items = []
        if self.data.domain.has_discrete_class:
            items += [("Target class", self.class_selection.strip("()"))]
        if items:
            self.report_items("Settings", items)
        self.report_table("Scores", self.view)
Ejemplo n.º 10
0
class OWTestLearners(widget.OWWidget):
    name = "Test Learners"
    description = "Cross-validation accuracy estimation."
    icon = "icons/TestLearners1.svg"
    priority = 100

    inputs = [("Learner", Orange.classification.Learner,
               "set_learner", widget.Multiple),
              ("Data", Orange.data.Table, "set_train_data", widget.Default),
              ("Test Data", Orange.data.Table, "set_test_data")]

    outputs = [("Evaluation Results", Orange.evaluation.Results)]

    settingsHandler = settings.ClassValuesContextHandler()

    #: Resampling/testing types
    KFold, LeaveOneOut, Bootstrap, TestOnTrain, TestOnTest = 0, 1, 2, 3, 4

    #: Selected resampling type
    resampling = settings.Setting(0)
    #: Number of folds for K-fold cross validation
    k_folds = settings.Setting(10)
    #: Number of repeats for bootstrap sampling
    n_repeat = settings.Setting(10)
    #: Bootstrap sampling p
    sample_p = settings.Setting(75)

    class_selection = settings.ContextSetting("(None)")

    def __init__(self, parent=None):
        super().__init__(parent)

        self.train_data = None
        self.test_data = None

        #: An Ordered dictionary with current inputs and their testing
        #: results.
        self.learners = OrderedDict()

        sbox = gui.widgetBox(self.controlArea, "Sampling")
        rbox = gui.radioButtons(
            sbox, self, "resampling", callback=self._param_changed
        )
        gui.appendRadioButton(rbox, "Cross validation")
        ibox = gui.indentedBox(rbox)
        gui.spin(ibox, self, "k_folds", 2, 50, label="Number of folds:",
                 callback=self.kfold_changed)
        gui.appendRadioButton(rbox, "Leave one out")
        gui.appendRadioButton(rbox, "Random sampling")
        ibox = gui.indentedBox(rbox)
        gui.spin(ibox, self, "n_repeat", 2, 50, label="Repeat train/test",
                 callback=self.bootstrap_changed)
        gui.widgetLabel(ibox, "Relative training set size:")
        gui.hSlider(ibox, self, "sample_p", minValue=1, maxValue=100,
                    ticks=20, vertical=False, labelFormat="%d %%",
                    callback=self.bootstrap_changed)

        gui.appendRadioButton(rbox, "Test on train data")
        gui.appendRadioButton(rbox, "Test on test data")

        rbox.layout().addSpacing(5)
        gui.button(rbox, self, "Apply", callback=self.apply)

        self.cbox = gui.widgetBox(self.controlArea, "Target class")
        self.class_selection_combo = gui.comboBox(self.cbox, self, "class_selection",
             items=[],
             callback=self._select_class,
             sendSelectedValue=True, valueType=str)

        gui.rubber(self.controlArea)


        self.view = QTreeView(
            rootIsDecorated=False,
            uniformRowHeights=True,
            wordWrap=True,
            editTriggers=QTreeView.NoEditTriggers
        )
        header = self.view.header()
        header.setResizeMode(QHeaderView.ResizeToContents)
        header.setDefaultAlignment(Qt.AlignCenter)
        header.setStretchLastSection(False)

        self.result_model = QStandardItemModel()
        self.view.setModel(self.result_model)
        self.view.setItemDelegate(ItemDelegate())
        self._update_header()
        box = gui.widgetBox(self.mainArea, "Evaluation Results")
        box.layout().addWidget(self.view)

    def set_learner(self, learner, key):
        if key in self.learners and learner is None:
            del self.learners[key]
        else:
            self.learners[key] = Input(learner, None, ())
        self._update_stats_model()

    def set_train_data(self, data):
        self.error(0)
        if data and not data.domain.class_var:
            self.error(0, "Train data input requires a class variable")
            data = None

        self.train_data = data
        self.closeContext()
        if data is not None:
            self.openContext(data.domain.class_var)
            self._update_header()
        self._update_class_selection()
        self._invalidate()

    def set_test_data(self, data):
        self.error(1)
        if data and not data.domain.class_var:
            self.error(1, "Test data input requires a class variable")
            data = None

        self.test_data = data
        if self.resampling == OWTestLearners.TestOnTest:
            self._invalidate()

    def handleNewSignals(self):
        self.update_results()
        self.commit()

    def kfold_changed(self):
        self.resampling = OWTestLearners.KFold
        self._param_changed()

    def bootstrap_changed(self):
        self.resampling = OWTestLearners.Bootstrap
        self._param_changed()

    def _param_changed(self):
        self._invalidate()

    def update_results(self):
        self.warning([1, 2])
        self.error(2)

        if self.train_data is None:
            return

        if self.resampling == OWTestLearners.TestOnTest:
            if self.test_data is None:
                self.warning(2, "Missing separate test data input")
                return

            elif self.test_data.domain.class_var != \
                    self.train_data.domain.class_var:
                self.error(2, ("Inconsistent class variable between test " +
                               "and train data sets"))
                return

        # items in need of an update
        items = [(key, input) for key, input in self.learners.items()
                 if input.results is None]
        learners = [input.learner for _, input in items]

        self.setStatusMessage("Running")
        if self.test_data is not None and \
                self.resampling != OWTestLearners.TestOnTest:
            self.warning(1, "Test data is present but unused. "
                            "Select 'Test on test data' to use it.")

        # TODO: Test each learner individually
        try:
            if self.resampling == OWTestLearners.KFold:
                results = Orange.evaluation.CrossValidation(
                    self.train_data, learners, k=self.k_folds, store_data=True
                )
            elif self.resampling == OWTestLearners.LeaveOneOut:
                results = Orange.evaluation.LeaveOneOut(
                    self.train_data, learners, store_data=True
                )
            elif self.resampling == OWTestLearners.Bootstrap:
                p = self.sample_p / 100.0
                results = Orange.evaluation.Bootstrap(
                    self.train_data, learners, n_resamples=self.n_repeat, p=p,
                    store_data=True
                )
            elif self.resampling == OWTestLearners.TestOnTrain:
                results = Orange.evaluation.TestOnTrainingData(
                    self.train_data, learners, store_data=True
                )
            elif self.resampling == OWTestLearners.TestOnTest:
                if self.test_data is None:
                    return
                results = Orange.evaluation.TestOnTestData(
                    self.train_data, self.test_data, learners, store_data=True
                )
            else:
                assert False
        except Exception as e:
            self.error(2, str(e))
            return

        self.results = results
        results = list(split_by_model(results))

        class_var = self.train_data.domain.class_var

        if class_var.is_discrete:
            stats = [classification_stats(self.one_vs_rest(res)) for res in results]
        else:
            stats = [regression_stats(res) for res in results]

        self._update_header()

        for (key, input), res, stat in zip(items, results, stats):
            self.learners[key] = input._replace(results=res, stats=stat)

        self.setStatusMessage("")

        self._update_stats_model()


    def _update_header(self):
        headers = ["Method"]
        if self.train_data is not None:
            if self.train_data.domain.class_var.is_discrete:
                headers.extend(classification_stats.headers)
            else:
                headers.extend(regression_stats.headers)
        for i in reversed(range(len(headers),
                                self.result_model.columnCount())):
            self.result_model.takeColumn(i)

        self.result_model.setHorizontalHeaderLabels(headers)

    def _update_stats_model(self):
        model = self.view.model()

        for r in reversed(range(model.rowCount())):
            model.takeRow(r)

        for input in self.learners.values():
            name = learner_name(input.learner)
            row = []
            head = QStandardItem()
            head.setData(name, Qt.DisplayRole)
            row.append(head)
            for stat in input.stats:
                item = QStandardItem()
                item.setData(" {:.3f} ".format(stat[0]), Qt.DisplayRole)
                row.append(item)
            model.appendRow(row)

    def _update_class_selection(self):
        self.class_selection_combo.clear()
        if not self.train_data:
            return
        if self.train_data.domain.class_var.is_discrete:
            self.cbox.setVisible(True)
            values = self.train_data.domain.class_var.values
            self.class_selection_combo.addItem("(None)")
            self.class_selection_combo.addItems(values)

            class_index = 0
            if self.class_selection in self.train_data.domain.class_var.values:
                    class_index = self.train_data.domain.class_var.values.index(self.class_selection)+1
            else:
                self.class_selection = '(None)'

            self.class_selection_combo.setCurrentIndex(class_index)
            self.previous_class_selection = "(None)"
        else:
            self.cbox.setVisible(False)

    def one_vs_rest(self, res):
        if self.class_selection != '(None)' and self.class_selection != 0:
            class_ = self.train_data.domain.class_var.values.index(self.class_selection)
            actual = res.actual == class_
            predicted = res.predicted == class_
            return Results(
                nmethods=1, domain=self.train_data.domain,
                actual=actual, predicted=predicted)
        else:
            return res

    def _select_class(self):
        if self.previous_class_selection == self.class_selection:
            return

        results = list(split_by_model(self.results))

        items = [(key, input) for key, input in self.learners.items()]
        learners = [input.learner for _, input in items]

        class_var = self.train_data.domain.class_var
        if class_var.is_discrete:
            stats = [classification_stats(self.one_vs_rest(res)) for res in results]
        else:
            stats = [regression_stats(res) for res in results]

        for (key, input), res, stat in zip(items, results, stats):
            self.learners[key] = input._replace(results=res, stats=stat)

        self.setStatusMessage("")

        self._update_stats_model()
        self.previous_class_selection = self.class_selection

    def _invalidate(self, which=None):
        if which is None:
            which = self.learners.keys()

        all_keys = list(self.learners.keys())
        model = self.view.model()

        for key in which:
            self.learners[key] = \
                self.learners[key]._replace(results=None, stats=None)

            if key in self.learners:
                row = all_keys.index(key)
                for c in range(1, model.columnCount()):
                    item = model.item(row, c)
                    if item is not None:
                        item.setData(None, Qt.DisplayRole)

    def apply(self):
        self.update_results()
        self.commit()

    def commit(self):
        results = [val.results for val in self.learners.values()
                   if val.results is not None]
        if results:
            combined = results_merge(results)
            combined.learner_names = [learner_name(val.learner)
                                     for val in self.learners.values()]
        else:
            combined = None
        self.send("Evaluation Results", combined)
Ejemplo n.º 11
0
class VariableTreeBox(QGroupBox):

    class VariableTreeButton(QToolButton):
        def __init__(self, *args, **kwargs):
            QToolButton.__init__(self, *args, **kwargs)
            baseIcon = QIcon(":go-next.svg")
            secondIcon = QIcon(":go-previous.svg")
            self.__icons__ = [baseIcon, secondIcon]
            self.setIcon(baseIcon)
            
        def switchIcon(self):
            self.__icons__.reverse()
            self.setIcon(self.__icons__[0])
            
    def __init__(self, id, name="Choose Variables", model=None):
        QGroupBox.__init__(self)
        if model is None:
            model = TreeModel()
        self.setTitle(name)
        self.setToolTip("<p>Select variables for analysis</p>")
        self.id = id
        self.pairs = {}

        layout = HBoxLayout()
        self.variableTreeView = QTreeView()
        self.variableTreeView.setModel(model)
        self.variableTreeView.setToolTip("Select variables from here")
        self.variableTreeView.setSelectionMode(QAbstractItemView.ExtendedSelection)
        self.variableTreeView.installEventFilter(self)
        layout.addWidget(self.variableTreeView)
        self.widgetsLayout = VBoxLayout()
        layout.addLayout(self.widgetsLayout)
        self.setLayout(layout)
        
    def addItem(self, widget):
        hbox = HBoxLayout()
        button = self.VariableTreeButton()
        button.setToolTip("Specify variable")
        self.connect(button, SIGNAL("clicked()"), self.move)
        widget.widget.installEventFilter(self)
        hbox.addWidget(button)
        hbox.addWidget(widget)
        self.widgetsLayout.addLayout(hbox)
        self.widgetsLayout.addStretch()
        self.pairs[button] = widget

    def eventFilter(self, object, event):
        if event.type() == QEvent.FocusIn:
            self.switchButton(object)
        return False

    def move(self):
        sender = self.sender()
        receiver = self.pairs[sender]
        path = QString()
        if isinstance(receiver, VariableLineEdit):
            if receiver.widget.text().isEmpty():
                indexes = self.variableTreeView.selectedIndexes()
                if len(indexes) < 1:
                    return
                path = self.variableTreeView.model().parentTree(indexes[0])
            sender.switchIcon()
            receiver.widget.setText(path)
        elif isinstance(receiver, VariableListBox):
            add = self.variableTreeView.hasFocus()
            if add:
                indexes = self.variableTreeView.selectedIndexes()
                paths = [self.variableTreeView.model().parentTree(index) for index in indexes if index.column() == 0]
                receiver.widget.addItems(paths)
            else:
                indexes = receiver.widget.selectedItems()
                for index in indexes:
                    item = receiver.widget.takeItem(receiver.widget.row(index))
                    del item

    def switchButton(self, sender):
        if not isinstance(sender, QLineEdit):
            for key, value in self.pairs.iteritems():
                if value.widget == sender:
                    key.setIcon(QIcon(":go-previous.svg"))
                else:
                    key.setIcon(QIcon(":go-next.svg"))

    def parameterValues(self):
        params = {}
        for button, widget in self.pairs.iteritems():
            params.update(widget.parameterValues())
        return params
class PluginRepositoriesGUI(QWidget):
    PATH_COLUMN = 1
    ACTIVE_COLUMN = 0
    AUTO_UPDATE_COLUMN = 2
    STATUS_COLUMN = 3
    
    addRepository = pyqtSignal()
    checkForUpdates = pyqtSignal()
    
    def __init__(self, logger, parent):
        super(PluginRepositoriesGUI, self).__init__(parent)
       
        self.logger = logger
        self._gitHandler = GitHandler(logger)
        
        layout = QVBoxLayout(self)
        
        self._reposTable = QTreeView(self) 
        self._reposTable.setIndentation(0)
        self._reposTable.header().setStretchLastSection(False)
                
        self._reposModel = QStandardItemModel(self._reposTable)
        self._initModel()
        self._reposTable.setModel(self._reposModel)
        
        self._reposTable.setSelectionMode(QTreeView.ExtendedSelection)
        self._reposTable.selectionModel().selectionChanged.connect(self._selectionChanged)
        
        layout.addWidget(self._reposTable)

        buttonLayout = QHBoxLayout()    
        addButton = QPushButton("Add...")
        addButton.clicked.connect(self.addRepository)
        self._removeButton = QPushButton("Remove")
        self._removeButton.setEnabled(False)
        self._removeButton.clicked.connect(self._removeSelected)
        refreshButton = QPushButton("Check Status")
        refreshButton.clicked.connect(self.checkForUpdates)
        buttonLayout.addWidget(addButton)
        buttonLayout.addWidget(self._removeButton)
        buttonLayout.addWidget(refreshButton)
        
        layout.addLayout(buttonLayout)
        
        self._statusWidget = QWidget(self) 
        self._statusWidget.setVisible(False)
        statusLayout = QHBoxLayout(self._statusWidget)
        statusLayout.setContentsMargins(0, 0, 0, 0)
        self._statusLabel = QLabel(self)
        statusLayout.addWidget(self._statusLabel)
        layout.addWidget(self._statusWidget)
        
    def clear(self):
        self._initModel()
        
    def _updateStatusItem(self, item, path, outdated=None, upToDate=None):
        # first check fresh results, then the ones from repositories
        if upToDate and path in upToDate:
            item.setText(upToDate[path])
            if upToDate[path] == GitHandler.UP_TO_DATE_REASON:
                item.setData(QColor(0, 255, 0), Qt.DecorationRole)
            else:
                item.setData(QColor(255, 0, 0), Qt.DecorationRole)
        elif outdated and path in outdated:
            item.setText("Repository can be updated")
            item.setData(QColor(255, 215, 0), Qt.DecorationRole)
        elif get_settings().get_plugin_repositories().isUpToDate(path):
            item.setText("No updates (for details, check status)")
            item.setData(QColor(0, 255, 0), Qt.DecorationRole)
        elif get_settings().get_plugin_repositories().isOutdated(path):
            item.setText("Repository can be updated")
            item.setData(QColor(255, 215, 0), Qt.DecorationRole)
        else:
            item.setData(None, Qt.DecorationRole)
        
    def updateStatusItems(self, outdated=None, upToDate=None):
        for row in xrange(self._reposModel.rowCount()):
            path = convert_string(self._reposModel.item(row, self.PATH_COLUMN).data(Qt.DisplayRole).toString())
            self._updateStatusItem(self._reposModel.item(row, self.STATUS_COLUMN), path, outdated, upToDate)
        
    def _initModel(self):
        from PyQt4.QtCore import QStringList
        self._reposModel.clear()
        stringList = QStringList([u"Active", u"Path", u"Auto Update", u"Status"])
        self._reposModel.setColumnCount(stringList.count())
        self._reposModel.setHorizontalHeaderLabels(stringList)
        
    def appendRepository(self, path, active, autoUpdate, canAutoUpdate = None):
        activeItem = QStandardItem()
        activeItem.setEditable(False)
        activeItem.setCheckState(Qt.Checked if active else Qt.Unchecked)
        activeItem.setCheckable(True)
        
        pathItem = QStandardItem()
        pathItem.setData(path, Qt.DisplayRole)
        pathItem.setEditable(False)
        
        autoUpdateItem = QStandardItem()
        autoUpdateItem.setEditable(False)
        if canAutoUpdate == None:
            canAutoUpdate = self._gitHandler.hasGit(path)
        if canAutoUpdate:
            autoUpdateItem.setCheckState(Qt.Checked if autoUpdate else Qt.Unchecked)
            autoUpdateItem.setCheckable(True)
                
        statusItem = QStandardItem()
        self._updateStatusItem(statusItem, path)
                
        self._reposModel.appendRow([activeItem, pathItem, autoUpdateItem, statusItem])
        
    def resizeColumns(self):
        self._reposTable.resizeColumnToContents(self.ACTIVE_COLUMN)
        self._reposTable.resizeColumnToContents(self.AUTO_UPDATE_COLUMN)
        self._reposTable.header().setResizeMode(self.PATH_COLUMN, QHeaderView.Stretch)
        self._reposTable.header().setResizeMode(self.STATUS_COLUMN, QHeaderView.ResizeToContents)
    
    def getTable(self):
        return self._reposTable
    
    @loggingSlot(QItemSelection, QItemSelection)
    def _selectionChanged(self, _sel, _desel):
        selection = self._reposTable.selectionModel().selectedRows()
        self._removeButton.setEnabled(len(selection) > 0)
        
    @loggingSlot()
    def _removeSelected(self):
        selection = self._reposTable.selectionModel().selectedRows()
        for index in selection:
            self._reposTable.model().removeRow(index.row())
        
    def setStatus(self, msg, _progress=False):
        if msg:
            self._statusLabel.setText(msg)
            self._statusWidget.setVisible(True)
        else:
            self._statusWidget.setVisible(False)
Ejemplo n.º 13
0
class OWTestLearners(widget.OWWidget):
    name = "Test Learners"
    description = ""
    icon = "icons/TestLearners1.svg"

    inputs = [("Learner", Orange.classification.Fitter,
               "set_learner", widget.Multiple),
              ("Data", Orange.data.Table, "set_train_data", widget.Default),
              ("Test Data", Orange.data.Table, "set_test_data")]

    outputs = [("Evaluation Results", testing.Results)]

    #: Resampling/testing types
    KFold, LeaveOneOut, Bootstrap, TestOnTrain, TestOnTest = 0, 1, 2, 3, 4

    #: Selected resampling type
    resampling = settings.Setting(0)
    #: Number of folds for K-fold cross validation
    k_folds = settings.Setting(10)
    #: Number of repeats for bootstrap sampling
    n_repeat = settings.Setting(10)
    #: Bootstrap sampling p
    sample_p = settings.Setting(75)

    def __init__(self, parent=None):
        super().__init__(parent)

        self.train_data = None
        self.test_data = None

        #: An Ordered dictionary with current inputs and their testing
        #: results.
        self.learners = OrderedDict()

        sbox = gui.widgetBox(self.controlArea, "Sampling")
        rbox = gui.radioButtons(
            sbox, self, "resampling", callback=self._param_changed
        )
        gui.appendRadioButton(rbox, "Cross validation")
        ibox = gui.indentedBox(rbox)
        gui.spin(ibox, self, "k_folds", 2, 50, label="Number of folds:",
                 callback=self._param_changed)
        gui.appendRadioButton(rbox, "Leave one out")
        gui.appendRadioButton(rbox, "Random sampling")
        ibox = gui.indentedBox(rbox)
        gui.spin(ibox, self, "n_repeat", 2, 50, label="Repeat train/test",
                 callback=self._param_changed)
        gui.widgetLabel(ibox, "Relative training set size:")
        gui.hSlider(ibox, self, "sample_p", minValue=1, maxValue=100,
                    ticks=20, vertical=False,
                    callback=self._param_changed)

        gui.appendRadioButton(rbox, "Test on train data")
        gui.appendRadioButton(rbox, "Test on test data")

        rbox.layout().addSpacing(5)
        gui.button(rbox, self, "Apply", callback=self.apply)

        gui.rubber(self.controlArea)

        self.view = QTreeView(
            rootIsDecorated=False,
            uniformRowHeights=True,
            wordWrap=True,
            editTriggers=QTreeView.NoEditTriggers
        )

        self.result_model = QStandardItemModel()
        self.result_model.setHorizontalHeaderLabels(
            ["Method"] + classification_stats.headers
        )
        self.view.setModel(self.result_model)
        box = gui.widgetBox(self.mainArea, "Evaluation Results")
        box.layout().addWidget(self.view)

    def set_learner(self, learner, key):
        if key in self.learners and learner is None:
            del self.learners[key]
        else:
            self.learners[key] = Input(learner, None, None)

    def set_train_data(self, data):
        self.train_data = data
        self._invalidate()
        if data is not None and is_discrete(data.domain.class_var):
            headers = ["Method"] + classification_stats.headers
        else:
            headers = ["Method"] + regression_stats.headers
        self.result_model.setHorizontalHeaderLabels(headers)

    def set_test_data(self, data):
        self.test_data = data
        if self.resampling == OWTestLearners.TestOnTest:
            self._invalidate()

    def handleNewSignals(self):
        self.update_results()
        self.commit()

    def _param_changed(self):
        self._invalidate()

    def update_results(self):
        # items in need of an update
        items = [(key, input) for key, input in self.learners.items()
                 if input.results is None]
        learners = [input.learner for _, input in items]

        self.setStatusMessage("Running")

        # TODO: Test each learner individually

        if self.resampling == OWTestLearners.KFold:
            results = testing.CrossValidation(
                self.train_data, learners, k=self.k_folds, store_data=True
            )
        elif self.resampling == OWTestLearners.LeaveOneOut:
            results = testing.LeaveOneOut(
                self.train_data, learners, store_data=True
            )
        elif self.resampling == OWTestLearners.Bootstrap:
            p = self.sample_p / 100.0
            results = testing.Bootstrap(
                self.train_data, learners, n_resamples=self.n_repeat, p=p,
                store_data=True
            )
        elif self.resampling == OWTestLearners.TestOnTrain:
            results = testing.TestOnTrainingData(
                self.train_data, learners, store_data=True
            )
        elif self.resampling == OWTestLearners.TestOnTest:
            assert self.test_data is not None
            results = testing.TestOnTestData(
                self.train_data, self.test_data, learners, store_data=True
            )
        else:
            assert False

        results = list(split_by_model(results))
        class_var = self.train_data.domain.class_var

        if is_discrete(class_var):
            test_stats = classification_stats
        else:
            test_stats = regression_stats

        stats = [test_stats(res) for res in results]
        for (key, input), res, stat in zip(items, results, stats):
            self.learners[key] = input._replace(results=res, stats=stat)

        self.setStatusMessage("")
        self._update_stats_model()

    def _update_stats_model(self):
        model = self.view.model()

        for r in reversed(range(model.rowCount())):
            model.takeRow(r)

        for input in self.learners.values():
            name = learner_name(input.learner)
            row = []
            head = QStandardItem()
            head.setData(name, Qt.DisplayRole)
            row.append(head)
            for stat in input.stats:
                item = QStandardItem()
                item.setData(float(stat[0]), Qt.DisplayRole)
                row.append(item)
            model.appendRow(row)

        self.view.resizeColumnToContents(0)

    def _invalidate(self, which=None):
        if which is None:
            which = self.learners.keys()

        all_keys = list(self.learners.keys())
        model = self.view.model()

        for key in which:
            self.learners[key] = \
                self.learners[key]._replace(results=None, stats=None)

            if key in self.learners:
                row = all_keys.index(key)
                for c in range(1, model.columnCount()):
                    item = model.item(row, c)
                    item.setData(None, Qt.DisplayRole)

    def apply(self):
        self.update_results()
        self.commit()

    def commit(self):
        results = [val.results for val in self.learners.values()]
        if results:
            combined = results_merge(results)
            combined.fitter_names = [learner_name(val.learner)
                                     for val in self.learners.values()]
        else:
            combined = None
        self.send("Evaluation Results", combined)
Ejemplo n.º 14
0
class AddToProject(QDialog):
    """Dialog to let the user choose one of the folders from the opened proj"""

    def __init__(self, projects, parent=None):
        super(AddToProject, self).__init__(parent)
        #pathProjects must be a list
        self._projects = projects
        self.setWindowTitle(translations.TR_ADD_FILE_TO_PROJECT)
        self.pathSelected = ''
        vbox = QVBoxLayout(self)

        hbox = QHBoxLayout()
        self._list = QListWidget()
        for project in self._projects:
            self._list.addItem(project.name)
        self._list.setCurrentRow(0)
        self._tree = QTreeView()
        #self._tree.header().setHidden(True)
        self._tree.setSelectionMode(QTreeView.SingleSelection)
        self._tree.setAnimated(True)
        self.load_tree(self._projects[0])
        hbox.addWidget(self._list)
        hbox.addWidget(self._tree)
        vbox.addLayout(hbox)

        hbox2 = QHBoxLayout()
        btnAdd = QPushButton(translations.TR_ADD_HERE)
        btnCancel = QPushButton(translations.TR_CANCEL)
        hbox2.addWidget(btnCancel)
        hbox2.addWidget(btnAdd)
        vbox.addLayout(hbox2)

        self.connect(btnCancel, SIGNAL("clicked()"), self.close)
        self.connect(btnAdd, SIGNAL("clicked()"), self._select_path)

    def load_tree(self, project):
        """Load the tree view on the right based on the project selected."""
        qfsm = QFileSystemModel()
        #FIXME it's not loading the proper folder, just the root /
        qfsm.setRootPath(project.path)
        qfsm.setFilter(QDir.AllDirs | QDir.NoDotAndDotDot)
        qfsm.setNameFilterDisables(False)
        pext = ["*{0}".format(x) for x in project.extensions]
        qfsm.setNameFilters(pext)
        self._tree.setModel(qfsm)

        t_header = self._tree.header()
        t_header.setHorizontalScrollMode(QAbstractItemView.ScrollPerPixel)
        t_header.setResizeMode(0, QHeaderView.Stretch)
        t_header.setStretchLastSection(False)
        t_header.setClickable(True)

        self._tree.hideColumn(1)  # Size
        self._tree.hideColumn(2)  # Type
        self._tree.hideColumn(3)  # Modification date

    def _select_path(self):
        """Set pathSelected to the folder selected in the tree."""
        path = self._tree.model().filePath(self._tree.currentIndex())
        if path:
            self.pathSelected = path
            self.close()
Ejemplo n.º 15
0
class OWGeneInfo(widget.OWWidget):
    name = "Gene Info"
    description = "Displays gene information from NCBI and other sources."
    icon = "../widgets/icons/GeneInfo.svg"
    priority = 2010

    inputs = [("Data", Orange.data.Table, "setData")]
    outputs = [("Data Subset", Orange.data.Table)]

    settingsHandler = settings.DomainContextHandler()

    organism_index = settings.ContextSetting(0)
    taxid = settings.ContextSetting("9606")

    gene_attr = settings.ContextSetting(0)

    auto_commit = settings.Setting(False)
    search_string = settings.Setting("")

    useAttr = settings.ContextSetting(False)
    useAltSource = settings.ContextSetting(False)

    def __init__(self, parent=None, ):
        super().__init__(self, parent)

        self.selectionChangedFlag = False

        self.__initialized = False
        self.initfuture = None
        self.itemsfuture = None

        self.infoLabel = gui.widgetLabel(
            gui.widgetBox(self.controlArea, "Info", addSpace=True),
            "Initializing\n"
        )

        self.organisms = None
        self.organismBox = gui.widgetBox(
            self.controlArea, "Organism", addSpace=True)

        self.organismComboBox = gui.comboBox(
            self.organismBox, self, "organism_index",
            callback=self._onSelectedOrganismChanged)

        # For now only support one alt source, with a checkbox
        # In the future this can be extended to multiple selections
        self.altSourceCheck = gui.checkBox(
            self.organismBox, self, "useAltSource",
            "Show information from dictyBase",
            callback=self.onAltSourceChange)

        self.altSourceCheck.hide()

        box = gui.widgetBox(self.controlArea, "Gene names", addSpace=True)
        self.geneAttrComboBox = gui.comboBox(
            box, self, "gene_attr",
            "Gene attribute", callback=self.updateInfoItems
        )
        self.geneAttrComboBox.setEnabled(not self.useAttr)
        cb = gui.checkBox(box, self, "useAttr", "Use attribute names",
                          callback=self.updateInfoItems)
        cb.toggled[bool].connect(self.geneAttrComboBox.setDisabled)

        gui.auto_commit(self.controlArea, self, "auto_commit", "Commit")

        # A label for dictyExpress link (Why oh god why???)
        self.dictyExpressBox = gui.widgetBox(
            self.controlArea, "Dicty Express")
        self.linkLabel = gui.widgetLabel(self.dictyExpressBox, "")
        self.linkLabel.setOpenExternalLinks(False)
        self.linkLabel.linkActivated.connect(self.onDictyExpressLink)

        self.dictyExpressBox.hide()

        gui.rubber(self.controlArea)

        gui.lineEdit(self.mainArea, self, "search_string", "Filter",
                     callbackOnType=True, callback=self.searchUpdate)

        self.treeWidget = QTreeView(
            self.mainArea,
            selectionMode=QTreeView.ExtendedSelection,
            rootIsDecorated=False,
            uniformRowHeights=True,
            sortingEnabled=True)

        self.treeWidget.setItemDelegate(
            gui.LinkStyledItemDelegate(self.treeWidget))
        self.treeWidget.viewport().setMouseTracking(True)
        self.mainArea.layout().addWidget(self.treeWidget)

        box = gui.widgetBox(self.mainArea, "", orientation="horizontal")
        gui.button(box, self, "Select Filtered", callback=self.selectFiltered)
        gui.button(box, self, "Clear Selection",
                   callback=self.treeWidget.clearSelection)

        self.geneinfo = []
        self.cells = []
        self.row2geneinfo = {}
        self.data = None

        # : (# input genes, # matches genes)
        self.matchedInfo = 0, 0

        self.setBlocking(True)
        self.executor = ThreadExecutor(self)

        self.progressBarInit()

        task = Task(
            function=partial(
                taxonomy.ensure_downloaded,
                callback=methodinvoke(self, "advance", ())
            )
        )

        task.resultReady.connect(self.initialize)
        task.exceptionReady.connect(self._onInitializeError)

        self.initfuture = self.executor.submit(task)

    def sizeHint(self):
        return QSize(1024, 720)

    @Slot()
    def advance(self):
        assert self.thread() is QThread.currentThread()
        self.progressBarSet(self.progressBarValue + 1,
                            processEvents=None)

    def initialize(self):
        if self.__initialized:
            # Already initialized
            return
        self.__initialized = True

        self.organisms = sorted(
            set([name.split(".")[-2] for name in
                 serverfiles.listfiles("NCBI_geneinfo")] +
                gene.NCBIGeneInfo.common_taxids())
        )

        self.organismComboBox.addItems(
            [taxonomy.name(tax_id) for tax_id in self.organisms]
        )
        if self.taxid in self.organisms:
            self.organism_index = self.organisms.index(self.taxid)
        else:
            self.organism_index = 0
            self.taxid = self.organisms[self.organism_index]

        self.altSourceCheck.setVisible(self.taxid == DICTY_TAXID)
        self.dictyExpressBox.setVisible(self.taxid == DICTY_TAXID)

        self.infoLabel.setText("No data on input\n")
        self.initfuture = None

        self.setBlocking(False)
        self.progressBarFinished(processEvents=None)

    def _onInitializeError(self, exc):
        sys.excepthook(type(exc), exc.args, None)
        self.error(0, "Could not download the necessary files.")

    def _onSelectedOrganismChanged(self):
        assert 0 <= self.organism_index <= len(self.organisms)
        self.taxid = self.organisms[self.organism_index]
        self.altSourceCheck.setVisible(self.taxid == DICTY_TAXID)
        self.dictyExpressBox.setVisible(self.taxid == DICTY_TAXID)

        if self.data is not None:
            self.updateInfoItems()

    def setData(self, data=None):
        if not self.__initialized:
            self.initfuture.result()
            self.initialize()

        if self.itemsfuture is not None:
            raise Exception("Already processing")

        self.closeContext()
        self.data = data

        if data is not None:
            self.geneAttrComboBox.clear()
            self.attributes = \
                [attr for attr in data.domain.variables + data.domain.metas
                 if isinstance(attr, (Orange.data.StringVariable,
                                      Orange.data.DiscreteVariable))]

            for var in self.attributes:
                self.geneAttrComboBox.addItem(*gui.attributeItem(var))

            self.taxid = data_hints.get_hint(self.data, "taxid", self.taxid)
            self.useAttr = data_hints.get_hint(
                self.data, "genesinrows", self.useAttr)

            self.openContext(data)
            self.gene_attr = min(self.gene_attr, len(self.attributes) - 1)

            if self.taxid in self.organisms:
                self.organism_index = self.organisms.index(self.taxid)
            else:
                self.organism_index = 0
                self.taxid = self.organisms[self.organism_index]

            self.updateInfoItems()
        else:
            self.clear()

    def infoSource(self):
        """ Return the current selected info source getter function from
        INFO_SOURCES
        """
        org = self.organisms[min(self.organism_index, len(self.organisms) - 1)]
        if org not in INFO_SOURCES:
            org = "default"
        sources = INFO_SOURCES[org]
        name, func = sources[min(self.useAltSource, len(sources) - 1)]
        return name, func

    def inputGenes(self):
        if self.useAttr:
            genes = [attr.name for attr in self.data.domain.attributes]
        elif self.attributes:
            attr = self.attributes[self.gene_attr]
            genes = [str(ex[attr]) for ex in self.data
                     if not math.isnan(ex[attr])]
        else:
            genes = []
        return genes

    def updateInfoItems(self):
        self.warning(0)
        if self.data is None:
            return

        genes = self.inputGenes()
        if self.useAttr:
            genes = [attr.name for attr in self.data.domain.attributes]
        elif self.attributes:
            attr = self.attributes[self.gene_attr]
            genes = [str(ex[attr]) for ex in self.data
                     if not math.isnan(ex[attr])]
        else:
            genes = []
        if not genes:
            self.warning(0, "Could not extract genes from input dataset.")

        self.warning(1)
        org = self.organisms[min(self.organism_index, len(self.organisms) - 1)]
        source_name, info_getter = self.infoSource()

        self.error(0)

        self.updateDictyExpressLink(genes, show=org == DICTY_TAXID)
        self.altSourceCheck.setVisible(org == DICTY_TAXID)

        self.progressBarInit()
        self.setBlocking(True)
        self.setEnabled(False)
        self.infoLabel.setText("Retrieving info records.\n")

        self.genes = genes

        task = Task(
            function=partial(
                info_getter, org, genes,
                advance=methodinvoke(self, "advance", ()))
        )
        self.itemsfuture = self.executor.submit(task)
        task.finished.connect(self._onItemsCompleted)

    def _onItemsCompleted(self):
        self.setBlocking(False)
        self.progressBarFinished()
        self.setEnabled(True)

        try:
            schema, geneinfo = self.itemsfuture.result()
        finally:
            self.itemsfuture = None

        self.geneinfo = geneinfo = list(zip(self.genes, geneinfo))
        self.cells = cells = []
        self.row2geneinfo = {}
        links = []
        for i, (_, gi) in enumerate(geneinfo):
            if gi:
                row = []
                for _, item in zip(schema, gi):
                    if isinstance(item, Link):
                        # TODO: This should be handled by delegates
                        row.append(item.text)
                        links.append(item.link)
                    else:
                        row.append(item)
                cells.append(row)
                self.row2geneinfo[len(cells) - 1] = i

        model = TreeModel(cells, [str(col) for col in schema], None)

        model.setColumnLinks(0, links)
        proxyModel = QSortFilterProxyModel(self)
        proxyModel.setSourceModel(model)
        self.treeWidget.setModel(proxyModel)
        self.treeWidget.selectionModel().selectionChanged.connect(self.commit)

        for i in range(7):
            self.treeWidget.resizeColumnToContents(i)
            self.treeWidget.setColumnWidth(
                i, min(self.treeWidget.columnWidth(i), 200)
            )

        self.infoLabel.setText("%i genes\n%i matched NCBI's IDs" %
                               (len(self.genes), len(cells)))
        self.matchedInfo = len(self.genes), len(cells)

    def clear(self):
        self.infoLabel.setText("No data on input\n")
        self.treeWidget.setModel(
            TreeModel([], ["NCBI ID", "Symbol", "Locus Tag",
                           "Chromosome", "Description", "Synonyms",
                           "Nomenclature"], self.treeWidget))

        self.geneAttrComboBox.clear()
        self.send("Data Subset", None)

    def commit(self):
        if self.data is None:
            self.send("Data Subset", None)
            return

        model = self.treeWidget.model()
        selection = self.treeWidget.selectionModel().selection()
        selection = model.mapSelectionToSource(selection)
        selectedRows = list(
            chain(*(range(r.top(), r.bottom() + 1) for r in selection))
        )

        model = model.sourceModel()

        selectedGeneids = [self.row2geneinfo[row] for row in selectedRows]
        selectedIds = [self.geneinfo[i][0] for i in selectedGeneids]
        selectedIds = set(selectedIds)
        gene2row = dict((self.geneinfo[self.row2geneinfo[row]][0], row)
                        for row in selectedRows)

        isselected = selectedIds.__contains__

        if self.useAttr:
            def is_selected(attr):
                return attr.name in selectedIds
            attrs = [attr for attr in self.data.domain.attributes
                     if isselected(attr.name)]
            domain = Orange.data.Domain(
                attrs, self.data.domain.class_vars, self.data.domain.metas)
            newdata = self.data.from_table(domain, self.data)
            self.send("Data Subset", newdata)

        elif self.attributes:
            attr = self.attributes[self.gene_attr]
            gene_col = [attr.str_val(v)
                        for v in self.data.get_column_view(attr)[0]]
            gene_col = [(i, name) for i, name in enumerate(gene_col)
                        if isselected(name)]
            indices = [i for i, _ in gene_col]

            # Add a gene info columns to the output
            headers = [str(model.headerData(i, Qt.Horizontal, Qt.DisplayRole))
                       for i in range(model.columnCount())]
            metas = [Orange.data.StringVariable(name) for name in headers]
            domain = Orange.data.Domain(
                self.data.domain.attributes, self.data.domain.class_vars,
                self.data.domain.metas + tuple(metas))

            newdata = self.data.from_table(domain, self.data)[indices]

            model_rows = [gene2row[gene] for _, gene in gene_col]
            for col, meta in zip(range(model.columnCount()), metas):
                col_data = [str(model.index(row, col).data(Qt.DisplayRole))
                            for row in model_rows]
                col_data = np.array(col_data, dtype=object, ndmin=2).T
                newdata[:, meta] = col_data

            if not len(newdata):
                newdata = None

            self.send("Data Subset", newdata)
        else:
            self.send("Data Subset", None)

    def rowFiltered(self, row):
        searchStrings = self.search_string.lower().split()
        row = " ".join(self.cells[row]).lower()
        return not all([s in row for s in searchStrings])

    def searchUpdate(self):
        if not self.data:
            return
        searchStrings = self.search_string.lower().split()
        index = self.treeWidget.model().sourceModel().index
        mapFromSource = self.treeWidget.model().mapFromSource
        for i, row in enumerate(self.cells):
            row = " ".join(row).lower()
            self.treeWidget.setRowHidden(
                mapFromSource(index(i, 0)).row(),
                QModelIndex(),
                not all([s in row for s in searchStrings]))

    def selectFiltered(self):
        if not self.data:
            return
        itemSelection = QItemSelection()

        index = self.treeWidget.model().sourceModel().index
        mapFromSource = self.treeWidget.model().mapFromSource
        for i, row in enumerate(self.cells):
            if not self.rowFiltered(i):
                itemSelection.select(mapFromSource(index(i, 0)),
                                     mapFromSource(index(i, 0)))
        self.treeWidget.selectionModel().select(
            itemSelection,
            QItemSelectionModel.Select | QItemSelectionModel.Rows)

    def updateDictyExpressLink(self, genes, show=False):
        def fix(ddb):
            if ddb.startswith("DDB"):
                if not ddb.startswith("DDB_G"):
                    ddb = ddb.replace("DDB", "DDB_G")
                return ddb
            return None
        if show:
            genes = [fix(gene) for gene in genes if fix(gene)]
            link1 = '<a href="http://dictyexpress.biolab.si/run/index.php?gene=%s">Microarray profile</a>'
            link2 = '<a href="http://dictyexpress.biolab.si/run/index.php?gene=%s&db=rnaseq">RNA-Seq profile</a>'
            self.linkLabel.setText(link1 + "<br/>" + link2)

            show = any(genes)

        if show:
            self.dictyExpressBox.show()
        else:
            self.dictyExpressBox.hide()

    def onDictyExpressLink(self, link):
        if not self.data:
            return

        selectedIndexes = self.treeWidget.selectedIndexes()
        if not len(selectedIndexes):
            QMessageBox.information(
                self, "No gene ids selected",
                "Please select some genes and try again."
            )
            return
        model = self.treeWidget.model()
        mapToSource = model.mapToSource
        selectedRows = self.treeWidget.selectedIndexes()
        selectedRows = [mapToSource(index).row() for index in selectedRows]
        model = model.sourceModel()

        selectedGeneids = [self.row2geneinfo[row] for row in selectedRows]
        selectedIds = [self.geneinfo[i][0] for i in selectedGeneids]
        selectedIds = set(selectedIds)

        def fix(ddb):
            if ddb.startswith("DDB"):
                if not ddb.startswith("DDB_G"):
                    ddb = ddb.replace("DDB", "DDB_G")
                return ddb
            return None

        genes = [fix(gene) for gene in selectedIds if fix(gene)]
        url = str(link) % " ".join(genes)
        QDesktopServices.openUrl(QUrl(url))

    def onAltSourceChange(self):
        self.updateInfoItems()

    def onDeleteWidget(self):
        # try to cancel pending tasks
        if self.initfuture:
            self.initfuture.cancel()
        if self.itemsfuture:
            self.itemsfuture.cancel()

        self.executor.shutdown(wait=False)
        super().onDeleteWidget()
Ejemplo n.º 16
0
class Widget(QWidget):
    def __init__(self, panel):
        super(Widget, self).__init__(panel)
        
        layout = QVBoxLayout()
        self.setLayout(layout)
        layout.setSpacing(0)
        
        self.searchEntry = SearchLineEdit()
        self.treeView = QTreeView(contextMenuPolicy=Qt.CustomContextMenu)
        self.textView = QTextBrowser()
        
        applyButton = QToolButton(autoRaise=True)
        editButton = QToolButton(autoRaise=True)
        addButton = QToolButton(autoRaise=True)
        self.menuButton = QPushButton(flat=True)
        menu = QMenu(self.menuButton)
        self.menuButton.setMenu(menu)
        
        splitter = QSplitter(Qt.Vertical)
        top = QHBoxLayout()
        layout.addLayout(top)
        splitter.addWidget(self.treeView)
        splitter.addWidget(self.textView)
        layout.addWidget(splitter)
        splitter.setSizes([200, 100])
        splitter.setCollapsible(0, False)
        
        top.addWidget(self.searchEntry)
        top.addWidget(applyButton)
        top.addSpacing(10)
        top.addWidget(addButton)
        top.addWidget(editButton)
        top.addWidget(self.menuButton)
        
        # action generator for actions added to search entry
        def act(slot, icon=None):
            a = QAction(self, triggered=slot)
            self.addAction(a)
            a.setShortcutContext(Qt.WidgetWithChildrenShortcut)
            icon and a.setIcon(icons.get(icon))
            return a
        
        # hide if ESC pressed in lineedit
        a = act(self.slotEscapePressed)
        a.setShortcut(QKeySequence(Qt.Key_Escape))
        
        # import action
        a = self.importAction = act(self.slotImport, 'document-open')
        menu.addAction(a)
        
        # export action
        a = self.exportAction = act(self.slotExport, 'document-save-as')
        menu.addAction(a)
        
        # apply button
        a = self.applyAction = act(self.slotApply, 'edit-paste')
        applyButton.setDefaultAction(a)
        menu.addSeparator()
        menu.addAction(a)
        
        # add button
        a = self.addAction_ = act(self.slotAdd, 'list-add')
        a.setShortcut(QKeySequence(Qt.Key_Insert))
        addButton.setDefaultAction(a)
        menu.addSeparator()
        menu.addAction(a)
        
        # edit button
        a = self.editAction = act(self.slotEdit, 'document-edit')
        a.setShortcut(QKeySequence(Qt.Key_F2))
        editButton.setDefaultAction(a)
        menu.addAction(a)
        
        # set shortcut action
        a = self.shortcutAction = act(self.slotShortcut, 'preferences-desktop-keyboard-shortcuts')
        menu.addAction(a)
        
        # delete action
        a = self.deleteAction = act(self.slotDelete, 'list-remove')
        a.setShortcut(QKeySequence(Qt.CTRL + Qt.Key_Delete))
        menu.addAction(a)
        
        # restore action
        a = self.restoreAction = act(self.slotRestore)
        menu.addSeparator()
        menu.addAction(a)
        
        # help button
        a = self.helpAction = act(self.slotHelp, 'help-contents')
        menu.addSeparator()
        menu.addAction(a)
        
        self.treeView.setSelectionBehavior(QTreeView.SelectRows)
        self.treeView.setSelectionMode(QTreeView.ExtendedSelection)
        self.treeView.setRootIsDecorated(False)
        self.treeView.setAllColumnsShowFocus(True)
        self.treeView.setModel(model.model())
        self.treeView.setCurrentIndex(QModelIndex())
        
        # signals
        self.searchEntry.returnPressed.connect(self.slotReturnPressed)
        self.searchEntry.textChanged.connect(self.updateFilter)
        self.treeView.doubleClicked.connect(self.slotDoubleClicked)
        self.treeView.customContextMenuRequested.connect(self.showContextMenu)
        self.treeView.selectionModel().currentChanged.connect(self.updateText)
        self.treeView.model().dataChanged.connect(self.updateFilter)
        
        # highlight text
        self.highlighter = highlight.Highlighter(self.textView.document())
        
        # complete on snippet variables
        self.searchEntry.setCompleter(QCompleter([
            ':icon', ':indent', ':menu', ':name', ':python', ':selection',
            ':set', ':symbol', ':template', ':template-run'], self.searchEntry))
        self.readSettings()
        app.settingsChanged.connect(self.readSettings)
        app.translateUI(self)
        self.updateColumnSizes()
        self.setAcceptDrops(True)

    def dropEvent(self, ev):
        if not ev.source() and ev.mimeData().hasUrls():
            filename = ev.mimeData().urls()[0].toLocalFile()
            if filename:
                ev.accept()
                from . import import_export
                import_export.load(filename, self)
        
    def dragEnterEvent(self, ev):
        if not ev.source() and ev.mimeData().hasUrls():
            ev.accept()
        
    def translateUI(self):
        try:
            self.searchEntry.setPlaceholderText(_("Search..."))
        except AttributeError:
            pass # not in Qt 4.6
        shortcut = lambda a: a.shortcut().toString(QKeySequence.NativeText)
        self.menuButton.setText(_("&Menu"))
        self.addAction_.setText(_("&Add..."))
        self.addAction_.setToolTip(
            _("Add a new snippet. ({key})").format(key=shortcut(self.addAction_)))
        self.editAction.setText(_("&Edit..."))
        self.editAction.setToolTip(
            _("Edit the current snippet. ({key})").format(key=shortcut(self.editAction)))
        self.shortcutAction.setText(_("Configure Keyboard &Shortcut..."))
        self.deleteAction.setText(_("&Remove"))
        self.deleteAction.setToolTip(_("Remove the selected snippets."))
        self.applyAction.setText(_("A&pply"))
        self.applyAction.setToolTip(_("Apply the current snippet."))
        self.importAction.setText(_("&Import..."))
        self.importAction.setToolTip(_("Import snippets from a file."))
        self.exportAction.setText(_("E&xport..."))
        self.exportAction.setToolTip(_("Export snippets to a file."))
        self.restoreAction.setText(_("Restore &Built-in Snippets..."))
        self.restoreAction.setToolTip(
            _("Restore deleted or changed built-in snippets."))
        self.helpAction.setText(_("&Help"))
        self.searchEntry.setToolTip(_(
            "Enter text to search in the snippets list.\n"
            "See \"What's This\" for more information."))
        self.searchEntry.setWhatsThis(''.join(map("<p>{0}</p>\n".format, (
            _("Enter text to search in the snippets list, and "
              "press Enter to apply the currently selected snippet."),
            _("If the search text fully matches the value of the '{name}' variable "
              "of a snippet, that snippet is selected.").format(name="name"),
            _("If the search text starts with a colon ':', the rest of the "
              "search text filters snippets that define the given variable. "
              "After a space a value can also be entered, snippets will then "
              "match if the value of the given variable contains the text after "
              "the space."),
            _("E.g. entering {menu} will show all snippets that are displayed "
              "in the insert menu.").format(menu="<code>:menu</code>"),
            ))))
    
    def sizeHint(self):
        return self.parent().mainwindow().size() / 4
        
    def readSettings(self):
        data = textformats.formatData('editor')
        self.textView.setFont(data.font)
        self.textView.setPalette(data.palette())

    def showContextMenu(self, pos):
        """Called when the user right-clicks the tree view."""
        self.menuButton.menu().popup(self.treeView.viewport().mapToGlobal(pos))
    
    def slotReturnPressed(self):
        """Called when the user presses Return in the search entry. Applies current snippet."""
        name = self.currentSnippet()
        if name:
            view = self.parent().mainwindow().currentView()
            insert.insert(name, view)
            self.parent().hide() # make configurable?
            view.setFocus()

    def slotEscapePressed(self):
        """Called when the user presses ESC in the search entry. Hides the panel."""
        self.parent().hide()
        self.parent().mainwindow().currentView().setFocus()
    
    def slotDoubleClicked(self, index):
        name = self.treeView.model().name(index)
        view = self.parent().mainwindow().currentView()
        insert.insert(name, view)
        
    def slotAdd(self):
        """Called when the user wants to add a new snippet."""
        edit.Edit(self, None)
        
    def slotEdit(self):
        """Called when the user wants to edit a snippet."""
        name = self.currentSnippet()
        if name:
            edit.Edit(self, name)
        
    def slotShortcut(self):
        """Called when the user selects the Configure Shortcut action."""
        from widgets import shortcuteditdialog
        name = self.currentSnippet()
        if name:
            collection = self.parent().snippetActions
            action = actions.action(name, None, collection)
            default = collection.defaults().get(name)
            mgr = actioncollectionmanager.manager(self.parent().mainwindow())
            cb = mgr.findShortcutConflict
            dlg = shortcuteditdialog.ShortcutEditDialog(self, cb, (collection, name))
            
            if dlg.editAction(action, default):
                mgr.removeShortcuts(action.shortcuts())
                collection.setShortcuts(name, action.shortcuts())
                self.treeView.update()
            
    def slotDelete(self):
        """Called when the user wants to delete the selected rows."""
        rows = sorted(set(i.row() for i in self.treeView.selectedIndexes()), reverse=True)
        if rows:
            for row in rows:
                name = self.treeView.model().names()[row]
                self.parent().snippetActions.setShortcuts(name, [])
                self.treeView.model().removeRow(row)
            self.updateFilter()
    
    def slotApply(self):
        """Called when the user clicks the apply button. Applies current snippet."""
        name = self.currentSnippet()
        if name:
            view = self.parent().mainwindow().currentView()
            insert.insert(name, view)
    
    def slotImport(self):
        """Called when the user activates the import action."""
        filetypes = "{0} (*.xml);;{1} (*)".format(_("XML Files"), _("All Files"))
        caption = app.caption(_("dialog title", "Import Snippets"))
        filename = None
        filename = QFileDialog.getOpenFileName(self, caption, filename, filetypes)
        if filename:
            from . import import_export
            import_export.load(filename, self)
        
    def slotExport(self):
        """Called when the user activates the export action."""
        allrows = [row for row in range(model.model().rowCount())
                       if not self.treeView.isRowHidden(row, QModelIndex())]
        selectedrows = [i.row() for i in self.treeView.selectedIndexes()
                                if i.column() == 0 and i.row() in allrows]
        names = self.treeView.model().names()
        names = [names[row] for row in selectedrows or allrows]
        
        filetypes = "{0} (*.xml);;{1} (*)".format(_("XML Files"), _("All Files"))
        n = len(names)
        caption = app.caption(_("dialog title",
            "Export {num} Snippet", "Export {num} Snippets", n).format(num=n))
        filename = QFileDialog.getSaveFileName(self, caption, None, filetypes)
        if filename:
            from . import import_export
            try:
                import_export.save(names, filename)
            except (IOError, OSError) as e:
                QMessageBox.critical(self, _("Error"), _(
                    "Can't write to destination:\n\n{url}\n\n{error}").format(
                    url=filename, error=e.strerror))
        
    def slotRestore(self):
        """Called when the user activates the Restore action."""
        from . import restore
        dlg = restore.RestoreDialog(self)
        dlg.setWindowModality(Qt.WindowModal)
        dlg.populate()
        dlg.show()
        dlg.finished.connect(dlg.deleteLater)
        
    def slotHelp(self):
        """Called when the user clicks the small help button."""
        userguide.show("snippets")
        
    def currentSnippet(self):
        """Returns the name of the current snippet if it is visible."""
        row = self.treeView.currentIndex().row()
        if row != -1 and not self.treeView.isRowHidden(row, QModelIndex()):
            return self.treeView.model().names()[row]

    def updateFilter(self):
        """Called when the text in the entry changes, updates search results."""
        text = self.searchEntry.text()
        ltext = text.lower()
        filterVars = text.startswith(':')
        if filterVars:
            try:
                fvar, fval = text[1:].split(None, 1)
                fhide = lambda v: v.get(fvar) in (True, None) or fval not in v.get(fvar)
            except ValueError:
                fvar = text[1:].strip()
                fhide = lambda v: not v.get(fvar)
        for row in range(self.treeView.model().rowCount()):
            name = self.treeView.model().names()[row]
            nameid = snippets.get(name).variables.get('name', '')
            if filterVars:
                hide = fhide(snippets.get(name).variables)
            elif nameid == text:
                i = self.treeView.model().createIndex(row, 0)
                self.treeView.selectionModel().setCurrentIndex(i, QItemSelectionModel.SelectCurrent | QItemSelectionModel.Rows)
                hide = False
            elif nameid.lower().startswith(ltext):
                hide = False
            elif ltext in snippets.title(name).lower():
                hide = False
            else:
                hide = True
            self.treeView.setRowHidden(row, QModelIndex(), hide)
        self.updateText()
            
    def updateText(self):
        """Called when the current snippet changes."""
        name = self.currentSnippet()
        self.textView.clear()
        if name:
            s = snippets.get(name)
            self.highlighter.setPython('python' in s.variables)
            self.textView.setPlainText(s.text)
        
    def updateColumnSizes(self):
        self.treeView.resizeColumnToContents(0)
        self.treeView.resizeColumnToContents(1)