예제 #1
0
class OWItemsets(widget.OWWidget):
    name = 'Frequent Itemsets'
    description = 'Explore sets of items that frequently appear together.'
    icon = 'icons/FrequentItemsets.svg'
    priority = 10

    inputs = [("Data", Table, 'set_data')]
    outputs = [(Output.DATA, Table)]

    minSupport = settings.Setting(30)
    maxItemsets = settings.Setting(10000)
    filterSearch = settings.Setting(True)
    autoFind = settings.Setting(False)
    autoSend = settings.Setting(True)
    filterKeywords = settings.Setting('')
    filterMinItems = settings.Setting(1)
    filterMaxItems = settings.Setting(10000)

    UserAdviceMessages = [
        widget.Message(
            'Itemset are listed in item-sorted order, i.e. '
            'an itemset containing A and B is only listed once, as '
            'A > B (and not also B > A).', 'itemsets-order',
            widget.Message.Warning),
        widget.Message(
            'To select all the itemsets that are descendants of '
            '(include) some item X (i.e. the whole subtree), you '
            'can fold the subtree at that item and then select it.',
            'itemsets-order', widget.Message.Information)
    ]

    def __init__(self):
        self._is_running = False
        self.isRegexMatch = lambda x: True
        self.tree = QTreeWidget(self.mainArea,
                                columnCount=2,
                                allColumnsShowFocus=True,
                                alternatingRowColors=True,
                                selectionMode=QTreeWidget.ExtendedSelection,
                                uniformRowHeights=True)
        self.tree.setHeaderLabels(["Itemsets", "Support", "%"])
        self.tree.header().setStretchLastSection(True)
        self.tree.itemSelectionChanged.connect(self.selectionChanged)
        self.mainArea.layout().addWidget(self.tree)

        box = gui.widgetBox(self.controlArea, "Info")
        self.nItemsets = self.nSelectedExamples = self.nSelectedItemsets = ''
        gui.label(box, self, "Number of itemsets: %(nItemsets)s")
        gui.label(box, self, "Selected itemsets: %(nSelectedItemsets)s")
        gui.label(box, self, "Selected examples: %(nSelectedExamples)s")
        hbox = gui.widgetBox(box, orientation='horizontal')
        gui.button(hbox, self, "Expand all", callback=self.tree.expandAll)
        gui.button(hbox, self, "Collapse all", callback=self.tree.collapseAll)

        box = gui.widgetBox(self.controlArea, 'Find itemsets')
        gui.valueSlider(box,
                        self,
                        'minSupport',
                        values=[.0001, .0005, .001, .005, .01, .05, .1, .5] +
                        list(range(1, 101)),
                        label='Minimal support:',
                        labelFormat="%g%%",
                        callback=lambda: self.find_itemsets())
        gui.hSlider(box,
                    self,
                    'maxItemsets',
                    minValue=10000,
                    maxValue=100000,
                    step=10000,
                    label='Max. number of itemsets:',
                    labelFormat="%d",
                    callback=lambda: self.find_itemsets())
        self.button = gui.auto_commit(box,
                                      self,
                                      'autoFind',
                                      'Find itemsets',
                                      commit=self.find_itemsets)

        box = gui.widgetBox(self.controlArea, 'Filter itemsets')
        gui.lineEdit(box,
                     self,
                     'filterKeywords',
                     'Contains:',
                     callback=self.filter_change,
                     orientation='horizontal',
                     tooltip='A comma or space-separated list of regular '
                     'expressions.')
        hbox = gui.widgetBox(box, orientation='horizontal')
        gui.spin(hbox,
                 self,
                 'filterMinItems',
                 1,
                 998,
                 label='Min. items:',
                 callback=self.filter_change)
        gui.spin(hbox,
                 self,
                 'filterMaxItems',
                 2,
                 999,
                 label='Max. items:',
                 callback=self.filter_change)
        gui.checkBox(box,
                     self,
                     'filterSearch',
                     label='Apply these filters in search',
                     tooltip='If checked, the itemsets are filtered according '
                     'to these filter conditions already in the search '
                     'phase. \nIf unchecked, the only filters applied '
                     'during search are the ones above, '
                     'and the itemsets are \nfiltered afterwards only for '
                     'display, i.e. only the matching itemsets are shown.')

        gui.rubber(hbox)

        gui.rubber(self.controlArea)
        gui.auto_commit(self.controlArea, self, 'autoSend', 'Send selection')

        self.filter_change()

    ITEM_DATA_ROLE = Qt.UserRole + 1

    def selectionChanged(self):
        X = self.X
        mapping = self.onehot_mapping
        instances = set()
        where = np.where

        def whole_subtree(node):
            yield node
            for i in range(node.childCount()):
                yield from whole_subtree(node.child(i))

        def itemset(node):
            while node:
                yield node.data(0, self.ITEM_DATA_ROLE)
                node = node.parent()

        def selection_ranges(node):
            n_children = node.childCount()
            if n_children:
                yield (self.tree.indexFromItem(node.child(0)),
                       self.tree.indexFromItem(node.child(n_children - 1)))
            for i in range(n_children):
                yield from selection_ranges(node.child(i))

        nSelectedItemsets = 0
        item_selection = QItemSelection()
        for node in self.tree.selectedItems():
            nodes = (node, ) if node.isExpanded() else whole_subtree(node)
            if not node.isExpanded():
                for srange in selection_ranges(node):
                    item_selection.select(*srange)
            for node in nodes:
                nSelectedItemsets += 1
                cols, vals = zip(*(mapping[i] for i in itemset(node)))
                if issparse(X):
                    rows = (len(cols) == np.bincount(
                        (X[:, cols] != 0).indices,
                        minlength=X.shape[0])).nonzero()[0]
                else:
                    rows = where((X[:, cols] == vals).all(axis=1))[0]
                instances.update(rows)
        self.tree.itemSelectionChanged.disconnect(self.selectionChanged)
        self.tree.selectionModel().select(
            item_selection,
            QItemSelectionModel.Select | QItemSelectionModel.Rows)
        self.tree.itemSelectionChanged.connect(self.selectionChanged)

        self.nSelectedExamples = len(instances)
        self.nSelectedItemsets = nSelectedItemsets
        self.output = self.data[sorted(instances)] or None
        self.commit()

    def commit(self):
        self.send(Output.DATA, self.output)

    def filter_change(self):
        self.warning(9)
        try:
            isRegexMatch = self.isRegexMatch = re.compile(
                '|'.join(
                    i.strip()
                    for i in re.split('(,|\s)+', self.filterKeywords.strip())
                    if i.strip()), re.IGNORECASE).search
        except Exception as e:
            self.warning(9,
                         'Error in regular expression: {}'.format(e.args[0]))
            isRegexMatch = self.isRegexMatch = lambda x: True

        def hide(node, depth, has_kw):
            if not has_kw:
                has_kw = isRegexMatch(node.text(0))
            hidden = (
                sum(
                    hide(node.child(i), depth + 1, has_kw)
                    for i in range(node.childCount())) == node.childCount()
                if node.childCount() else
                (not has_kw
                 or not self.filterMinItems <= depth <= self.filterMaxItems))
            node.setHidden(hidden)
            return hidden

        hide(self.tree.invisibleRootItem(), 0, False)

    class TreeWidgetItem(QTreeWidgetItem):
        def data(self, column, role):
            """Construct lazy tooltips"""
            if role != Qt.ToolTipRole:
                return super().data(column, role)
            tooltip = []
            while self:
                tooltip.append(self.text(0))
                self = self.parent()
            return ' '.join(reversed(tooltip))

    def find_itemsets(self):
        if self.data is None:
            return
        if self._is_running:
            return
        self._is_running = True

        data = self.data
        self.tree.clear()
        self.tree.setUpdatesEnabled(False)
        self.tree.blockSignals(True)

        class ItemDict(dict):
            def __init__(self, item):
                self.item = item

        top = ItemDict(self.tree.invisibleRootItem())
        X, mapping = OneHot.encode(data)
        self.onehot_mapping = mapping
        ITEM_FMT = '{}' if issparse(data.X) else '{}={}'
        names = {
            item: ITEM_FMT.format(var.name, val)
            for item, var, val in OneHot.decode(mapping.keys(), data, mapping)
        }
        nItemsets = 0

        filterSearch = self.filterSearch
        filterMinItems, filterMaxItems = self.filterMinItems, self.filterMaxItems
        isRegexMatch = self.isRegexMatch

        # Find itemsets and populate the TreeView
        with self.progressBar(self.maxItemsets + 1) as progress:
            for itemset, support in frequent_itemsets(X,
                                                      self.minSupport / 100):

                if filterSearch and not filterMinItems <= len(
                        itemset) <= filterMaxItems:
                    continue

                parent = top
                first_new_item = None
                itemset_matches_filter = False

                for item in sorted(itemset):
                    name = names[item]

                    if filterSearch and not itemset_matches_filter:
                        itemset_matches_filter = isRegexMatch(name)

                    child = parent.get(name)
                    if child is None:
                        try:
                            wi = self.TreeWidgetItem(parent.item, [
                                name,
                                str(support), '{:.4g}'.format(
                                    100 * support / len(data))
                            ])
                        except RuntimeError:
                            # FIXME: When autoFind was in effect and the support
                            # slider was moved, this line excepted with:
                            #     RuntimeError: wrapped C/C++ object of type
                            #                   TreeWidgetItem has been deleted
                            return
                        wi.setData(0, self.ITEM_DATA_ROLE, item)
                        child = parent[name] = ItemDict(wi)

                        if first_new_item is None:
                            first_new_item = (parent, name)
                    parent = child

                if filterSearch and not itemset_matches_filter:
                    parent, name = first_new_item
                    parent.item.removeChild(parent[name].item)
                    del parent[name].item
                    del parent[name]
                else:
                    nItemsets += 1
                    progress.advance()
                if nItemsets >= self.maxItemsets:
                    break

        if not filterSearch:
            self.filter_change()
        self.nItemsets = nItemsets
        self.nSelectedItemsets = 0
        self.nSelectedExamples = 0
        self.tree.expandAll()
        for i in range(self.tree.columnCount()):
            self.tree.resizeColumnToContents(i)
        self.tree.setUpdatesEnabled(True)
        self.tree.blockSignals(False)
        self._is_running = False

    def set_data(self, data):
        self.data = data
        is_error = False
        if data is not None:
            self.warning(0)
            self.error(1)
            self.button.setDisabled(False)
            self.X = data.X
            if issparse(data.X):
                self.X = data.X.tocsc()
            else:
                if not data.domain.has_discrete_attributes():
                    self.error(
                        1, 'Discrete features required but data has none.')
                    is_error = True
                    self.button.setDisabled(True)
                elif data.domain.has_continuous_attributes():
                    self.warning(
                        0,
                        'Data has continuous attributes which will be skipped.'
                    )
        else:
            self.output = None
            self.commit()
        if self.autoFind and not is_error:
            self.find_itemsets()
예제 #2
0
class filetransfersInfoWidget(infoWidget):

	def __init__(self, client, parent = None):
		infoWidget.__init__(self, client, parent)

		self.__informations = []
		self.__loc = QLocale()
		self.__mainLayout = QVBoxLayout()
		self.__treeWidget = QTreeWidget()

		self.__treeWidget.setColumnCount(5)
		#stlpce projekt, nazov, progress, prenesene (kolko z kolko), rychlost
		labels = [self.tr("Project name"), self.tr("File name"), self.tr("Progress"), self.tr("Size"), self.tr("Speed")]
		self.__treeWidget.setHeaderLabels(labels)
		self.__treeWidget.setUniformRowHeights(True)

		self.__mainLayout.addWidget(self.__treeWidget)
		self.setMainLayout(self.__mainLayout)
		
		self.connect(client, SIGNAL("getFileTransfersRecv(PyQt_PyObject)"), self.__updateFileTransfers)
		client.getFileTransfers()

	def __jednotka(self, data):
		if data < 1024:
			return self.__loc.toString(int(data))+" B"
		elif data < 1024*1024:
			return self.__loc.toString(float(data) / float(1024), 'f', 2)+" kB"
		elif data < 1024*1024*1024:
			return self.__loc.toString(float(data) / float(1024*1024), 'f', 2)+" MB"
		elif data < 1024*1024*1024*1024:
			return self.__loc.toString(float(data) / float(1024*1024*1024), 'f', 2)+" GB"
		return self.__loc.toString(int(data))+" B"

	def __updateData(self, data, item, progressBar):
		if data['bytes_xferred'] is None:
			data['bytes_xferred'] = 0
			progressBar.setValue(0)
		else:
			progressBar.setValue(int(100.0 * data['bytes_xferred'] / data['nbytes']))
		item.setData(0, Qt.DisplayRole, QVariant(data['project_name']))
		item.setData(1, Qt.DisplayRole, QVariant(data['name']))
		if data['xfer_speed'] is None:
			data['xfer_speed'] = 0
		item.setData(3, Qt.DisplayRole, QVariant(self.__jednotka(data['bytes_xferred']) + '/' + self.__jednotka(data['nbytes'])))
		item.setData(4, Qt.DisplayRole, QVariant(self.__jednotka(data['xfer_speed'])+self.tr('/s')))


	def __updateFileTransfers(self, transfers):
		pridat = []

		for inf in self.__informations:
			inf[4] = True

		for transfer in transfers:
			pridame = True
			for inf in self.__informations:
				if inf[0] == (transfer['name'], transfer['project_url']):
					pridame = False
					inf[3] = transfer
					inf[4] = False
					break
			if pridame:
				pridat.append(transfer)

		for polozka in pridat:
			treeItem = QTreeWidgetItem(self.__treeWidget)
			progressBar = QProgressBar()
			progressBar.setAutoFillBackground(True)
			progressBar.setFixedHeight(self.__treeWidget.rowHeight(self.__treeWidget.indexFromItem(treeItem)))
			self.__treeWidget.setItemWidget(treeItem, 2, progressBar)
			self.__informations.append([(polozka['name'], polozka['project_url']), treeItem, progressBar, polozka, False])

			self.__updateData(polozka, treeItem, progressBar)

		for i in range(len(self.__informations) - 1, -1, -1):
			# odstranenie
			if self.__informations[i][4]:
				self.__treeWidget.takeTopLevelItem(i)
				self.__informations.pop(i)
			# aktualizacia
			else:
				inf = self.__informations[i]
				self.__updateData(inf[3], inf[1], inf[2])


		QTimer.singleShot(1000, self.sender().getFileTransfers)
예제 #3
0
class OWItemsets(widget.OWWidget):
    name = 'Frequent Itemsets'
    description = 'Explore sets of items that frequently appear together.'
    icon = 'icons/FrequentItemsets.svg'
    priority = 10

    inputs = [("Data", Table, 'set_data')]
    outputs = [(Output.DATA, Table)]

    minSupport = settings.Setting(30)
    maxItemsets = settings.Setting(10000)
    filterSearch = settings.Setting(True)
    autoFind = settings.Setting(False)
    autoSend = settings.Setting(True)
    filterKeywords = settings.Setting('')
    filterMinItems = settings.Setting(1)
    filterMaxItems = settings.Setting(10000)

    UserAdviceMessages = [
        widget.Message('Itemset are listed in item-sorted order, i.e. '
                       'an itemset containing A and B is only listed once, as '
                       'A > B (and not also B > A).',
                       'itemsets-order', widget.Message.Warning),
        widget.Message('To select all the itemsets that are descendants of '
                       '(include) some item X (i.e. the whole subtree), you '
                       'can fold the subtree at that item and then select it.',
                       'itemsets-order', widget.Message.Information)
    ]

    def __init__(self):
        self.tree = QTreeWidget(self.mainArea,
                                columnCount=2,
                                allColumnsShowFocus=True,
                                alternatingRowColors=True,
                                selectionMode=QTreeWidget.ExtendedSelection,
                                uniformRowHeights=True)
        self.tree.setHeaderLabels(["Itemsets", "Support", "%"])
        self.tree.header().setStretchLastSection(True)
        self.tree.itemSelectionChanged.connect(self.selectionChanged)
        self.mainArea.layout().addWidget(self.tree)

        box = gui.widgetBox(self.controlArea, "Info")
        self.nItemsets = self.nSelectedExamples = self.nSelectedItemsets = ''
        gui.label(box, self, "Number of itemsets: %(nItemsets)s")
        gui.label(box, self, "Selected itemsets: %(nSelectedItemsets)s")
        gui.label(box, self, "Selected examples: %(nSelectedExamples)s")
        hbox = gui.widgetBox(box, orientation='horizontal')
        gui.button(hbox, self, "Expand all", callback=self.tree.expandAll)
        gui.button(hbox, self, "Collapse all", callback=self.tree.collapseAll)

        box = gui.widgetBox(self.controlArea, 'Find itemsets')
        gui.hSlider(box, self, 'minSupport', minValue=1, maxValue=100,
                    label='Minimal support:', labelFormat="%d%%",
                    callback=lambda: self.find_itemsets())
        gui.hSlider(box, self, 'maxItemsets', minValue=10000, maxValue=100000, step=10000,
                    label='Max. number of itemsets:', labelFormat="%d",
                    callback=lambda: self.find_itemsets())
        gui.checkBox(box, self, 'filterSearch',
                     label='Apply below filters in search',
                     tooltip='If checked, the itemsets are filtered according '
                             'to below filter conditions already in the search '
                             'phase. \nIf unchecked, the only filters applied '
                             'during search are the ones above, '
                             'and the itemsets are \nfiltered afterwards only for '
                             'display, i.e. only the matching itemsets are shown.')
        self.button = gui.auto_commit(
            box, self, 'autoFind', 'Find itemsets', commit=self.find_itemsets)

        box = gui.widgetBox(self.controlArea, 'Filter itemsets')
        gui.lineEdit(box, self, 'filterKeywords', 'Contains:',
                     callback=self.filter_change, orientation='horizontal',
                     tooltip='A comma or space-separated list of regular '
                             'expressions.')
        hbox = gui.widgetBox(box, orientation='horizontal')
        gui.spin(hbox, self, 'filterMinItems', 1, 998, label='Min. items:',
                 callback=self.filter_change)
        gui.spin(hbox, self, 'filterMaxItems', 2, 999, label='Max. items:',
                 callback=self.filter_change)
        gui.rubber(hbox)

        gui.rubber(self.controlArea)
        gui.auto_commit(self.controlArea, self, 'autoSend', 'Send selection')

        self.filter_change()

    def sendReport(self):
        self.reportSettings("Itemset statistics",
                            [("Number of itemsets", self.nItemsets),
                             ("Selected itemsets", self.nSelectedItemsets),
                             ("Covered examples", self.nSelectedExamples),
                             ])
        self.reportSection("Itemsets")
        self.reportRaw(OWReport.reportTree(self.tree))

    ITEM_DATA_ROLE = Qt.UserRole + 1

    def selectionChanged(self):
        X = self.data.X
        mapping = self.onehot_mapping
        instances = set()
        where = np.where

        def whole_subtree(node):
            yield node
            for i in range(node.childCount()):
                yield from whole_subtree(node.child(i))

        def itemset(node):
            while node:
                yield node.data(0, self.ITEM_DATA_ROLE)
                node = node.parent()

        def selection_ranges(node):
            n_children = node.childCount()
            if n_children:
                yield (self.tree.indexFromItem(node.child(0)),
                       self.tree.indexFromItem(node.child(n_children - 1)))
            for i in range(n_children):
                yield from selection_ranges(node.child(i))

        nSelectedItemsets = 0
        item_selection = QItemSelection()
        for node in self.tree.selectedItems():
            nodes = (node,) if node.isExpanded() else whole_subtree(node)
            if not node.isExpanded():
                for srange in selection_ranges(node):
                    item_selection.select(*srange)
            for node in nodes:
                nSelectedItemsets += 1
                cols, vals = zip(*(mapping[i] for i in itemset(node)))
                instances.update(where((X[:, cols] == vals).all(axis=1))[0])
        self.tree.itemSelectionChanged.disconnect(self.selectionChanged)
        self.tree.selectionModel().select(item_selection,
                                          QItemSelectionModel.Select | QItemSelectionModel.Rows)
        self.tree.itemSelectionChanged.connect(self.selectionChanged)

        self.nSelectedExamples = len(instances)
        self.nSelectedItemsets = nSelectedItemsets
        self.output = self.data[sorted(instances)] or None
        self.commit()

    def commit(self):
        self.send(Output.DATA, self.output)

    def filter_change(self):
        isRegexMatch = self.isRegexMatch = re.compile(
            '|'.join(i.strip()
                     for i in re.split('(,|\s)+', self.filterKeywords.strip())
                     if i.strip())).search

        def hide(node, depth, has_kw):
            if not has_kw:
                has_kw = isRegexMatch(node.text(0))
            hidden = (sum(hide(node.child(i), depth + 1, has_kw)
                          for i in range(node.childCount())) == node.childCount()
                      if node.childCount() else
                      (not has_kw or
                       not self.filterMinItems <= depth <= self.filterMaxItems))
            node.setHidden(hidden)
            return hidden

        hide(self.tree.invisibleRootItem(), 0, False)

    class TreeWidgetItem(QTreeWidgetItem):
        def data(self, column, role):
            """Construct lazy tooltips"""
            if role != Qt.ToolTipRole:
                return super().data(column, role)
            tooltip = []
            while self:
                tooltip.append(self.text(0))
                self = self.parent()
            return ' '.join(reversed(tooltip))

    def find_itemsets(self):
        if self.data is None: return
        data = self.data
        self.tree.clear()
        self.tree.setUpdatesEnabled(False)
        self.tree.blockSignals(True)

        class ItemDict(dict):
            def __init__(self, item):
                self.item = item

        top = ItemDict(self.tree.invisibleRootItem())
        X, mapping = OneHot.encode(data)
        self.onehot_mapping = mapping
        names = {item: '{}={}'.format(var.name, val)
                 for item, var, val in OneHot.decode(mapping.keys(), data, mapping)}
        nItemsets = 0

        filterSearch = self.filterSearch
        filterMinItems, filterMaxItems = self.filterMinItems, self.filterMaxItems
        isRegexMatch = self.isRegexMatch

        # Find itemsets and populate the TreeView
        progress = gui.ProgressBar(self, self.maxItemsets + 1)
        for itemset, support in frequent_itemsets(X, self.minSupport / 100):

            if filterSearch and not filterMinItems <= len(itemset) <= filterMaxItems:
                continue

            parent = top
            first_new_item = None
            itemset_matches_filter = False

            for item in sorted(itemset):
                name = names[item]

                if filterSearch and not itemset_matches_filter:
                    itemset_matches_filter = isRegexMatch(name)

                child = parent.get(name)
                if child is None:
                    wi = self.TreeWidgetItem(parent.item, [name, str(support), '{:.1f}'.format(100 * support / len(data))])
                    wi.setData(0, self.ITEM_DATA_ROLE, item)
                    child = parent[name] = ItemDict(wi)

                    if first_new_item is None:
                        first_new_item = (parent, name)
                parent = child

            if filterSearch and not itemset_matches_filter:
                parent, name = first_new_item
                parent.item.removeChild(parent[name].item)
                del parent[name].item
                del parent[name]
            else:
                nItemsets += 1
                progress.advance()
            if nItemsets >= self.maxItemsets:
                break

        if not filterSearch:
            self.filter_change()
        self.nItemsets = nItemsets
        self.nSelectedItemsets = 0
        self.nSelectedExamples = 0
        self.tree.expandAll()
        for i in range(self.tree.columnCount()):
            self.tree.resizeColumnToContents(i)
        self.tree.setUpdatesEnabled(True)
        self.tree.blockSignals(False)
        progress.finish()

    def set_data(self, data):
        self.data = data
        if data is not None:
            self.warning(0, 'Data has continuous attributes which will be skipped.'
                            if data.domain.has_continuous_attributes() else None)
            self.error(1, 'Discrete features required but data has none.'
                          if not data.domain.has_discrete_attributes() else None)
            self.button.setDisabled(not data.domain.has_discrete_attributes())
        if self.autoFind:
            self.find_itemsets()
예제 #4
0
class OWKEGGPathwayBrowser(widget.OWWidget):
    name = "KEGG Pathways"
    description = "Browse KEGG pathways that include an input set of genes."
    icon = "../widgets/icons/KEGGPathways.svg"
    priority = 2030

    inputs = [("Examples", Orange.data.Table, "SetData", widget.Default),
              ("Reference", Orange.data.Table, "SetRefData")]
    outputs = [("Selected Examples", Orange.data.Table, widget.Default),
               ("Unselected Examples", Orange.data.Table)]

    settingsHandler = settings.DomainContextHandler()

    organismIndex = settings.ContextSetting(0)
    geneAttrIndex = settings.ContextSetting(0)
    useAttrNames = settings.ContextSetting(False)

    autoCommit = settings.Setting(False)
    autoResize = settings.Setting(True)
    useReference = settings.Setting(False)
    showOrthology = settings.Setting(True)

    Ready, Initializing, Running = 0, 1, 2

    def __init__(self, parent=None):
        super().__init__(parent)

        self.organismCodes = []
        self._changedFlag = False
        self.__invalidated = False
        self.__runstate = OWKEGGPathwayBrowser.Initializing
        self.__in_setProgress = False

        self.controlArea.setMaximumWidth(250)
        box = gui.widgetBox(self.controlArea, "Info")
        self.infoLabel = gui.widgetLabel(box, "No data on input\n")

        # Organism selection.
        box = gui.widgetBox(self.controlArea, "Organism")
        self.organismComboBox = gui.comboBox(
            box, self, "organismIndex",
            items=[],
            callback=self.Update,
            addSpace=True,
            tooltip="Select the organism of the input genes")

        # Selection of genes attribute
        box = gui.widgetBox(self.controlArea, "Gene attribute")
        self.geneAttrCandidates = itemmodels.VariableListModel(parent=self)
        self.geneAttrCombo = gui.comboBox(
            box, self, "geneAttrIndex", callback=self.Update)
        self.geneAttrCombo.setModel(self.geneAttrCandidates)

        gui.checkBox(box, self, "useAttrNames",
                    "Use variable names", disables=[(-1, self.geneAttrCombo)],
                    callback=self.Update)

        self.geneAttrCombo.setDisabled(bool(self.useAttrNames))

        gui.separator(self.controlArea)

        gui.checkBox(self.controlArea, self, "useReference",
                    "From signal", box="Reference", callback=self.Update)

        gui.separator(self.controlArea)

        gui.checkBox(self.controlArea, self, "showOrthology",
                     "Show pathways in full orthology", box="Orthology",
                     callback=self.UpdateListView)

        gui.checkBox(self.controlArea, self, "autoResize",
                     "Resize to fit", box="Image",
                     callback=self.UpdatePathwayViewTransform)

        box = gui.widgetBox(self.controlArea, "Cache Control")

        gui.button(box, self, "Clear cache",
                     callback=self.ClearCache,
                     tooltip="Clear all locally cached KEGG data.")

        gui.separator(self.controlArea)

        gui.auto_commit(self.controlArea, self, "autoCommit", "Commit")

        gui.rubber(self.controlArea)

        spliter = QSplitter(Qt.Vertical, self.mainArea)
        self.pathwayView = PathwayView(self, spliter)
        self.pathwayView.scene().selectionChanged.connect(
            self._onSelectionChanged
        )
        self.mainArea.layout().addWidget(spliter)

        self.listView = QTreeWidget(
            allColumnsShowFocus=True,
            selectionMode=QTreeWidget.SingleSelection,
            sortingEnabled=True,
            maximumHeight=200)

        spliter.addWidget(self.listView)

        self.listView.setColumnCount(4)
        self.listView.setHeaderLabels(
            ["Pathway", "P value", "Genes", "Reference"])

        self.listView.itemSelectionChanged.connect(self.UpdatePathwayView)

        select = QAction(
            "Select All", self,
            shortcut=QKeySequence.SelectAll
        )
        select.triggered.connect(self.selectAll)
        self.addAction(select)

        self.data = None
        self.refData = None

        self._executor = concurrent.ThreadExecutor()
        self.setEnabled(False)
        self.setBlocking(True)
        progress = concurrent.methodinvoke(self, "setProgress", (float,))

        def get_genome():
            """Return a KEGGGenome with the common org entries precached."""
            genome = kegg.KEGGGenome()

            essential = genome.essential_organisms()
            common = genome.common_organisms()
            # Remove duplicates of essential from common.
            # (essential + common list as defined here will be used in the
            # GUI.)
            common = [c for c in common if c not in essential]

            # TODO: Add option to specify additional organisms not
            # in the common list.

            keys = list(map(genome.org_code_to_entry_key, essential + common))

            genome.pre_cache(keys, progress_callback=progress)
            return (keys, genome)

        self._genomeTask = task = concurrent.Task(function=get_genome)
        task.finished.connect(self.__initialize_finish)

        self.progressBarInit()
        self.infoLabel.setText("Fetching organism definitions\n")
        self._executor.submit(task)

    def __initialize_finish(self):
        if self.__runstate != OWKEGGPathwayBrowser.Initializing:
            return

        try:
            keys, genome = self._genomeTask.result()
        except Exception as err:
            self.error(0, str(err))
            raise

        self.progressBarFinished()
        self.setEnabled(True)
        self.setBlocking(False)

        entries = [genome[key] for key in keys]
        items = [entry.definition for entry in entries]
        codes = [entry.organism_code for entry in entries]

        self.organismCodes = codes
        self.organismComboBox.clear()
        self.organismComboBox.addItems(items)
        self.organismComboBox.setCurrentIndex(self.organismIndex)

        self.infoLabel.setText("No data on input\n")

    def Clear(self):
        """
        Clear the widget state.
        """
        self.queryGenes = []
        self.referenceGenes = []
        self.genes = {}
        self.uniqueGenesDict = {}
        self.revUniqueGenesDict = {}
        self.pathways = {}
        self.org = None
        self.geneAttrCandidates[:] = []

        self.infoLabel.setText("No data on input\n")
        self.listView.clear()
        self.pathwayView.SetPathway(None)

        self.send("Selected Examples", None)
        self.send("Unselected Examples", None)

    def SetData(self, data=None):
        if self.__runstate == OWKEGGPathwayBrowser.Initializing:
            self.__initialize_finish()

        self.closeContext()
        self.data = data
        self.warning(0)
        self.error(0)
        self.information(0)

        if data is not None:
            vars = data.domain.variables + data.domain.metas
            vars = [var for var in vars
                    if isinstance(var, Orange.data.StringVariable)]
            self.geneAttrCandidates[:] = vars

            # Try to guess the gene name variable
            names_lower = [v.name.lower() for v in vars]
            scores = [(name == "gene", "gene" in name)
                      for name in names_lower]
            imax, _ = max(enumerate(scores), key=itemgetter(1))
            self.geneAttrIndex = imax

            taxid = data_hints.get_hint(data, "taxid", None)
            if taxid:
                try:
                    code = kegg.from_taxid(taxid)
                    self.organismIndex = self.organismCodes.index(code)
                except Exception as ex:
                    print(ex, taxid)

            self.useAttrNames = data_hints.get_hint(data, "genesinrows",
                                                    self.useAttrNames)

            self.openContext(data)
        else:
            self.Clear()

        self.__invalidated = True

    def SetRefData(self, data=None):
        self.refData = data
        self.information(1)

        if data is not None and self.useReference:
            self.__invalidated = True

    def handleNewSignals(self):
        if self.__invalidated:
            self.Update()
            self.__invalidated = False

    def UpdateListView(self):
        self.bestPValueItem = None
        self.listView.clear()
        if not self.data:
            return

        allPathways = self.org.pathways()
        allRefPathways = kegg.pathways("map")

        items = []
        kegg_pathways = kegg.KEGGPathways()

        org_code = self.organismCodes[min(self.organismIndex,
                                          len(self.organismCodes) - 1)]

        if self.showOrthology:
            self.koOrthology = kegg.KEGGBrite("ko00001")
            self.listView.setRootIsDecorated(True)
            path_ids = set([s[-5:] for s in self.pathways.keys()])

            def _walkCollect(koEntry):
                num = koEntry.title[:5] if koEntry.title else None
                if num in path_ids:
                    return ([koEntry] +
                            reduce(lambda li, c: li + _walkCollect(c),
                                   [child for child in koEntry.entries],
                                   []))
                else:
                    c = reduce(lambda li, c: li + _walkCollect(c),
                               [child for child in koEntry.entries],
                               [])
                    return c + (c and [koEntry] or [])

            allClasses = reduce(lambda li1, li2: li1 + li2,
                                [_walkCollect(c) for c in self.koOrthology],
                                [])

            def _walkCreate(koEntry, lvItem):
                item = QTreeWidgetItem(lvItem)
                id = "path:" + org_code + koEntry.title[:5]

                if koEntry.title[:5] in path_ids:
                    p = kegg_pathways.get_entry(id)
                    if p is None:
                        # In case the genesets still have obsolete entries
                        name = koEntry.title
                    else:
                        name = p.name
                    genes, p_value, ref = self.pathways[id]
                    item.setText(0, name)
                    item.setText(1, "%.5f" % p_value)
                    item.setText(2, "%i of %i" % (len(genes), len(self.genes)))
                    item.setText(3, "%i of %i" % (ref, len(self.referenceGenes)))
                    item.pathway_id = id if p is not None else None
                else:
                    if id in allPathways:
                        text = kegg_pathways.get_entry(id).name
                    else:
                        text = koEntry.title
                    item.setText(0, text)

                    if id in allPathways:
                        item.pathway_id = id
                    elif "path:map" + koEntry.title[:5] in allRefPathways:
                        item.pathway_id = "path:map" + koEntry.title[:5]
                    else:
                        item.pathway_id = None

                for child in koEntry.entries:
                    if child in allClasses:
                        _walkCreate(child, item)

            for koEntry in self.koOrthology:
                if koEntry in allClasses:
                    _walkCreate(koEntry, self.listView)

            self.listView.update()
        else:
            self.listView.setRootIsDecorated(False)
            pathways = self.pathways.items()
            pathways = sorted(pathways, key=lambda item: item[1][1])

            for id, (genes, p_value, ref) in pathways:
                item = QTreeWidgetItem(self.listView)
                item.setText(0, kegg_pathways.get_entry(id).name)
                item.setText(1, "%.5f" % p_value)
                item.setText(2, "%i of %i" % (len(genes), len(self.genes)))
                item.setText(3, "%i of %i" % (ref, len(self.referenceGenes)))
                item.pathway_id = id
                items.append(item)

        self.bestPValueItem = items and items[0] or None
        self.listView.expandAll()
        for i in range(4):
            self.listView.resizeColumnToContents(i)

        if self.bestPValueItem:
            index = self.listView.indexFromItem(self.bestPValueItem)
            self.listView.selectionModel().select(
                index, QItemSelectionModel.ClearAndSelect
            )

    def UpdatePathwayView(self):
        items = self.listView.selectedItems()

        if len(items) > 0:
            item = items[0]
        else:
            item = None

        self.commit()
        item = item or self.bestPValueItem
        if not item or not item.pathway_id:
            self.pathwayView.SetPathway(None)
            return

        def get_kgml_and_image(pathway_id):
            """Return an initialized KEGGPathway with pre-cached data"""
            p = kegg.KEGGPathway(pathway_id)
            p._get_kgml()  # makes sure the kgml file is downloaded
            p._get_image_filename()  # makes sure the image is downloaded
            return (pathway_id, p)

        self.setEnabled(False)
        self._pathwayTask = concurrent.Task(
            function=lambda: get_kgml_and_image(item.pathway_id)
        )
        self._pathwayTask.finished.connect(self._onPathwayTaskFinshed)
        self._executor.submit(self._pathwayTask)

    def _onPathwayTaskFinshed(self):
        self.setEnabled(True)
        pathway_id, self.pathway = self._pathwayTask.result()
        self.pathwayView.SetPathway(
            self.pathway,
            self.pathways.get(pathway_id, [[]])[0]
        )

    def UpdatePathwayViewTransform(self):
        self.pathwayView.updateTransform()

    def Update(self):
        """
        Update (recompute enriched pathways) the widget state.
        """
        if not self.data:
            return

        self.error(0)
        self.information(0)

        # XXX: Check data in setData, do not even alow this to be executed if
        # data has no genes
        try:
            genes = self.GeneNamesFromData(self.data)
        except ValueError:
            self.error(0, "Cannot extract gene names from input.")
            genes = []

        if not self.useAttrNames and any("," in gene for gene in genes):
            genes = reduce(add, (split_and_strip(gene, ",")
                                 for gene in genes),
                           [])
            self.information(0,
                             "Separators detected in input gene names. "
                             "Assuming multiple genes per instance.")
        self.queryGenes = genes

        self.information(1)
        reference = None
        if self.useReference and self.refData:
            reference = self.GeneNamesFromData(self.refData)
            if not self.useAttrNames \
                    and any("," in gene for gene in reference):
                reference = reduce(add, (split_and_strip(gene, ",")
                                         for gene in reference),
                                   [])
                self.information(1,
                                 "Separators detected in reference gene "
                                 "names. Assuming multiple genes per "
                                 "instance.")

        org_code = self.SelectedOrganismCode()

        def run_enrichment(org_code, genes, reference=None, progress=None):
            org = kegg.KEGGOrganism(org_code)
            if reference is None:
                reference = org.get_genes()

            # Map 'genes' and 'reference' sets to unique KEGG identifiers
            unique_genes, _, _ = org.get_unique_gene_ids(set(genes))
            unique_ref_genes, _, _ = org.get_unique_gene_ids(set(reference))

            taxid = kegg.to_taxid(org.org_code)
            # Map the taxid back to standard 'common' taxids
            # (as used by 'geneset') if applicable
            r_tax_map = dict((v, k) for k, v in
                             kegg.KEGGGenome.TAXID_MAP.items())
            if taxid in r_tax_map:
                taxid = r_tax_map[taxid]

            # We use the kegg pathway gene sets provided by 'geneset' for
            # the enrichment calculation.

            # Ensure we are using the latest genesets
            # TODO: ?? Is updating the index enough?
            serverfiles.update(geneset.sfdomain, "index.pck")
            kegg_gs_collections = geneset.collections(
                (("KEGG", "pathways"), taxid)
            )

            pathways = pathway_enrichment(
                kegg_gs_collections, unique_genes.keys(),
                unique_ref_genes.keys(),
                callback=progress
            )
            # Ensure that pathway entries are pre-cached for later use in the
            # list/tree view
            kegg_pathways = kegg.KEGGPathways()
            kegg_pathways.pre_cache(
                pathways.keys(), progress_callback=progress
            )

            return pathways, org, unique_genes, unique_ref_genes

        self.progressBarInit()
        self.setEnabled(False)
        self.infoLabel.setText("Retrieving...\n")

        progress = concurrent.methodinvoke(self, "setProgress", (float,))
        self._enrichTask = concurrent.Task(
            function=lambda:
                run_enrichment(org_code, genes, reference, progress)
        )
        self._enrichTask.finished.connect(self._onEnrichTaskFinished)
        self._executor.submit(self._enrichTask)

    def _onEnrichTaskFinished(self):
        self.setEnabled(True)
        self.setBlocking(False)
        try:
            pathways, org, unique_genes, unique_ref_genes = \
                self._enrichTask.result()
        except Exception:
            raise

        self.progressBarFinished()

        self.org = org
        self.genes = unique_genes.keys()
        self.uniqueGenesDict = unique_genes
        self.revUniqueGenesDict = dict([(val, key) for key, val in
                                        self.uniqueGenesDict.items()])
        self.referenceGenes = unique_ref_genes.keys()
        self.pathways = pathways

        if not self.pathways:
            self.warning(0, "No enriched pathways found.")
        else:
            self.warning(0)

        count = len(set(self.queryGenes))
        self.infoLabel.setText(
            "%i unique gene names on input\n"
            "%i (%.1f%%) genes names matched" %
            (count, len(unique_genes),
             100.0 * len(unique_genes) / count if count else 0.0)
        )

        self.UpdateListView()

    @Slot(float)
    def setProgress(self, value):
        if self.__in_setProgress:
            return

        self.__in_setProgress = True
        self.progressBarSet(value)
        self.__in_setProgress = False

    def GeneNamesFromData(self, data):
        """
        Extract and return gene names from `data`.
        """
        if self.useAttrNames:
            genes = [str(v.name).strip() for v in data.domain.attributes]
        elif self.geneAttrCandidates:
            index = min(self.geneAttrIndex, len(self.geneAttrCandidates) - 1)
            geneAttr = self.geneAttrCandidates[index]
            genes = [str(e[geneAttr]) for e in data
                     if not numpy.isnan(e[geneAttr])]
        else:
            raise ValueError("No gene names in data.")
        return genes

    def SelectedOrganismCode(self):
        """
        Return the selected organism code.
        """
        return self.organismCodes[min(self.organismIndex,
                                      len(self.organismCodes) - 1)]

    def selectAll(self):
        """
        Select all items in the pathway view.
        """
        changed = False
        scene = self.pathwayView.scene()
        with disconnected(scene.selectionChanged, self._onSelectionChanged):
            for item in scene.items():
                if item.flags() & QGraphicsItem.ItemIsSelectable and \
                        not item.isSelected():
                    item.setSelected(True)
                    changed = True
        if changed:
            self._onSelectionChanged()

    def _onSelectionChanged(self):
        # Item selection in the pathwayView/scene has changed
        self.commit()

    def commit(self):
        if self.data:
            selectedItems = self.pathwayView.scene().selectedItems()
            selectedGenes = reduce(set.union, [item.marked_objects
                                               for item in selectedItems],
                                   set())

            if self.useAttrNames:
                selected = [self.data.domain[self.uniqueGenesDict[gene]]
                            for gene in selectedGenes]
#                 newDomain = Orange.data.Domain(selectedVars, 0)
                data = self.data[:, selected]
#                 data = Orange.data.Table(newDomain, self.data)
                self.send("Selected Examples", data)
            elif self.geneAttrCandidates:
                geneAttr = self.geneAttrCandidates[min(self.geneAttrIndex,
                                                       len(self.geneAttrCandidates) - 1)]
                selectedIndices = []
                otherIndices = []
                for i, ex in enumerate(self.data):
                    names = [self.revUniqueGenesDict.get(name, None)
                             for name in split_and_strip(str(ex[geneAttr]), ",")]
                    if any(name and name in selectedGenes for name in names):
                        selectedIndices.append(i)
                    else:
                        otherIndices.append(i)

                if selectedIndices:
                    selected = self.data[selectedIndices]
                else:
                    selected = None

                if otherIndices:
                    other = self.data[otherIndices]
                else:
                    other = None

                self.send("Selected Examples", selected)
                self.send("Unselected Examples", other)
        else:
            self.send("Selected Examples", None)
            self.send("Unselected Examples", None)

    def ClearCache(self):
        kegg.caching.clear_cache()

    def onDeleteWidget(self):
        """
        Called before the widget is removed from the canvas.
        """
        super().onDeleteWidget()

        self.org = None
        self._executor.shutdown(wait=False)
        gc.collect()  # Force collection (WHY?)

    def sizeHint(self):
        return QSize(1024, 720)
예제 #5
0
class OWItemsets(widget.OWWidget):
    name = "Frequent Itemsets"
    description = "Explore sets of items that frequently appear together."
    icon = "icons/FrequentItemsets.svg"
    priority = 10

    inputs = [("Data", Table, "set_data")]
    outputs = [(Output.DATA, Table)]

    minSupport = settings.Setting(30)
    maxItemsets = settings.Setting(10000)
    filterSearch = settings.Setting(True)
    autoFind = settings.Setting(False)
    autoSend = settings.Setting(True)
    filterKeywords = settings.Setting("")
    filterMinItems = settings.Setting(1)
    filterMaxItems = settings.Setting(10000)

    UserAdviceMessages = [
        widget.Message(
            "Itemset are listed in item-sorted order, i.e. "
            "an itemset containing A and B is only listed once, as "
            "A > B (and not also B > A).",
            "itemsets-order",
            widget.Message.Warning,
        ),
        widget.Message(
            "To select all the itemsets that are descendants of "
            "(include) some item X (i.e. the whole subtree), you "
            "can fold the subtree at that item and then select it.",
            "itemsets-order",
            widget.Message.Information,
        ),
    ]

    def __init__(self):
        self._is_running = False
        self.isRegexMatch = lambda x: True
        self.tree = QTreeWidget(
            self.mainArea,
            columnCount=2,
            allColumnsShowFocus=True,
            alternatingRowColors=True,
            selectionMode=QTreeWidget.ExtendedSelection,
            uniformRowHeights=True,
        )
        self.tree.setHeaderLabels(["Itemsets", "Support", "%"])
        self.tree.header().setStretchLastSection(True)
        self.tree.itemSelectionChanged.connect(self.selectionChanged)
        self.mainArea.layout().addWidget(self.tree)

        box = gui.widgetBox(self.controlArea, "Info")
        self.nItemsets = self.nSelectedExamples = self.nSelectedItemsets = ""
        gui.label(box, self, "Number of itemsets: %(nItemsets)s")
        gui.label(box, self, "Selected itemsets: %(nSelectedItemsets)s")
        gui.label(box, self, "Selected examples: %(nSelectedExamples)s")
        hbox = gui.widgetBox(box, orientation="horizontal")
        gui.button(hbox, self, "Expand all", callback=self.tree.expandAll)
        gui.button(hbox, self, "Collapse all", callback=self.tree.collapseAll)

        box = gui.widgetBox(self.controlArea, "Find itemsets")
        gui.valueSlider(
            box,
            self,
            "minSupport",
            values=[0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5] + list(range(1, 101)),
            label="Minimal support:",
            labelFormat="%g%%",
            callback=lambda: self.find_itemsets(),
        )
        gui.hSlider(
            box,
            self,
            "maxItemsets",
            minValue=10000,
            maxValue=100000,
            step=10000,
            label="Max. number of itemsets:",
            labelFormat="%d",
            callback=lambda: self.find_itemsets(),
        )
        self.button = gui.auto_commit(box, self, "autoFind", "Find itemsets", commit=self.find_itemsets)

        box = gui.widgetBox(self.controlArea, "Filter itemsets")
        gui.lineEdit(
            box,
            self,
            "filterKeywords",
            "Contains:",
            callback=self.filter_change,
            orientation="horizontal",
            tooltip="A comma or space-separated list of regular " "expressions.",
        )
        hbox = gui.widgetBox(box, orientation="horizontal")
        gui.spin(hbox, self, "filterMinItems", 1, 998, label="Min. items:", callback=self.filter_change)
        gui.spin(hbox, self, "filterMaxItems", 2, 999, label="Max. items:", callback=self.filter_change)
        gui.checkBox(
            box,
            self,
            "filterSearch",
            label="Apply these filters in search",
            tooltip="If checked, the itemsets are filtered according "
            "to these filter conditions already in the search "
            "phase. \nIf unchecked, the only filters applied "
            "during search are the ones above, "
            "and the itemsets are \nfiltered afterwards only for "
            "display, i.e. only the matching itemsets are shown.",
        )

        gui.rubber(hbox)

        gui.rubber(self.controlArea)
        gui.auto_commit(self.controlArea, self, "autoSend", "Send selection")

        self.filter_change()

    ITEM_DATA_ROLE = Qt.UserRole + 1

    def selectionChanged(self):
        X = self.X
        mapping = self.onehot_mapping
        instances = set()
        where = np.where

        def whole_subtree(node):
            yield node
            for i in range(node.childCount()):
                yield from whole_subtree(node.child(i))

        def itemset(node):
            while node:
                yield node.data(0, self.ITEM_DATA_ROLE)
                node = node.parent()

        def selection_ranges(node):
            n_children = node.childCount()
            if n_children:
                yield (self.tree.indexFromItem(node.child(0)), self.tree.indexFromItem(node.child(n_children - 1)))
            for i in range(n_children):
                yield from selection_ranges(node.child(i))

        nSelectedItemsets = 0
        item_selection = QItemSelection()
        for node in self.tree.selectedItems():
            nodes = (node,) if node.isExpanded() else whole_subtree(node)
            if not node.isExpanded():
                for srange in selection_ranges(node):
                    item_selection.select(*srange)
            for node in nodes:
                nSelectedItemsets += 1
                cols, vals = zip(*(mapping[i] for i in itemset(node)))
                if issparse(X):
                    rows = (len(cols) == np.bincount((X[:, cols] != 0).indices, minlength=X.shape[0])).nonzero()[0]
                else:
                    rows = where((X[:, cols] == vals).all(axis=1))[0]
                instances.update(rows)
        self.tree.itemSelectionChanged.disconnect(self.selectionChanged)
        self.tree.selectionModel().select(item_selection, QItemSelectionModel.Select | QItemSelectionModel.Rows)
        self.tree.itemSelectionChanged.connect(self.selectionChanged)

        self.nSelectedExamples = len(instances)
        self.nSelectedItemsets = nSelectedItemsets
        self.output = self.data[sorted(instances)] or None
        self.commit()

    def commit(self):
        self.send(Output.DATA, self.output)

    def filter_change(self):
        self.warning(9)
        try:
            isRegexMatch = self.isRegexMatch = re.compile(
                "|".join(i.strip() for i in re.split("(,|\s)+", self.filterKeywords.strip()) if i.strip()),
                re.IGNORECASE,
            ).search
        except Exception as e:
            self.warning(9, "Error in regular expression: {}".format(e.args[0]))
            isRegexMatch = self.isRegexMatch = lambda x: True

        def hide(node, depth, has_kw):
            if not has_kw:
                has_kw = isRegexMatch(node.text(0))
            hidden = (
                sum(hide(node.child(i), depth + 1, has_kw) for i in range(node.childCount())) == node.childCount()
                if node.childCount()
                else (not has_kw or not self.filterMinItems <= depth <= self.filterMaxItems)
            )
            node.setHidden(hidden)
            return hidden

        hide(self.tree.invisibleRootItem(), 0, False)

    class TreeWidgetItem(QTreeWidgetItem):
        def data(self, column, role):
            """Construct lazy tooltips"""
            if role != Qt.ToolTipRole:
                return super().data(column, role)
            tooltip = []
            while self:
                tooltip.append(self.text(0))
                self = self.parent()
            return " ".join(reversed(tooltip))

    def find_itemsets(self):
        if self.data is None:
            return
        if self._is_running:
            return
        self._is_running = True

        data = self.data
        self.tree.clear()
        self.tree.setUpdatesEnabled(False)
        self.tree.blockSignals(True)

        class ItemDict(dict):
            def __init__(self, item):
                self.item = item

        top = ItemDict(self.tree.invisibleRootItem())
        X, mapping = OneHot.encode(data)
        self.onehot_mapping = mapping
        ITEM_FMT = "{}" if issparse(data.X) else "{}={}"
        names = {
            item: ITEM_FMT.format(var.name, val) for item, var, val in OneHot.decode(mapping.keys(), data, mapping)
        }
        nItemsets = 0

        filterSearch = self.filterSearch
        filterMinItems, filterMaxItems = self.filterMinItems, self.filterMaxItems
        isRegexMatch = self.isRegexMatch

        # Find itemsets and populate the TreeView
        with self.progressBar(self.maxItemsets + 1) as progress:
            for itemset, support in frequent_itemsets(X, self.minSupport / 100):

                if filterSearch and not filterMinItems <= len(itemset) <= filterMaxItems:
                    continue

                parent = top
                first_new_item = None
                itemset_matches_filter = False

                for item in sorted(itemset):
                    name = names[item]

                    if filterSearch and not itemset_matches_filter:
                        itemset_matches_filter = isRegexMatch(name)

                    child = parent.get(name)
                    if child is None:
                        try:
                            wi = self.TreeWidgetItem(
                                parent.item, [name, str(support), "{:.4g}".format(100 * support / len(data))]
                            )
                        except RuntimeError:
                            # FIXME: When autoFind was in effect and the support
                            # slider was moved, this line excepted with:
                            #     RuntimeError: wrapped C/C++ object of type
                            #                   TreeWidgetItem has been deleted
                            return
                        wi.setData(0, self.ITEM_DATA_ROLE, item)
                        child = parent[name] = ItemDict(wi)

                        if first_new_item is None:
                            first_new_item = (parent, name)
                    parent = child

                if filterSearch and not itemset_matches_filter:
                    parent, name = first_new_item
                    parent.item.removeChild(parent[name].item)
                    del parent[name].item
                    del parent[name]
                else:
                    nItemsets += 1
                    progress.advance()
                if nItemsets >= self.maxItemsets:
                    break

        if not filterSearch:
            self.filter_change()
        self.nItemsets = nItemsets
        self.nSelectedItemsets = 0
        self.nSelectedExamples = 0
        self.tree.expandAll()
        for i in range(self.tree.columnCount()):
            self.tree.resizeColumnToContents(i)
        self.tree.setUpdatesEnabled(True)
        self.tree.blockSignals(False)
        self._is_running = False

    def set_data(self, data):
        self.data = data
        is_error = False
        if data is not None:
            self.warning(0)
            self.error(1)
            self.button.setDisabled(False)
            self.X = data.X
            if issparse(data.X):
                self.X = data.X.tocsc()
            else:
                if not data.domain.has_discrete_attributes():
                    self.error(1, "Discrete features required but data has none.")
                    is_error = True
                    self.button.setDisabled(True)
                elif data.domain.has_continuous_attributes():
                    self.warning(0, "Data has continuous attributes which will be skipped.")
        else:
            self.output = None
            self.commit()
        if self.autoFind and not is_error:
            self.find_itemsets()
예제 #6
0
class MembersWidget(QWidget):
    def __init__(self, parent, logger):
        super(MembersWidget, self).__init__(parent)

        self.logger = logger
        layout = QVBoxLayout(self)
        layout.setSpacing(0)

        self.dropdown_members_dict = {}
        self.dropdown_members_model = DropdownModel(get_peers(), self.logger)
        self.dropdown_members = QComboBox(self)
        self.dropdown_members.setModel(self.dropdown_members_model)

        topLayout = QHBoxLayout()
        topLayout.setSpacing(10)
        topLayout.addWidget(self.dropdown_members, 1)
        self.requestLogsButton = QPushButton("Request Logfiles", self)
        topLayout.addWidget(self.requestLogsButton)
        layout.addLayout(topLayout)

        layout.addWidget(QLabel("Member Information:", self))
        self.memberInformationTable = QTreeWidget(self)
        self.memberInformationTable.setMaximumHeight(65)
        self.memberInformationTable.setSelectionMode(QTreeWidget.NoSelection)
        layout.addWidget(self.memberInformationTable, 0)

        layout.addWidget(QLabel("Send Message:", self))

        sendMessageLayout = QHBoxLayout()
        sendMessageLayout.setSpacing(10)
        messageInput = HistoryLineEdit(self, "Enter a message")
        self.sendMessageButton = QPushButton("Send", self)
        sendMessageLayout.addWidget(messageInput, 1)
        sendMessageLayout.addWidget(self.sendMessageButton)
        layout.addLayout(sendMessageLayout)

        layout.addWidget(QLabel("Log files:", self))
        logSplitter = QSplitter(Qt.Horizontal, self)

        logListWidget = QWidget(self)
        logListLayout = QVBoxLayout(logListWidget)
        logListLayout.setContentsMargins(0, 0, 0, 0)

        self.log_tree_view = QTreeWidget(logSplitter)
        self.log_tree_view.setAlternatingRowColors(True)
        self.log_tree_view.setColumnCount(1)
        self.log_tree_view.setHeaderHidden(True)
        self.log_tree_view.setItemsExpandable(False)
        self.log_tree_view.setIndentation(0)

        logListLayout.addWidget(self.log_tree_view, 1)

        logListBottomLayout = QHBoxLayout()
        self.logSizeLabel = QLabel(logListWidget)
        logListBottomLayout.addWidget(self.logSizeLabel, 1)

        self.clearLogsButton = QPushButton("Clear", logListWidget)
        self.clearLogsButton.setEnabled(False)
        self.clearLogsButton.clicked.connect(self.clearLogs)
        logListBottomLayout.addWidget(self.clearLogsButton, 0)

        logListLayout.addLayout(logListBottomLayout)

        logSplitter.addWidget(logListWidget)

        self.log_area = QTextEdit(logListWidget)
        self.log_area.setLineWrapMode(QTextEdit.WidgetWidth)
        self.log_area.setHorizontalScrollBarPolicy(Qt.ScrollBarAlwaysOff)
        self.log_area.setReadOnly(True)
        logSplitter.addWidget(self.log_area)

        logSplitter.setStretchFactor(0, 0)
        logSplitter.setStretchFactor(1, 1)

        layout.addWidget(logSplitter, 1)

        self.memberSelectionChanged()
        self.log_tree_view.selectionModel().selectionChanged.connect(self.displaySelectedLogfile)
        self.dropdown_members.currentIndexChanged.connect(self.memberSelectionChanged)
        self.requestLogsButton.clicked.connect(self.requestLogClicked)
        self.sendMessageButton.clicked.connect(partial(self.sendMessageToMember, messageInput))
        messageInput.returnPressed.connect(partial(self.sendMessageToMember, messageInput))

        get_notification_center().connectPeerAppended(self.dropdown_members_model.externalRowAppended)
        get_notification_center().connectPeerUpdated(self.dropdown_members_model.externalRowUpdated)
        get_notification_center().connectPeerRemoved(self.dropdown_members_model.externalRowRemoved)
        get_notification_center().connectPeerUpdated(self.updateMemberInformation)

    def destroy_widget(self):
        get_notification_center().disconnectPeerAppended(self.dropdown_members_model.externalRowAppended)
        get_notification_center().disconnectPeerUpdated(self.dropdown_members_model.externalRowUpdated)
        get_notification_center().disconnectPeerRemoved(self.dropdown_members_model.externalRowRemoved)
        get_notification_center().disconnectPeerUpdated(self.updateMemberInformation)

    def listLogfiles(self, basePath, sort=None):
        if sort is None:
            sort = lambda aFile: -self.getLogNumber(aFile)
        logList = [
            os.path.join(basePath, aFile)
            for aFile in os.listdir(basePath)
            if aFile.endswith(".log") and not os.path.isdir(os.path.join(basePath, aFile))
        ]
        return sorted(logList, key=sort)

    def getNumLogsToKeep(self, oldLogFiles, newLogFiles, logOffset):
        oldestNew = None
        for aLogFile in newLogFiles:
            oldestNew, _ = self.getLogDates(aLogFile)
            if oldestNew != None:
                break

        if oldestNew == None:
            # new new log file contains timestamps (they are probably all empty)
            return len(oldLogFiles)

        numToKeep = 0
        while numToKeep < len(oldLogFiles) - logOffset:
            curTime, _ = self.getLogDates(oldLogFiles[numToKeep])
            if curTime == None or curTime < oldestNew:
                # keep empty log files
                numToKeep = numToKeep + 1
            else:
                break
        return numToKeep

    def getLogDates(self, aLogFile):
        with codecs.open(aLogFile, "rb", "utf-8") as logContent:
            logLines = logContent.readlines()
            firstDate = None
            for aLine in logLines:
                firstDate = getLogLineTime(aLine)
                if firstDate != None:
                    break

            lastDate = None
            for aLine in reversed(logLines):
                lastDate = getLogLineTime(aLine)
                if lastDate != None:
                    break

            return firstDate, lastDate

    def getLogNumber(self, aLogFile):
        aLogFile = os.path.basename(aLogFile)
        try:
            return int(aLogFile[: aLogFile.rfind(".")])
        except:
            return -1

    def shiftLogFiles(self, oldLogFiles, numToKeep, shift, logOffset):
        renamedLogfiles = []
        for index, aFile in enumerate(oldLogFiles):
            logNum = self.getLogNumber(aFile)
            if logNum < logOffset:
                # don't touch up-to-date logs
                break
            if index < numToKeep:
                newName = os.path.join(os.path.dirname(aFile), "%d.log" % (logNum + shift))
                renamedLogfiles.append((len(oldLogFiles) - index - 1, aFile, newName))
                os.rename(aFile, newName)
            else:
                os.remove(aFile)
        return renamedLogfiles

    def handleNewLogFiles(self, basePath, tmpPath, logOffset=0):
        oldLogFiles = self.listLogfiles(basePath)
        newLogFiles = self.listLogfiles(tmpPath)

        # check how many log files are actually new
        numToKeep = self.getNumLogsToKeep(oldLogFiles, newLogFiles, logOffset)

        # rename / remove old log files to make room for the new ones
        numNew = len(newLogFiles) - (len(oldLogFiles) - logOffset - numToKeep)
        renamedLogfiles = self.shiftLogFiles(oldLogFiles, numToKeep, numNew, logOffset)

        # move new log files
        addedLogfiles = []
        for index, aLogFile in enumerate(reversed(newLogFiles)):
            shutil.move(aLogFile, basePath)
            if index < numNew:
                addedLogfiles.append((index + logOffset, os.path.join(basePath, os.path.basename(aLogFile))))
        shutil.rmtree(tmpPath, True)

        return numNew, addedLogfiles, renamedLogfiles

    def requestFinished(self):
        self.requestLogsButton.setEnabled(True)
        self.dropdown_members.setEnabled(True)

    @loggingSlot(QThread, object)
    def cb_log_transfer_success(self, thread, path):
        path = convert_string(path)

        basePath = os.path.dirname(path)
        tmpPath = os.path.join(basePath, "tmp")
        if not os.path.exists(tmpPath):
            os.makedirs(tmpPath)

        logsAdded = []
        if path.endswith(".tgz"):
            # extract received log files
            with contextlib.closing(tarfile.open(path, "r:gz")) as tarContent:
                tarContent.extractall(tmpPath)
            _, logsAdded, logsRenamed = self.handleNewLogFiles(basePath, tmpPath)
            self.requestFinished()
        else:
            # log comes from old version
            logNum = 0
            if thread.sender in self.logRequests:
                logNum, requestTime = self.logRequests[thread.sender]
                now = datetime.now()
                td = now - requestTime
                tdSeconds = (td.microseconds + (td.seconds + td.days * 24 * 3600) * 10 ** 6) / 10 ** 6
                if tdSeconds > self.LOG_REQUEST_TIMEOUT:
                    # request timed out or was finished already
                    logNum = 0

            shutil.move(path, os.path.join(tmpPath, "%d.log" % logNum))

            numNew, logsAdded, logsRenamed = self.handleNewLogFiles(basePath, tmpPath, logNum)
            if numNew > 0 and logNum < 9:
                # there might be more new ones
                self.logRequests[thread.sender] = (logNum + 1, datetime.now())
                self.logger.debug("log seems to be new, another!!!")
                logsAdded.append((logNum + 1, None))
                self.request_log(thread.sender, logNum + 1)
            elif thread.sender in self.logRequests:
                # request finished
                del self.logRequests[thread.sender]
                self.requestFinished()
            else:
                self.requestFinished()

        if len(logsAdded) > 0 or len(logsRenamed) > 0:
            self.updateLogList(logsAdded, logsRenamed)

    @loggingSlot(QThread, object)
    def cb_log_transfer_error(self, _thread, message):
        if not self.isVisible():
            return False
        self.log_area.setText("Error while getting log (%s)" % message)
        self.requestFinished()

    def get_selected_log_member(self):
        member = convert_string(self.dropdown_members.currentText())
        if not member:
            return None

        if "(" in member:
            # member contains name, extract ID
            member = member[member.rfind("(") + 1 : member.rfind(")")]

        return member

    def request_log(self, member=None, logNum=0):
        if member == None:
            member = self.get_selected_log_member()
        if member != None:
            self.logger.debug("Requesting log %d from %s", logNum, member)
            get_server().call(
                "HELO_REQUEST_LOGFILE %s %d" % (DataReceiverThread.getOpenPort(category="log%s" % member), logNum),
                set([member]),
            )
        else:
            self.log_area.setText("No Member selected!")

    @loggingSlot()
    def requestLogClicked(self):
        self.requestLogsButton.setEnabled(False)
        self.dropdown_members.setEnabled(False)
        self.updateLogList([(0, None)])
        self.request_log()

    def listLogFilesForMember(self, member):
        if member is None:
            return []
        logDir = os.path.join(get_settings().get_main_config_dir(), "logs", member)
        if not os.path.exists(logDir):
            return []
        return self.listLogfiles(logDir)

    def numLogFilesForMember(self, member):
        return len(self.listLogFilesForMember(member))

    def requestTimedOut(self, item):
        if not sip.isdeleted(item) and item != None and item.data(0, Qt.UserRole) == None:
            self.log_tree_view.takeTopLevelItem(self.log_tree_view.indexFromItem(item).row())
            self.requestFinished()

    def formatFileSize(self, num):
        for x in ["Bytes", "KB", "MB", "GB", "TB"]:
            if num < 1024.0:
                return "%3.1f %s" % (num, x)
            num /= 1024.0

    def initializeLogItem(self, item, logFile):
        firstDate, lastDate = self.getLogDates(logFile)
        text = None
        tooltip = None
        if firstDate != None:
            text = firstDate.strftime("%Y-%m-%d %H:%M:%S")
            tooltip = u"File: %s\nFirst entry: %s\nLast entry: %s" % (
                logFile,
                firstDate.strftime("%Y-%m-%d %H:%M:%S"),
                lastDate.strftime("%Y-%m-%d %H:%M:%S"),
            )
        else:
            timestamp = datetime.fromtimestamp(os.path.getmtime(logFile)).strftime("%Y-%m-%d %H:%M:%S")
            text = u"%s" % os.path.basename(logFile)
            tooltip = u"File:%s\nModification Date: %s" % (logFile, timestamp)
        text = text + "\n%s" % self.formatFileSize(os.path.getsize(logFile))
        if tooltip != None:
            item.setData(0, Qt.ToolTipRole, QVariant(tooltip))
        item.setData(0, Qt.UserRole, logFile)
        item.setData(0, Qt.DisplayRole, QVariant(text))

    @loggingSlot()
    def clearLogs(self):
        for aLogFile in self.listLogFilesForMember(self.get_selected_log_member()):
            os.remove(aLogFile)
        self.updateLogList()

    def updateLogList(self, logsAdded=None, logsRenamed=None):
        selectedMember = self.get_selected_log_member()

        if logsAdded == None:
            self.log_tree_view.clear()
            logsAdded = []
            for index, logFile in enumerate(reversed(self.listLogFilesForMember(selectedMember))):
                logsAdded.append((index, logFile))
            if len(logsAdded) == 0:
                self.log_tree_view.clear()
                self.log_tree_view.addTopLevelItem(
                    QTreeWidgetItem(self.log_tree_view, QStringList("No logs available."))
                )
                self.log_tree_view.setSelectionMode(QTreeWidget.NoSelection)
                self.logSizeLabel.setText("No logs")
                self.clearLogsButton.setEnabled(False)
                return

        if logsRenamed != None:
            for index, oldName, newName in logsRenamed:
                # index + 1 because of the "requesting" item
                item = self.log_tree_view.topLevelItem(index + 1)
                if item != None:
                    itemLogFile = convert_string(item.data(0, Qt.UserRole).toString())
                    if itemLogFile != oldName:
                        self.logger.warning(
                            "index does not correspond to item in list:\n\t%s\n\t%s", itemLogFile, oldName
                        )
                    self.initializeLogItem(item, newName)

        if len(logsAdded) == 0:
            self.log_tree_view.takeTopLevelItem(0)
        else:
            for index, logFile in logsAdded:
                oldItem = self.log_tree_view.topLevelItem(index)
                item = None
                if oldItem != None and oldItem.data(0, Qt.UserRole) == None:
                    # requested item has been received
                    item = oldItem
                else:
                    item = QTreeWidgetItem()
                    oldItem = None

                if logFile == None:
                    item.setData(0, Qt.DisplayRole, QVariant("Requesting..."))
                    QTimer.singleShot(6000, partial(self.requestTimedOut, item))
                else:
                    self.initializeLogItem(item, logFile)

                if oldItem == None:
                    # else, the old item is being modified
                    self.log_tree_view.insertTopLevelItem(index, item)
                self.log_tree_view.setSelectionMode(QTreeWidget.SingleSelection)

        totalSize = 0
        for aLogFile in self.listLogFilesForMember(selectedMember):
            totalSize += os.path.getsize(aLogFile)

        self.logSizeLabel.setText("%s consumed" % self.formatFileSize(totalSize))
        self.clearLogsButton.setEnabled(True)
        # self.displaySelectedLogfile()

    def getSelectedLogContent(self):
        member = self.get_selected_log_member()
        if member == None:
            return "No Log selected."
        selection = self.log_tree_view.selectedIndexes()
        if len(selection) is 0:
            return "No Log selected."

        logPath = convert_string(selection[0].data(Qt.UserRole).toString())
        if logPath == None:
            return "ERROR: path is None"
        if not os.path.exists(logPath):
            return "File not found: " + logPath

        fcontent = ""
        try:
            with codecs.open(logPath, "r", "utf8") as fhandler:
                fcontent = fhandler.read()
        except Exception as e:
            self.logger.exception("Error reading file")
            fcontent = "Error reading file: %s" % str(e)
        return fcontent

    @loggingSlot(QItemSelection, QItemSelection)
    def displaySelectedLogfile(self, _new, _old):
        self.log_area.setText(self.getSelectedLogContent())

    @loggingSlot(int)
    def memberSelectionChanged(self, _new=None):
        self.updateLogList()
        isMemberSelected = self.get_selected_log_member() != None
        self.sendMessageButton.setEnabled(isMemberSelected)
        self.requestLogsButton.setEnabled(isMemberSelected)
        self.updateMemberInformation()

    @loggingSlot(object)
    def sendMessageToMember(self, lineEdit):
        selectedMember = self.get_selected_log_member()
        if selectedMember != None:
            get_server().call(convert_string(lineEdit.text()), set([selectedMember]))
            lineEdit.clear()

    @loggingSlot(object, object)
    def updateMemberInformation(self, peerID=None, peerInfo=None):
        if peerID != None and peerID != self.get_selected_log_member():
            # only update if selected member updated
            return

        self.memberInformationTable.clear()

        if self.get_selected_log_member() == None:
            self.memberInformationTable.setColumnCount(0)
            self.memberInformationTable.setHeaderLabel("No member selected.")
            return

        if peerInfo == None:
            peerInfo = get_peers().getPeerInfo(pID=self.get_selected_log_member())

        if peerInfo == None:
            self.memberInformationTable.setColumnCount(0)
            self.memberInformationTable.setHeaderLabel("No member information available.")
            return

        self.memberInformationTable.setColumnCount(len(peerInfo))
        headers = sorted(peerInfo.keys(), key=lambda s: s.lower())
        self.memberInformationTable.setHeaderLabels(QStringList(headers))
        item = QTreeWidgetItem(self.memberInformationTable)
        for col, header in enumerate(headers):
            item.setData(col, Qt.DisplayRole, QVariant(peerInfo[header]))
        for col in range(self.memberInformationTable.columnCount()):
            self.memberInformationTable.resizeColumnToContents(col)
예제 #7
0
class NetworkArgs(QtHelper.EnhancedQDialog, Logger.ClassLogger):
    """
    Network arguments dialog
    """
    def __init__(self, dataArgs, parent=None):
        """
        Dialog to fill arguments for the network probe

        @param dataArgs: 
        @type dataArgs: 

        @param parent: 
        @type parent:
        """
        super(NetworkArgs, self).__init__(parent)
        self.dataArgs = dataArgs
        self.createDialog()
        self.createConnections()
        self.loadDefaultData()

    def createDialog (self):
        """
        Create qt dialog
        """
        mainLayout = QHBoxLayout()

        paramLayout = QHBoxLayout()
        paramLayout.addWidget( QLabel("Interface:") )
        self.intComboBox = QComboBox()
        self.intComboBox.setEditable(1)
        self.intComboBox.addItems( [ 'any', 'eth0', 'eth1', 'eth2', 'eth3', 'eth4', 'lo0' ] )
        paramLayout.addWidget( self.intComboBox )
        paramLayout.addWidget( QLabel("Filter:") )
        self.filterEdit = QLineEdit()
        paramLayout.addWidget( self.filterEdit )

        dataLayout = QVBoxLayout()
        self.labelHelp = QLabel("Network interface to dump:\nIf the interface name does not exist in the list\nplease to edit the combox list to change it.") 
        
        self.listBox = QTreeWidget(self)
        self.listBox.setIndentation(0)

        self.labels = [ self.tr("Interface"), self.tr("Filter") ]
        self.listBox.setHeaderLabels(self.labels)

        dataLayout.addWidget( self.labelHelp )
        dataLayout.addLayout( paramLayout )
        dataLayout.addWidget( self.listBox )

        buttonLayout = QVBoxLayout()
        self.addButton = QPushButton("Add", self)
        self.delButton = QPushButton("Remove", self)
        self.delButton.setEnabled(False)
        self.editButton = QPushButton("Edit", self)
        self.editButton.setEnabled(False)
        self.clearButton = QPushButton("Clear", self)
        self.okButton = QPushButton("Ok", self)
        self.cancelButton = QPushButton("Cancel", self)
        buttonLayout.addWidget( self.addButton )
        buttonLayout.addWidget( self.delButton )
        buttonLayout.addWidget( self.editButton )
        buttonLayout.addWidget( self.clearButton )
        buttonLayout.addWidget( self.okButton )
        buttonLayout.addWidget( self.cancelButton )

        mainLayout.addLayout( dataLayout ) 
        mainLayout.addLayout( buttonLayout ) 

        self.setLayout(mainLayout)

        self.setWindowTitle("Network Probe > Arguments")
        

    def createConnections (self):
        """
        Create qt connections
        """
        self.okButton.clicked.connect(self.accept)
        self.cancelButton.clicked.connect(self.reject)
        self.addButton.clicked.connect(self.addItem)
        self.delButton.clicked.connect(self.delItem)
        self.editButton.clicked.connect(self.editItem)
        self.clearButton.clicked.connect(self.clearList)
        self.listBox.itemClicked.connect(self.onItemSelected)

    def clearList(self):
        """
        Clear the list
        """
        self.listBox.clear()

    def onItemSelected(self, itm, row):
        """
        Called when an item is selected
        """
        self.delButton.setEnabled(True)
        self.editButton.setEnabled(True)

    def editItem(self):
        """
        Edit item
        """
        self.delButton.setEnabled(False)
        self.editButton.setEnabled(False)
        # retrieve value to put it in the line edit and then remove item 
        model = self.listBox.model()
        for selectedItem in self.listBox.selectedItems():
            qIndex = self.listBox.indexFromItem(selectedItem)
            eth = selectedItem.dataNet['interface']
            flt = selectedItem.dataNet['filter']
            self.filterEdit.setText( flt )
            self.intComboBox.setEditText( eth )
            model.removeRow(qIndex.row())

    def delItem(self):
        """
        Delete item
        """
        self.delButton.setEnabled(False)
        self.editButton.setEnabled(False)
        # remove item
        model = self.listBox.model()
        for selectedItem in self.listBox.selectedItems():
            qIndex = self.listBox.indexFromItem(selectedItem)
            model.removeRow(qIndex.row())

    def addItem(self):
        """
        Add item
        """
        eth = self.intComboBox.currentText()
        if eth != '':
            flt = self.filterEdit.text()
            tpl = {'interface': str(eth), 'filter': str(flt) }
            newParam = NetItem( data=tpl, parent=self.listBox)
            self.listBox.resizeColumnToContents(0)

    def loadDefaultData(self):
        """
        Load the default data
        """
        try:
            if len(self.dataArgs) ==0:
                return
            datObj = eval(str(self.dataArgs))
            if 'interfaces' in datObj:
                for eth in datObj['interfaces']:
                    newParam = NetItem( data=eth, parent=self.listBox)
        except Exception as e:
            self.error( "unable to load the default data: %s" % e )

    def getArgs(self):
        """
        Returns arguments
        Example list of files: {'interfaces': [ {'interface':'any', 'filter':''} ] }
        """
        listEth = []
        # iterate all items in a QListWidget
        root = self.listBox.invisibleRootItem()
        child_count = root.childCount()
        for i in xrange(child_count):
            itm = root.child(i)
            listEth.append( eval(str(itm.dataNet)) )
        ret = {'interfaces': listEth } 
        if len(listEth) == 0:
            ret = ''
        return str(ret)
예제 #8
0
class OWKEGGPathwayBrowser(widget.OWWidget):
    name = "KEGG Pathways"
    description = "Browse KEGG pathways that include an input set of genes."
    icon = "../widgets/icons/KEGGPathways.svg"
    priority = 2030

    inputs = [("Data", Orange.data.Table, "SetData", widget.Default),
              ("Reference", Orange.data.Table, "SetRefData")]
    outputs = [("Selected Data", Orange.data.Table, widget.Default),
               ("Unselected Data", Orange.data.Table)]

    settingsHandler = settings.DomainContextHandler()

    organismIndex = settings.ContextSetting(0)
    geneAttrIndex = settings.ContextSetting(0)
    useAttrNames = settings.ContextSetting(False)

    autoCommit = settings.Setting(False)
    autoResize = settings.Setting(True)
    useReference = settings.Setting(False)
    showOrthology = settings.Setting(True)

    Ready, Initializing, Running = 0, 1, 2

    def __init__(self, parent=None):
        super().__init__(parent)

        self.organismCodes = []
        self._changedFlag = False
        self.__invalidated = False
        self.__runstate = OWKEGGPathwayBrowser.Initializing
        self.__in_setProgress = False

        self.controlArea.setMaximumWidth(250)
        box = gui.widgetBox(self.controlArea, "Info")
        self.infoLabel = gui.widgetLabel(box, "No data on input\n")

        # Organism selection.
        box = gui.widgetBox(self.controlArea, "Organism")
        self.organismComboBox = gui.comboBox(
            box,
            self,
            "organismIndex",
            items=[],
            callback=self.Update,
            addSpace=True,
            tooltip="Select the organism of the input genes")

        # Selection of genes attribute
        box = gui.widgetBox(self.controlArea, "Gene attribute")
        self.geneAttrCandidates = itemmodels.VariableListModel(parent=self)
        self.geneAttrCombo = gui.comboBox(box,
                                          self,
                                          "geneAttrIndex",
                                          callback=self.Update)
        self.geneAttrCombo.setModel(self.geneAttrCandidates)

        gui.checkBox(box,
                     self,
                     "useAttrNames",
                     "Use variable names",
                     disables=[(-1, self.geneAttrCombo)],
                     callback=self.Update)

        self.geneAttrCombo.setDisabled(bool(self.useAttrNames))

        gui.separator(self.controlArea)

        gui.checkBox(self.controlArea,
                     self,
                     "useReference",
                     "From signal",
                     box="Reference",
                     callback=self.Update)

        gui.separator(self.controlArea)

        gui.checkBox(self.controlArea,
                     self,
                     "showOrthology",
                     "Show pathways in full orthology",
                     box="Orthology",
                     callback=self.UpdateListView)

        gui.checkBox(self.controlArea,
                     self,
                     "autoResize",
                     "Resize to fit",
                     box="Image",
                     callback=self.UpdatePathwayViewTransform)

        box = gui.widgetBox(self.controlArea, "Cache Control")

        gui.button(box,
                   self,
                   "Clear cache",
                   callback=self.ClearCache,
                   tooltip="Clear all locally cached KEGG data.",
                   default=False,
                   autoDefault=False)

        gui.separator(self.controlArea)

        gui.auto_commit(self.controlArea, self, "autoCommit", "Commit")

        gui.rubber(self.controlArea)

        spliter = QSplitter(Qt.Vertical, self.mainArea)
        self.pathwayView = PathwayView(self, spliter)
        self.pathwayView.scene().selectionChanged.connect(
            self._onSelectionChanged)
        self.mainArea.layout().addWidget(spliter)

        self.listView = QTreeWidget(allColumnsShowFocus=True,
                                    selectionMode=QTreeWidget.SingleSelection,
                                    sortingEnabled=True,
                                    maximumHeight=200)

        spliter.addWidget(self.listView)

        self.listView.setColumnCount(4)
        self.listView.setHeaderLabels(
            ["Pathway", "P value", "Genes", "Reference"])

        self.listView.itemSelectionChanged.connect(self.UpdatePathwayView)

        select = QAction("Select All", self, shortcut=QKeySequence.SelectAll)
        select.triggered.connect(self.selectAll)
        self.addAction(select)

        self.data = None
        self.refData = None

        self._executor = concurrent.ThreadExecutor()
        self.setEnabled(False)
        self.setBlocking(True)
        progress = concurrent.methodinvoke(self, "setProgress", (float, ))

        def get_genome():
            """Return a KEGGGenome with the common org entries precached."""
            genome = kegg.KEGGGenome()

            essential = genome.essential_organisms()
            common = genome.common_organisms()
            # Remove duplicates of essential from common.
            # (essential + common list as defined here will be used in the
            # GUI.)
            common = [c for c in common if c not in essential]

            # TODO: Add option to specify additional organisms not
            # in the common list.

            keys = list(map(genome.org_code_to_entry_key, essential + common))

            genome.pre_cache(keys, progress_callback=progress)
            return (keys, genome)

        self._genomeTask = task = concurrent.Task(function=get_genome)
        task.finished.connect(self.__initialize_finish)

        self.progressBarInit()
        self.infoLabel.setText("Fetching organism definitions\n")
        self._executor.submit(task)

    def __initialize_finish(self):
        if self.__runstate != OWKEGGPathwayBrowser.Initializing:
            return

        try:
            keys, genome = self._genomeTask.result()
        except Exception as err:
            self.error(0, str(err))
            raise

        self.progressBarFinished()
        self.setEnabled(True)
        self.setBlocking(False)

        entries = [genome[key] for key in keys]
        items = [entry.definition for entry in entries]
        codes = [entry.organism_code for entry in entries]

        self.organismCodes = codes
        self.organismComboBox.clear()
        self.organismComboBox.addItems(items)
        self.organismComboBox.setCurrentIndex(self.organismIndex)

        self.infoLabel.setText("No data on input\n")

    def Clear(self):
        """
        Clear the widget state.
        """
        self.queryGenes = []
        self.referenceGenes = []
        self.genes = {}
        self.uniqueGenesDict = {}
        self.revUniqueGenesDict = {}
        self.pathways = {}
        self.org = None
        self.geneAttrCandidates[:] = []

        self.infoLabel.setText("No data on input\n")
        self.listView.clear()
        self.pathwayView.SetPathway(None)

        self.send("Selected Data", None)
        self.send("Unselected Data", None)

    def SetData(self, data=None):
        if self.__runstate == OWKEGGPathwayBrowser.Initializing:
            self.__initialize_finish()

        self.closeContext()
        self.data = data
        self.warning(0)
        self.error(0)
        self.information(0)

        if data is not None:
            vars = data.domain.variables + data.domain.metas
            vars = [
                var for var in vars
                if isinstance(var, Orange.data.StringVariable)
            ]
            self.geneAttrCandidates[:] = vars

            # Try to guess the gene name variable
            if vars:
                names_lower = [v.name.lower() for v in vars]
                scores = [(name == "gene", "gene" in name)
                          for name in names_lower]
                imax, _ = max(enumerate(scores), key=itemgetter(1))
            else:
                imax = -1

            self.geneAttrIndex = imax

            taxid = data_hints.get_hint(data, "taxid", None)
            if taxid:
                try:
                    code = kegg.from_taxid(taxid)
                    self.organismIndex = self.organismCodes.index(code)
                except Exception as ex:
                    print(ex, taxid)

            self.useAttrNames = data_hints.get_hint(data, "genesinrows",
                                                    self.useAttrNames)
            self.openContext(data)

            if len(self.geneAttrCandidates) == 0:
                self.useAttrNames = True
                self.geneAttrIndex = -1
            else:
                self.geneAttrIndex = min(self.geneAttrIndex,
                                         len(self.geneAttrCandidates) - 1)
        else:
            self.Clear()

        self.__invalidated = True

    def SetRefData(self, data=None):
        self.refData = data
        self.information(1)

        if data is not None and self.useReference:
            self.__invalidated = True

    def handleNewSignals(self):
        if self.__invalidated:
            self.Update()
            self.__invalidated = False

    def UpdateListView(self):
        self.bestPValueItem = None
        self.listView.clear()
        if not self.data:
            return

        allPathways = self.org.pathways()
        allRefPathways = kegg.pathways("map")

        items = []
        kegg_pathways = kegg.KEGGPathways()

        org_code = self.organismCodes[min(self.organismIndex,
                                          len(self.organismCodes) - 1)]

        if self.showOrthology:
            self.koOrthology = kegg.KEGGBrite("ko00001")
            self.listView.setRootIsDecorated(True)
            path_ids = set([s[-5:] for s in self.pathways.keys()])

            def _walkCollect(koEntry):
                num = koEntry.title[:5] if koEntry.title else None
                if num in path_ids:
                    return ([koEntry] +
                            reduce(lambda li, c: li + _walkCollect(c),
                                   [child for child in koEntry.entries], []))
                else:
                    c = reduce(lambda li, c: li + _walkCollect(c),
                               [child for child in koEntry.entries], [])
                    return c + (c and [koEntry] or [])

            allClasses = reduce(lambda li1, li2: li1 + li2,
                                [_walkCollect(c) for c in self.koOrthology],
                                [])

            def _walkCreate(koEntry, lvItem):
                item = QTreeWidgetItem(lvItem)
                id = "path:" + org_code + koEntry.title[:5]

                if koEntry.title[:5] in path_ids:
                    p = kegg_pathways.get_entry(id)
                    if p is None:
                        # In case the genesets still have obsolete entries
                        name = koEntry.title
                    else:
                        name = p.name
                    genes, p_value, ref = self.pathways[id]
                    item.setText(0, name)
                    item.setText(1, "%.5f" % p_value)
                    item.setText(2, "%i of %i" % (len(genes), len(self.genes)))
                    item.setText(3,
                                 "%i of %i" % (ref, len(self.referenceGenes)))
                    item.pathway_id = id if p is not None else None
                else:
                    if id in allPathways:
                        text = kegg_pathways.get_entry(id).name
                    else:
                        text = koEntry.title
                    item.setText(0, text)

                    if id in allPathways:
                        item.pathway_id = id
                    elif "path:map" + koEntry.title[:5] in allRefPathways:
                        item.pathway_id = "path:map" + koEntry.title[:5]
                    else:
                        item.pathway_id = None

                for child in koEntry.entries:
                    if child in allClasses:
                        _walkCreate(child, item)

            for koEntry in self.koOrthology:
                if koEntry in allClasses:
                    _walkCreate(koEntry, self.listView)

            self.listView.update()
        else:
            self.listView.setRootIsDecorated(False)
            pathways = self.pathways.items()
            pathways = sorted(pathways, key=lambda item: item[1][1])

            for id, (genes, p_value, ref) in pathways:
                item = QTreeWidgetItem(self.listView)
                item.setText(0, kegg_pathways.get_entry(id).name)
                item.setText(1, "%.5f" % p_value)
                item.setText(2, "%i of %i" % (len(genes), len(self.genes)))
                item.setText(3, "%i of %i" % (ref, len(self.referenceGenes)))
                item.pathway_id = id
                items.append(item)

        self.bestPValueItem = items and items[0] or None
        self.listView.expandAll()
        for i in range(4):
            self.listView.resizeColumnToContents(i)

        if self.bestPValueItem:
            index = self.listView.indexFromItem(self.bestPValueItem)
            self.listView.selectionModel().select(
                index, QItemSelectionModel.ClearAndSelect)

    def UpdatePathwayView(self):
        items = self.listView.selectedItems()

        if len(items) > 0:
            item = items[0]
        else:
            item = None

        self.commit()
        item = item or self.bestPValueItem
        if not item or not item.pathway_id:
            self.pathwayView.SetPathway(None)
            return

        def get_kgml_and_image(pathway_id):
            """Return an initialized KEGGPathway with pre-cached data"""
            p = kegg.KEGGPathway(pathway_id)
            p._get_kgml()  # makes sure the kgml file is downloaded
            p._get_image_filename()  # makes sure the image is downloaded
            return (pathway_id, p)

        self.setEnabled(False)
        self._pathwayTask = concurrent.Task(
            function=lambda: get_kgml_and_image(item.pathway_id))
        self._pathwayTask.finished.connect(self._onPathwayTaskFinshed)
        self._executor.submit(self._pathwayTask)

    def _onPathwayTaskFinshed(self):
        self.setEnabled(True)
        pathway_id, self.pathway = self._pathwayTask.result()
        self.pathwayView.SetPathway(self.pathway,
                                    self.pathways.get(pathway_id, [[]])[0])

    def UpdatePathwayViewTransform(self):
        self.pathwayView.updateTransform()

    def Update(self):
        """
        Update (recompute enriched pathways) the widget state.
        """
        if not self.data:
            return

        self.error(0)
        self.information(0)

        # XXX: Check data in setData, do not even alow this to be executed if
        # data has no genes
        try:
            genes = self.GeneNamesFromData(self.data)
        except ValueError:
            self.error(0, "Cannot extract gene names from input.")
            genes = []

        if not self.useAttrNames and any("," in gene for gene in genes):
            genes = reduce(add, (split_and_strip(gene, ",") for gene in genes),
                           [])
            self.information(
                0, "Separators detected in input gene names. "
                "Assuming multiple genes per instance.")
        self.queryGenes = genes

        self.information(1)
        reference = None
        if self.useReference and self.refData:
            reference = self.GeneNamesFromData(self.refData)
            if not self.useAttrNames \
                    and any("," in gene for gene in reference):
                reference = reduce(add, (split_and_strip(gene, ",")
                                         for gene in reference), [])
                self.information(
                    1, "Separators detected in reference gene "
                    "names. Assuming multiple genes per "
                    "instance.")

        org_code = self.SelectedOrganismCode()

        def run_enrichment(org_code, genes, reference=None, progress=None):
            org = kegg.KEGGOrganism(org_code)
            if reference is None:
                reference = org.get_genes()

            # Map 'genes' and 'reference' sets to unique KEGG identifiers
            unique_genes, _, _ = org.get_unique_gene_ids(set(genes))
            unique_ref_genes, _, _ = org.get_unique_gene_ids(set(reference))

            taxid = kegg.to_taxid(org.org_code)
            # Map the taxid back to standard 'common' taxids
            # (as used by 'geneset') if applicable
            r_tax_map = dict(
                (v, k) for k, v in kegg.KEGGGenome.TAXID_MAP.items())
            if taxid in r_tax_map:
                taxid = r_tax_map[taxid]

            # We use the kegg pathway gene sets provided by 'geneset' for
            # the enrichment calculation.

            # Ensure we are using the latest genesets
            # TODO: ?? Is updating the index enough?
            serverfiles.update(geneset.sfdomain, "index.pck")
            kegg_gs_collections = geneset.collections(
                (("KEGG", "pathways"), taxid))

            pathways = pathway_enrichment(kegg_gs_collections,
                                          unique_genes.keys(),
                                          unique_ref_genes.keys(),
                                          callback=progress)
            # Ensure that pathway entries are pre-cached for later use in the
            # list/tree view
            kegg_pathways = kegg.KEGGPathways()
            kegg_pathways.pre_cache(pathways.keys(),
                                    progress_callback=progress)

            return pathways, org, unique_genes, unique_ref_genes

        self.progressBarInit()
        self.setEnabled(False)
        self.infoLabel.setText("Retrieving...\n")

        progress = concurrent.methodinvoke(self, "setProgress", (float, ))
        self._enrichTask = concurrent.Task(function=lambda: run_enrichment(
            org_code, genes, reference, progress))
        self._enrichTask.finished.connect(self._onEnrichTaskFinished)
        self._executor.submit(self._enrichTask)

    def _onEnrichTaskFinished(self):
        self.setEnabled(True)
        self.setBlocking(False)
        try:
            pathways, org, unique_genes, unique_ref_genes = \
                self._enrichTask.result()
        except Exception:
            raise

        self.progressBarFinished()

        self.org = org
        self.genes = unique_genes.keys()
        self.uniqueGenesDict = unique_genes
        self.revUniqueGenesDict = dict([
            (val, key) for key, val in self.uniqueGenesDict.items()
        ])
        self.referenceGenes = unique_ref_genes.keys()
        self.pathways = pathways

        if not self.pathways:
            self.warning(0, "No enriched pathways found.")
        else:
            self.warning(0)

        count = len(set(self.queryGenes))
        self.infoLabel.setText("%i unique gene names on input\n"
                               "%i (%.1f%%) genes names matched" %
                               (count, len(unique_genes), 100.0 *
                                len(unique_genes) / count if count else 0.0))

        self.UpdateListView()

    @Slot(float)
    def setProgress(self, value):
        if self.__in_setProgress:
            return

        self.__in_setProgress = True
        self.progressBarSet(value)
        self.__in_setProgress = False

    def GeneNamesFromData(self, data):
        """
        Extract and return gene names from `data`.
        """
        if self.useAttrNames:
            genes = [str(v.name).strip() for v in data.domain.attributes]
        elif self.geneAttrCandidates:
            assert 0 <= self.geneAttrIndex < len(self.geneAttrCandidates)
            geneAttr = self.geneAttrCandidates[self.geneAttrIndex]
            genes = [
                str(e[geneAttr]) for e in data if not numpy.isnan(e[geneAttr])
            ]
        else:
            raise ValueError("No gene names in data.")
        return genes

    def SelectedOrganismCode(self):
        """
        Return the selected organism code.
        """
        return self.organismCodes[min(self.organismIndex,
                                      len(self.organismCodes) - 1)]

    def selectAll(self):
        """
        Select all items in the pathway view.
        """
        changed = False
        scene = self.pathwayView.scene()
        with disconnected(scene.selectionChanged, self._onSelectionChanged):
            for item in scene.items():
                if item.flags() & QGraphicsItem.ItemIsSelectable and \
                        not item.isSelected():
                    item.setSelected(True)
                    changed = True
        if changed:
            self._onSelectionChanged()

    def _onSelectionChanged(self):
        # Item selection in the pathwayView/scene has changed
        self.commit()

    def commit(self):
        if self.data:
            selectedItems = self.pathwayView.scene().selectedItems()
            selectedGenes = reduce(
                set.union, [item.marked_objects for item in selectedItems],
                set())

            if self.useAttrNames:
                selected = [
                    self.data.domain[self.uniqueGenesDict[gene]]
                    for gene in selectedGenes
                ]
                #                 newDomain = Orange.data.Domain(selectedVars, 0)
                data = self.data[:, selected]
                #                 data = Orange.data.Table(newDomain, self.data)
                self.send("Selected Data", data)
            elif self.geneAttrCandidates:
                assert 0 <= self.geneAttrIndex < len(self.geneAttrCandidates)
                geneAttr = self.geneAttrCandidates[self.geneAttrIndex]
                selectedIndices = []
                otherIndices = []
                for i, ex in enumerate(self.data):
                    names = [
                        self.revUniqueGenesDict.get(name, None)
                        for name in split_and_strip(str(ex[geneAttr]), ",")
                    ]
                    if any(name and name in selectedGenes for name in names):
                        selectedIndices.append(i)
                    else:
                        otherIndices.append(i)

                if selectedIndices:
                    selected = self.data[selectedIndices]
                else:
                    selected = None

                if otherIndices:
                    other = self.data[otherIndices]
                else:
                    other = None

                self.send("Selected Data", selected)
                self.send("Unselected Data", other)
        else:
            self.send("Selected Data", None)
            self.send("Unselected Data", None)

    def ClearCache(self):
        kegg.caching.clear_cache()

    def onDeleteWidget(self):
        """
        Called before the widget is removed from the canvas.
        """
        super().onDeleteWidget()

        self.org = None
        self._executor.shutdown(wait=False)
        gc.collect()  # Force collection (WHY?)

    def sizeHint(self):
        return QSize(1024, 720)
예제 #9
0
class OWKEGGPathwayBrowser(OWWidget):
    settingsList = ["organismIndex", "geneAttrIndex", "autoCommit",
                    "autoResize", "useReference", "useAttrNames",
                    "showOrthology"]

    contextHandlers = {
        "": DomainContextHandler(
            "",
            [ContextField("organismIndex",
                          DomainContextHandler.Required +
                          DomainContextHandler.IncludeMetaAttributes),
             ContextField("geneAttrIndex",
                          DomainContextHandler.Required +
                          DomainContextHandler.IncludeMetaAttributes),
             ContextField("useAttrNames",
                          DomainContextHandler.Required +
                          DomainContextHandler.IncludeMetaAttributes)]
        )
    }

    def __init__(self, parent=None, signalManager=None, name="KEGG Pathways"):
        OWWidget.__init__(self, parent, signalManager, name, wantGraph=True)
        self.inputs = [("Examples", Orange.data.Table, self.SetData),
                       ("Reference", Orange.data.Table, self.SetRefData)]
        self.outputs = [("Selected Examples", Orange.data.Table),
                        ("Unselected Examples", Orange.data.Table)]
        self.organismIndex = 0
        self.geneAttrIndex = 0
        self.autoCommit = False
        self.autoResize = True
        self.useReference = False
        self.useAttrNames = 0
        self.showOrthology = True

        self.loadSettings()

        self.organismCodes = []
        self._changedFlag = False

        self.controlArea.setMaximumWidth(250)
        box = OWGUI.widgetBox(self.controlArea, "Info")
        self.infoLabel = OWGUI.widgetLabel(box, "No data on input\n")

        # Organism selection.
        box = OWGUI.widgetBox(self.controlArea, "Organism")
        self.organismComboBox = OWGUI.comboBox(
            box, self, "organismIndex",
            items=[],
            callback=self.Update,
            addSpace=True,
            debuggingEnabled=0,
            tooltip="Select the organism of the input genes")

        # Selection of genes attribute
        box = OWGUI.widgetBox(self.controlArea, "Gene attribute")
        self.geneAttrCandidates = VariableListModel(parent=self)
        self.geneAttrCombo = OWGUI.comboBox(
            box, self, "geneAttrIndex", callback=self.Update)
        self.geneAttrCombo.setModel(self.geneAttrCandidates)

        OWGUI.checkBox(box, self, "useAttrNames",
                       "Use variable names",
                       disables=[(-1, self.geneAttrCombo)],
                       callback=self.Update)

        self.geneAttrCombo.setDisabled(bool(self.useAttrNames))

        OWGUI.separator(self.controlArea)

        OWGUI.checkBox(self.controlArea, self, "useReference",
                       "From signal",
                       box="Reference",
                       callback=self.Update)

        OWGUI.separator(self.controlArea)

        OWGUI.checkBox(self.controlArea, self, "showOrthology",
                       "Show pathways in full orthology",
                       box="Orthology",
                       callback=self.UpdateListView)

        OWGUI.checkBox(self.controlArea, self, "autoResize",
                       "Resize to fit",
                       box="Image",
                       callback=self.UpdatePathwayViewTransform)

        box = OWGUI.widgetBox(self.controlArea, "Cache Control")

        OWGUI.button(box, self, "Clear cache",
                     callback=self.ClearCache,
                     tooltip="Clear all locally cached KEGG data.")

        OWGUI.separator(self.controlArea)

        box = OWGUI.widgetBox(self.controlArea, "Selection")
        cb = OWGUI.checkBox(box, self, "autoCommit", "Commit on update")
        button = OWGUI.button(box, self, "Commit", callback=self.Commit,
                              default=True)
        OWGUI.setStopper(self, button, cb, "_changedFlag", self.Commit)

        OWGUI.rubber(self.controlArea)

        spliter = QSplitter(Qt.Vertical, self.mainArea)
        self.pathwayView = PathwayView(self, spliter)
        self.pathwayView.scene().selectionChanged.connect(
            self._onSelectionChanged
        )
        self.mainArea.layout().addWidget(spliter)

        self.listView = QTreeWidget(spliter)
        spliter.addWidget(self.listView)

        self.listView.setAllColumnsShowFocus(1)
        self.listView.setColumnCount(4)
        self.listView.setHeaderLabels(["Pathway", "P value",
                                       "Genes", "Reference"])

        self.listView.setSelectionMode(QTreeWidget.SingleSelection)

        self.listView.setSortingEnabled(True)

        self.listView.setMaximumHeight(200)

        self.connect(self.listView,
                     SIGNAL("itemSelectionChanged()"),
                     self.UpdatePathwayView)

        self.connect(self.graphButton,
                     SIGNAL("clicked()"),
                     self.saveGraph)

        select = QAction(
            "Select All", self,
            shortcut=QKeySequence.SelectAll
        )
        select.triggered.connect(self.selectAll)
        self.addAction(select)

        self.data = None
        self.refData = None

        self.resize(800, 600)

        self.connect(self,
                     SIGNAL("widgetStateChanged(QString, int, QString)"),
                     self.onStateChange)

        self.has_new_data = False
        self.has_new_reference_set = False

        self._executor = ThreadExecutor()
        self.setEnabled(False)
        self.setBlocking(True)
        QTimer.singleShot(0, self._initialize)
        self.infoLabel.setText("Fetching organism definitions\n")

    def _initialize(self):
        # First try to import slumber to see if we can even use the
        # kegg module.
        try:
            import slumber
        except ImportError:
            QMessageBox.warning(self,
                "'slumber' library required.",
                '<p>Please install '
                '<a href="http://pypi.python.org/pypi/slumber">slumber</a> '
                'library to use KEGG Pathways widget.</p>'
            )
            self.infoLabel.setText(
                '<p>Please install '
                '<a href="http://pypi.python.org/pypi/slumber">slumber</a> '
                'library to use KEGG Pathways widget.</p>'
            )
            self.error(0, "Missing slumber/requests library")
            return

        progress = methodinvoke(self, "setProgress", (float,))

        def get_genome():
            """Return a KEGGGenome with the common org entries precached."""
            genome = kegg.KEGGGenome()

            essential = genome.essential_organisms()
            common = genome.common_organisms()
            # Remove duplicates of essential from common.
            # (essential + common list as defined here will be used in the
            # GUI.)
            common = [c for c in common if c not in essential]

            # TODO: Add option to specify additional organisms not
            # in the common list.

            keys = map(genome.org_code_to_entry_key, essential + common)

            genome.pre_cache(keys, progress_callback=progress)
            return (keys, genome)

        self._genomeTask = task = Task(function=get_genome)
        task.finished.connect(self._initializeOrganisms)

        self.progressBarInit()
        self._executor.submit(task)

    def _initializeOrganisms(self):
        self.progressBarFinished()
        try:
            keys, genome = self._genomeTask.result()
        except Exception as err:
            self.error(0, str(err))
            return

        entries = [genome[key] for key in keys]
        items = [entry.definition for entry in entries]
        codes = [entry.organism_code for entry in entries]

        self.organismCodes = codes
        self.organismComboBox.clear()
        self.organismComboBox.addItems(items)
        self.organismComboBox.setCurrentIndex(self.organismIndex)

        self.setEnabled(True)
        self.setBlocking(False)
        self.infoLabel.setText("No data on input\n")

    def Clear(self):
        """
        Clear the widget state.
        """
        self.queryGenes = []
        self.referenceGenes = []
        self.genes = {}
        self.uniqueGenesDict = {}
        self.revUniqueGenesDict = {}
        self.pathways = {}
        self.org = None
        self.geneAttrCandidates[:] = []

        self.infoLabel.setText("No data on input\n")
        self.listView.clear()
        self.pathwayView.SetPathway(None)

        self.send("Selected Examples", None)
        self.send("Unselected Examples", None)

    def SetData(self, data=None):
        self.closeContext()
        self.data = data
        self.warning(0)
        self.error(0)
        self.information(0)

        if data is not None:
            vars = data.domain.variables + data.domain.getmetas().values()
            vars = [var for var in vars
                    if isinstance(var, (Orange.feature.String,
                                        Orange.feature.Discrete))]
            self.geneAttrCandidates[:] = vars

            # Try to guess the gene name variable
            names_lower = [v.name.lower() for v in vars]
            scores = [(name == "gene", "gene" in name)
                      for name in names_lower]
            imax, _ = max(enumerate(scores), key=itemgetter(1))
            self.geneAttrIndex = imax

            taxid = data_hints.get_hint(data, "taxid", None)
            if taxid:
                try:
                    code = kegg.from_taxid(taxid)
                    self.organismIndex = self.organismCodes.index(code)
                except Exception as ex:
                    print ex, taxid

            self.useAttrNames = data_hints.get_hint(data, "genesinrows",
                                                    self.useAttrNames)

            self.openContext("", data)
        else:
            self.Clear()

        self.has_new_data = True

    def SetRefData(self, data=None):
        self.refData = data
        self.information(1)

        self.has_new_reference_set = True

    def handleNewSignals(self):
        if self.has_new_data or (self.has_new_reference_set and \
                                 self.useReference):
            self.Update()

            self.has_new_data = False
            self.has_new_reference_set = False

    def UpdateListView(self):
        self.bestPValueItem = None
        self.listView.clear()
        if not self.data:
            return

        allPathways = self.org.pathways()
        allRefPathways = kegg.pathways("map")

        items = []
        kegg_pathways = kegg.KEGGPathways()

        org_code = self.organismCodes[min(self.organismIndex,
                                          len(self.organismCodes) - 1)]

        if self.showOrthology:
            self.koOrthology = kegg.KEGGBrite("ko00001")
            self.listView.setRootIsDecorated(True)
            path_ids = set([s[-5:] for s in self.pathways.keys()])

            def _walkCollect(koEntry):
                num = koEntry.title[:5] if koEntry.title else None
                if num in path_ids:
                    return ([koEntry] +
                            reduce(lambda li, c: li + _walkCollect(c),
                                   [child for child in koEntry.entries],
                                   []))
                else:
                    c = reduce(lambda li, c: li + _walkCollect(c),
                               [child for child in koEntry.entries],
                               [])
                    return c + (c and [koEntry] or [])

            allClasses = reduce(lambda li1, li2: li1 + li2,
                                [_walkCollect(c) for c in self.koOrthology],
                                [])

            def _walkCreate(koEntry, lvItem):
                item = QTreeWidgetItem(lvItem)
                id = "path:" + org_code + koEntry.title[:5]

                if koEntry.title[:5] in path_ids:
                    p = kegg_pathways.get_entry(id)
                    if p is None:
                        # In case the genesets still have obsolete entries
                        name = koEntry.title
                    else:
                        name = p.name
                    genes, p_value, ref = self.pathways[id]
                    item.setText(0, name)
                    item.setText(1, "%.5f" % p_value)
                    item.setText(2, "%i of %i" % (len(genes), len(self.genes)))
                    item.setText(3, "%i of %i" % (ref, len(self.referenceGenes)))
                    item.pathway_id = id if p is not None else None
                else:
                    if id in allPathways:
                        text = kegg_pathways.get_entry(id).name
                    else:
                        text = koEntry.title
                    item.setText(0, text)

                    if id in allPathways:
                        item.pathway_id = id
                    elif "path:map" + koEntry.title[:5] in allRefPathways:
                        item.pathway_id = "path:map" + koEntry.title[:5]
                    else:
                        item.pathway_id = None

                for child in koEntry.entries:
                    if child in allClasses:
                        _walkCreate(child, item)

            for koEntry in self.koOrthology:
                if koEntry in allClasses:
                    _walkCreate(koEntry, self.listView)

            self.listView.update()
        else:
            self.listView.setRootIsDecorated(False)
            pathways = self.pathways.items()
            pathways.sort(lambda a, b: cmp(a[1][1], b[1][1]))

            for id, (genes, p_value, ref) in pathways:
                item = QTreeWidgetItem(self.listView)
                item.setText(0, kegg_pathways.get_entry(id).name)
                item.setText(1, "%.5f" % p_value)
                item.setText(2, "%i of %i" % (len(genes), len(self.genes)))
                item.setText(3, "%i of %i" % (ref, len(self.referenceGenes)))
                item.pathway_id = id
                items.append(item)

        self.bestPValueItem = items and items[0] or None
        self.listView.expandAll()
        for i in range(4):
            self.listView.resizeColumnToContents(i)

        if self.bestPValueItem:
            index = self.listView.indexFromItem(self.bestPValueItem)
            self.listView.selectionModel().select(
                index, QItemSelectionModel.ClearAndSelect
            )

    def UpdatePathwayView(self):
        items = self.listView.selectedItems()

        if len(items) > 0:
            item = items[0]
        else:
            item = None

        self.Commit()
        item = item or self.bestPValueItem
        if not item or not item.pathway_id:
            self.pathwayView.SetPathway(None)
            return

        def get_kgml_and_image(pathway_id):
            """Return an initialized KEGGPathway with pre-cached data"""
            p = kegg.KEGGPathway(pathway_id)
            p._get_kgml()  # makes sure the kgml file is downloaded
            p._get_image_filename()  # makes sure the image is downloaded
            return (pathway_id, p)

        self.setEnabled(False)
        self._pathwayTask = Task(
            function=lambda: get_kgml_and_image(item.pathway_id)
        )
        self._pathwayTask.finished.connect(self._onPathwayTaskFinshed)
        self._executor.submit(self._pathwayTask)

    def _onPathwayTaskFinshed(self):
        self.setEnabled(True)
        pathway_id, self.pathway = self._pathwayTask.result()
        self.pathwayView.SetPathway(
            self.pathway,
            self.pathways.get(pathway_id, [[]])[0]
        )

    def UpdatePathwayViewTransform(self):
        self.pathwayView.updateTransform()

    def Update(self):
        """
        Update (recompute enriched pathways) the widget state.
        """
        if not self.data:
            return

        self.error(0)
        self.information(0)

        # XXX: Check data in setData, do not even alow this to be executed if
        # data has no genes
        try:
            genes = self.GeneNamesFromData(self.data)
        except ValueError:
            self.error(0, "Cannot extract gene names from input.")
            genes = []

        if not self.useAttrNames and any("," in gene for gene in genes):
            genes = reduce(add, (split_and_strip(gene, ",")
                                 for gene in genes),
                           [])
            self.information(0,
                             "Separators detected in input gene names. "
                             "Assuming multiple genes per instance.")
        self.queryGenes = genes

        self.information(1)
        reference = None
        if self.useReference and self.refData:
            reference = self.GeneNamesFromData(self.refData)
            if not self.useAttrNames \
                    and any("," in gene for gene in reference):
                reference = reduce(add, (split_and_strip(gene, ",")
                                         for gene in reference),
                                   [])
                self.information(1,
                                 "Separators detected in reference gene "
                                 "names. Assuming multiple genes per "
                                 "instance.")

        org_code = self.SelectedOrganismCode()

        def run_enrichment(org_code, genes, reference=None, progress=None):
            org = kegg.KEGGOrganism(org_code)
            if reference is None:
                reference = org.get_genes()

            # Map 'genes' and 'reference' sets to unique KEGG identifiers
            unique_genes, _, _ = org.get_unique_gene_ids(set(genes))
            unique_ref_genes, _, _ = org.get_unique_gene_ids(set(reference))

            taxid = kegg.to_taxid(org.org_code)
            # Map the taxid back to standard 'common' taxids
            # (as used by 'geneset') if applicable
            r_tax_map = dict((v, k) for k, v in
                             kegg.KEGGGenome.TAXID_MAP.items())
            if taxid in r_tax_map:
                taxid = r_tax_map[taxid]

            # We use the kegg pathway gene sets provided by 'geneset' for
            # the enrichment calculation.

            # Ensure we are using the latest genesets
            # TODO: ?? Is updating the index enough?
            serverfiles.update(geneset.sfdomain, "index.pck")
            kegg_gs_collections = geneset.collections(
                (("KEGG", "pathways"), taxid)
            )

            pathways = pathway_enrichment(
                kegg_gs_collections, unique_genes.keys(),
                unique_ref_genes.keys(),
                callback=progress
            )
            # Ensure that pathway entries are pre-cached for later use in the
            # list/tree view
            kegg_pathways = kegg.KEGGPathways()
            kegg_pathways.pre_cache(
                pathways.keys(), progress_callback=progress
            )

            return pathways, org, unique_genes, unique_ref_genes

        self.progressBarInit()
        self.setEnabled(False)
        self.infoLabel.setText("Retrieving...\n")

        progress = methodinvoke(self, "setProgress", (float,))
        self._enrichTask = Task(
            function=lambda:
                run_enrichment(org_code, genes, reference, progress)
        )
        self._enrichTask.finished.connect(self._onEnrichTaskFinished)
        self._executor.submit(self._enrichTask)

    def _onEnrichTaskFinished(self):
        self.setEnabled(True)
        self.setBlocking(False)

        self.progressBarFinished()
        try:
            pathways, org, unique_genes, unique_ref_genes = \
                self._enrichTask.result()
        except Exception:
            raise

        self.org = org
        self.genes = unique_genes.keys()
        self.uniqueGenesDict = unique_genes
        self.revUniqueGenesDict = dict([(val, key) for key, val in
                                        self.uniqueGenesDict.items()])
        self.referenceGenes = unique_ref_genes.keys()
        self.pathways = pathways

        if not self.pathways:
            self.warning(0, "No enriched pathways found.")
        else:
            self.warning(0)

        count = len(set(self.queryGenes))
        self.infoLabel.setText(
            "%i unique gene names on input\n"
            "%i (%.1f%%) genes names matched" %
            (count, len(unique_genes),
             100.0 * len(unique_genes) / count if count else 0.0)
        )

        self.UpdateListView()

    @pyqtSlot(float)
    def setProgress(self, value):
        self.progressBarValue = value

    def GeneNamesFromData(self, data):
        """
        Extract and return gene names from `data`.
        """
        if self.useAttrNames:
            genes = [str(v.name).strip() for v in data.domain.attributes]
        elif self.geneAttrCandidates:
            index = min(self.geneAttrIndex, len(self.geneAttrCandidates) - 1)
            geneAttr = self.geneAttrCandidates[index]
            genes = [str(e[geneAttr]) for e in data
                     if not e[geneAttr].isSpecial()]
        else:
            raise ValueError("No gene names in data.")
        return genes

    def SelectedOrganismCode(self):
        """
        Return the selected organism code.
        """
        return self.organismCodes[min(self.organismIndex,
                                      len(self.organismCodes) - 1)]

    def selectAll(self):
        """
        Select all items in the pathway view.
        """
        changed = False
        scene = self.pathwayView.scene()
        with disconnected(scene.selectionChanged, self._onSelectionChanged):
            for item in scene.items():
                if item.flags() & QGraphicsItem.ItemIsSelectable and \
                        not item.isSelected():
                    item.setSelected(True)
                    changed = True
        if changed:
            self._onSelectionChanged()

    def _onSelectionChanged(self):
        # Item selection in the pathwayView/scene has changed
        if self.autoCommit:
            self.Commit()
        else:
            self._changedFlag = True

    def Commit(self):
        if self.data:
            selectedItems = self.pathwayView.scene().selectedItems()
            selectedGenes = reduce(set.union, [item.marked_objects
                                               for item in selectedItems],
                                   set())

            if self.useAttrNames:
                selectedVars = [self.data.domain[self.uniqueGenesDict[gene]]
                                for gene in selectedGenes]
                newDomain = Orange.data.Domain(selectedVars, 0)
                data = Orange.data.Table(newDomain, self.data)
                self.send("Selected Examples", data)
            elif self.geneAttrCandidates:
                geneAttr = self.geneAttrCandidates[min(self.geneAttrIndex,
                                                       len(self.geneAttrCandidates) - 1)]
                selectedExamples = []
                otherExamples = []
                for ex in self.data:
                    names = [self.revUniqueGenesDict.get(name, None)
                             for name in split_and_strip(str(ex[geneAttr]), ",")]
                    if any(name and name in selectedGenes for name in names):
                        selectedExamples.append(ex)
                    else:
                        otherExamples.append(ex)

                if selectedExamples:
                    selectedExamples = Orange.data.Table(selectedExamples)
                else:
                    selectedExamples = None

                if otherExamples:
                    otherExamples = Orange.data.Table(otherExamples)
                else:
                    otherExamples = None

                self.send("Selected Examples", selectedExamples)
                self.send("Unselected Examples", otherExamples)
        else:
            self.send("Selected Examples", None)
            self.send("Unselected Examples", None)

    def ClearCache(self):
        from ..kegg import caching
        try:
            caching.clear_cache()
        except Exception, ex:
            QMessageBox.warning(self, "Cache clear", ex.args[0])