Example #1
class DataModel(object):
    def __init__(self):
        super(DataModel,self).__init__()
        self.servers     = QStandardItemModel(0,3)
        self.selected    = ''
        self.autoconnect = False
        
    def loadSettings(self):
        settings = QSettings()
        servers = settings.value("servers", None)
        self.servers.clear()
        if servers:
            for s in servers:
                self.servers.appendRow([QStandardItem(str(i)) for i in s])
        self.selected    =  str(settings.value("selected",    self.selected))
        self.autoconnect = int(settings.value("autoconnect", self.autoconnect))
        
    def saveSettings(self):
        settings = QSettings()
        servers = []
        if self.servers:
            for row in xrange(self.servers.rowCount()):
                entry = []
                for col in xrange(self.servers.columnCount()):
                    entry += [self.servers.item(row,col).text()]
                servers += [entry]
        if len(servers):
            settings.setValue("servers", servers)
        else:
            settings.setValue("servers", None)
        settings.setValue("selected",     str(self.selected))
        settings.setValue("autoconnect", int(self.autoconnect))

    def selectedServer(self):
        selectedServer = []
        if self.selected:
            result = self.servers.findItems(self.selected)
            if len(result):
                mi = result[0]
                row = mi.row()
                for col in xrange(self.servers.columnCount()):
                    selectedServer += [self.servers.item(row,col).text()]
        return selectedServer
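
A minimal round-trip sketch of how the DataModel above might be exercised. It assumes Python 2 with PyQt4 and the v2 sip API (which the xrange loops and the plain str()/int() conversions imply); the organization/application names and the server row are hypothetical placeholders, needed only so QSettings() can locate its storage.

import sip
sip.setapi('QString', 2)
sip.setapi('QVariant', 2)
from PyQt4.QtCore import QCoreApplication
from PyQt4.QtGui import QApplication, QStandardItem

app = QApplication([])                               # a GUI application context, as any real use of this model would have
QCoreApplication.setOrganizationName("ExampleOrg")   # hypothetical identifiers used by QSettings()
QCoreApplication.setApplicationName("ExampleApp")

model = DataModel()
model.servers.appendRow([QStandardItem(v) for v in ("localhost", "8080", "demo")])
model.selected = "localhost"
model.autoconnect = True
model.saveSettings()                                 # persist servers/selected/autoconnect

fresh = DataModel()
fresh.loadSettings()                                 # read them back into a new instance
print(fresh.selectedServer())                        # the row whose first column matches 'selected'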
Example #2
    def find_rules(self):
        if self.data is None: return
        data = self.data
        self.table.model().clear()

        n_examples = len(data)
        NumericItem = self.NumericItem
        StandardItem = self.StandardItem
        filterSearch = self.filterSearch
        itemsetMin = self.filterAntecedentMin + self.filterConsequentMin
        itemsetMax = self.filterAntecedentMax + self.filterConsequentMax
        isSizeMatch = self.isSizeMatch
        isRegexMatch = self.isRegexMatch

        X, mapping = OneHot.encode(data, self.classify)
        self.onehot_mapping = mapping
        names = {item: '{}={}'.format(var.name, val)
                 for item, var, val in OneHot.decode(mapping, data, mapping)}

        # Items that consequent must include if classifying
        class_items = {item
                       for item, var, val in OneHot.decode(mapping, data, mapping)
                       if var is data.domain.class_var} if self.classify else set()
        assert bool(class_items) == bool(self.classify)

        model = QStandardItemModel(self.table)
        for col, (label, tooltip) in enumerate([("Supp", "Support"),
                                                ("Conf", "Confidence (support / antecedent support)"),
                                                ("Covr", "Coverage (antecedent support / number of examples)"),
                                                ("Strg", "Strength (consequent support / antecedent support)"),
                                                ("Lift", "Lift (number of examples * confidence / consequent support)"),
                                                ("Levr", "Leverage ((support * number of examples - antecedent support * consequent support) / (number of examples)²)"),
                                                ("Antecedent", None),
                                                ("", None),
                                                ("Consequent", None)]):
            item = QStandardItem(label)
            item.setToolTip(tooltip)
            model.setHorizontalHeaderItem(col, item)

        #~ # Aggregate rules by common (support,confidence) for scatterplot
        #~ scatter_agg = defaultdict(list)

        # Find itemsets
        nRules = 0
        itemsets = {}
        progress = gui.ProgressBar(self, self.maxRules + 1)
        for itemset, support in frequent_itemsets(X, self.minSupport / 100):
            itemsets[itemset] = support

            if class_items and not class_items & itemset:
                continue

            # Filter itemset by joined filters before descending into it
            itemset_str = ' '.join(names[i] for i in itemset)
            if (filterSearch and
                (len(itemset) < itemsetMin or
                 itemsetMax < len(itemset) or
                 not isRegexMatch(itemset_str, itemset_str))):
                continue

            for rule in gen_assoc_rules(itemsets,
                                        self.minConfidence / 100,
                                        itemset):
                (left, right), support, confidence = rule

                if class_items and right - class_items:
                    continue
                if filterSearch and not isSizeMatch(len(left), len(right)):
                    continue
                left_str = ' '.join(names[i] for i in sorted(left))
                right_str = ' '.join(names[i] for i in sorted(right))
                if filterSearch and not isRegexMatch(left_str, right_str):
                    continue

                # All filters matched, calculate stats and add table row
                _, _, _, coverage, strength, lift, leverage = next(
                    rules_stats((rule,), itemsets, n_examples))

                support_item = NumericItem(support / n_examples)
                # Set row data on first column
                support_item.setData((itemset - class_items,
                                      class_items and (class_items & itemset).pop()),
                                     self.ROW_DATA_ROLE)
                left_item = StandardItem(left_str, len(left))
                left_item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                model.appendRow([support_item,
                                 NumericItem(confidence),
                                 NumericItem(coverage),
                                 NumericItem(strength),
                                 NumericItem(lift),
                                 NumericItem(leverage),
                                 left_item,
                                 StandardItem('→'),
                                 StandardItem(right_str, len(right))])
                #~ scatter_agg[(round(support / n_examples, 2), round(confidence, 2))].append((left, right))
                nRules += 1
                progress.advance()
                if nRules >= self.maxRules:
                    break
            if nRules >= self.maxRules:
                break

        # Populate the TableView
        table = self.table
        table.setHidden(True)
        table.setSortingEnabled(False)
        proxy_model = self.proxy_model
        proxy_model.setSourceModel(model)
        table.setModel(proxy_model)
        for i in range(model.columnCount()):
            table.resizeColumnToContents(i)
        table.setSortingEnabled(True)
        table.setHidden(False)
        progress.finish()

        self.nRules = nRules
        self.nFilteredRules = proxy_model.rowCount()  # TODO: continue; also add in owitemsets
        self.nSelectedRules = 0
        self.nSelectedExamples = 0
Example #3
class OWTestLearners(widget.OWWidget):
    name = "Test Learners"
    description = ""
    icon = "icons/TestLearners1.svg"
    priority = 100

    inputs = [("Learner", Orange.classification.Learner,
               "set_learner", widget.Multiple),
              ("Data", Orange.data.Table, "set_train_data", widget.Default),
              ("Test Data", Orange.data.Table, "set_test_data")]

    outputs = [("Evaluation Results", testing.Results)]

    #: Resampling/testing types
    KFold, LeaveOneOut, Bootstrap, TestOnTrain, TestOnTest = 0, 1, 2, 3, 4

    #: Selected resampling type
    resampling = settings.Setting(0)
    #: Number of folds for K-fold cross validation
    k_folds = settings.Setting(10)
    #: Number of repeats for bootstrap sampling
    n_repeat = settings.Setting(10)
    #: Bootstrap sampling p
    sample_p = settings.Setting(75)

    def __init__(self, parent=None):
        super().__init__(parent)

        self.train_data = None
        self.test_data = None

        #: An Ordered dictionary with current inputs and their testing
        #: results.
        self.learners = OrderedDict()

        sbox = gui.widgetBox(self.controlArea, "Sampling")
        rbox = gui.radioButtons(
            sbox, self, "resampling", callback=self._param_changed
        )
        gui.appendRadioButton(rbox, "Cross validation")
        ibox = gui.indentedBox(rbox)
        gui.spin(ibox, self, "k_folds", 2, 50, label="Number of folds:",
                 callback=self.kfold_changed)
        gui.appendRadioButton(rbox, "Leave one out")
        gui.appendRadioButton(rbox, "Random sampling")
        ibox = gui.indentedBox(rbox)
        gui.spin(ibox, self, "n_repeat", 2, 50, label="Repeat train/test",
                 callback=self.bootstrap_changed)
        gui.widgetLabel(ibox, "Relative training set size:")
        gui.hSlider(ibox, self, "sample_p", minValue=1, maxValue=100,
                    ticks=20, vertical=False, labelFormat="%d %%",
                    callback=self.bootstrap_changed)

        gui.appendRadioButton(rbox, "Test on train data")
        gui.appendRadioButton(rbox, "Test on test data")

        rbox.layout().addSpacing(5)
        gui.button(rbox, self, "Apply", callback=self.apply)

        gui.rubber(self.controlArea)

        self.view = QTreeView(
            rootIsDecorated=False,
            uniformRowHeights=True,
            wordWrap=True,
            editTriggers=QTreeView.NoEditTriggers
        )
        header = self.view.header()
        header.setResizeMode(QHeaderView.ResizeToContents)
        header.setDefaultAlignment(Qt.AlignCenter)
        header.setStretchLastSection(False)

        self.result_model = QStandardItemModel()
        self.view.setModel(self.result_model)
        self.view.setItemDelegate(ItemDelegate())
        self._update_header()
        box = gui.widgetBox(self.mainArea, "Evaluation Results")
        box.layout().addWidget(self.view)

    def set_learner(self, learner, key):
        if key in self.learners and learner is None:
            del self.learners[key]
        else:
            self.learners[key] = Input(learner, None, ())
        self._update_stats_model()

    def set_train_data(self, data):
        self.error(0)
        if data is not None:
            if data.domain.class_var is None:
                self.error(0, "Train data input requires a class variable")
                data = None

        self.train_data = data
        self._update_header()
        self._invalidate()

    def set_test_data(self, data):
        self.error(1)
        if data is not None:
            if data.domain.class_var is None:
                self.error(1, "Test data input requires a class variable")
                data = None

        self.test_data = data
        if self.resampling == OWTestLearners.TestOnTest:
            self._invalidate()

    def handleNewSignals(self):
        self.update_results()
        self.commit()

    def kfold_changed(self):
        self.resampling = OWTestLearners.KFold
        self._param_changed()

    def bootstrap_changed(self):
        self.resampling = OWTestLearners.Bootstrap
        self._param_changed()

    def _param_changed(self):
        self._invalidate()

    def update_results(self):
        self.warning([1, 2])
        self.error(2)

        if self.train_data is None:
            return

        if self.resampling == OWTestLearners.TestOnTest:
            if self.test_data is None:
                self.warning(2, "Missing separate test data input")
                return

            elif self.test_data.domain.class_var != \
                    self.train_data.domain.class_var:
                self.error(2, ("Inconsistent class variable between test " +
                               "and train data sets"))
                return

        # items in need of an update
        items = [(key, input) for key, input in self.learners.items()
                 if input.results is None]
        learners = [input.learner for _, input in items]

        self.setStatusMessage("Running")
        if self.test_data is not None and \
                self.resampling != OWTestLearners.TestOnTest:
            self.warning(1, "Test data is present but unused. "
                            "Select 'Test on test data' to use it.")

        # TODO: Test each learner individually

        if self.resampling == OWTestLearners.KFold:
            results = testing.CrossValidation(
                self.train_data, learners, k=self.k_folds, store_data=True
            )
        elif self.resampling == OWTestLearners.LeaveOneOut:
            results = testing.LeaveOneOut(
                self.train_data, learners, store_data=True
            )
        elif self.resampling == OWTestLearners.Bootstrap:
            p = self.sample_p / 100.0
            results = testing.Bootstrap(
                self.train_data, learners, n_resamples=self.n_repeat, p=p,
                store_data=True
            )
        elif self.resampling == OWTestLearners.TestOnTrain:
            results = testing.TestOnTrainingData(
                self.train_data, learners, store_data=True
            )
        elif self.resampling == OWTestLearners.TestOnTest:
            if self.test_data is None:
                return
            results = testing.TestOnTestData(
                self.train_data, self.test_data, learners, store_data=True
            )
        else:
            assert False

        results = list(split_by_model(results))
        class_var = self.train_data.domain.class_var
        
        if is_discrete(class_var):
            test_stats = classification_stats
        else:
            test_stats = regression_stats
        
        self._update_header()
        
        stats = [test_stats(res) for res in results]
        for (key, input), res, stat in zip(items, results, stats):
            self.learners[key] = input._replace(results=res, stats=stat)

        self.setStatusMessage("")
        
        self._update_stats_model()

    def _update_header(self):
        headers = ["Method"]
        if self.train_data is not None:
            if is_discrete(self.train_data.domain.class_var):
                headers.extend(classification_stats.headers)
            else:
                headers.extend(regression_stats.headers)
        for i in reversed(range(len(headers),
                                self.result_model.columnCount())):
            self.result_model.takeColumn(i)

        self.result_model.setHorizontalHeaderLabels(headers)

    def _update_stats_model(self):
        model = self.view.model()

        for r in reversed(range(model.rowCount())):
            model.takeRow(r)

        for input in self.learners.values():
            name = learner_name(input.learner)
            row = []
            head = QStandardItem()
            head.setData(name, Qt.DisplayRole)
            row.append(head)
            for stat in input.stats:
                item = QStandardItem()
                item.setData(" {:.3f} ".format(stat[0]), Qt.DisplayRole)
                row.append(item)
            model.appendRow(row)

    def _invalidate(self, which=None):
        if which is None:
            which = self.learners.keys()

        all_keys = list(self.learners.keys())
        model = self.view.model()

        for key in which:
            self.learners[key] = \
                self.learners[key]._replace(results=None, stats=None)

            if key in self.learners:
                row = all_keys.index(key)
                for c in range(1, model.columnCount()):
                    item = model.item(row, c)
                    if item is not None:
                        item.setData(None, Qt.DisplayRole)

    def apply(self):
        self.update_results()
        self.commit()

    def commit(self):
        results = [val.results for val in self.learners.values()
                   if val.results is not None]
        if results:
            combined = results_merge(results)
            combined.learner_names = [learner_name(val.learner)
                                     for val in self.learners.values()]
        else:
            combined = None
        self.send("Evaluation Results", combined)
Example #4
class OWTestLearners(widget.OWWidget):
    name = "Test & Score"
    description = "Cross-validation accuracy estimation."
    icon = "icons/TestLearners1.svg"
    priority = 100

    inputs = [("Learner", Learner,
               "set_learner", widget.Multiple),
              ("Data", Table, "set_train_data", widget.Default),
              ("Test Data", Table, "set_test_data"),
              ("Preprocessor", Preprocess, "set_preprocessor")]

    outputs = [("Predictions", Orange.data.Table),
               ("Evaluation Results", Orange.evaluation.Results)]

    settingsHandler = settings.ClassValuesContextHandler()

    #: Resampling/testing types
    KFold, LeaveOneOut, ShuffleSplit, TestOnTrain, TestOnTest = 0, 1, 2, 3, 4

    #: Selected resampling type
    resampling = settings.Setting(0)
    #: Number of folds for K-fold cross validation
    k_folds = settings.Setting(10)
    #: Number of repeats for ShuffleSplit sampling
    n_repeat = settings.Setting(10)
    #: ShuffleSplit sampling p
    sample_p = settings.Setting(75)

    TARGET_AVERAGE = "(Average over classes)"
    class_selection = settings.ContextSetting(TARGET_AVERAGE)

    def __init__(self):
        super().__init__()

        self.data = None
        self.test_data = None
        self.preprocessor = None
        self.train_data_missing_vals = False
        self.test_data_missing_vals = False

        #: An Ordered dictionary with current inputs and their testing
        #: results.
        self.learners = OrderedDict()

        sbox = gui.widgetBox(self.controlArea, "Sampling")
        rbox = gui.radioButtons(
            sbox, self, "resampling", callback=self._param_changed
        )
        gui.appendRadioButton(rbox, "Cross validation")
        ibox = gui.indentedBox(rbox)
        gui.spin(ibox, self, "k_folds", 2, 50, label="Number of folds:",
                 callback=self.kfold_changed)
        gui.appendRadioButton(rbox, "Leave one out")
        gui.appendRadioButton(rbox, "Random sampling")
        ibox = gui.indentedBox(rbox)
        gui.spin(ibox, self, "n_repeat", 2, 50, label="Repeat train/test",
                 callback=self.shuffle_split_changed)
        gui.widgetLabel(ibox, "Relative training set size:")
        gui.hSlider(ibox, self, "sample_p", minValue=1, maxValue=99,
                    ticks=20, vertical=False, labelFormat="%d %%",
                    callback=self.shuffle_split_changed)

        gui.appendRadioButton(rbox, "Test on train data")
        gui.appendRadioButton(rbox, "Test on test data")

        rbox.layout().addSpacing(5)
        self.apply_button = gui.button(
            rbox, self, "Apply", callback=self.apply, default=True)

        self.cbox = gui.widgetBox(self.controlArea, "Target class")
        self.class_selection_combo = gui.comboBox(
            self.cbox, self, "class_selection", items=[],
            sendSelectedValue=True, valueType=str,
            callback=self._on_target_class_changed,
            contentsLength=8)

        gui.rubber(self.controlArea)

        self.view = QTreeView(
            rootIsDecorated=False,
            uniformRowHeights=True,
            wordWrap=True,
            editTriggers=QTreeView.NoEditTriggers
        )
        header = self.view.header()
        header.setResizeMode(QHeaderView.ResizeToContents)
        header.setDefaultAlignment(Qt.AlignCenter)
        header.setStretchLastSection(False)

        self.result_model = QStandardItemModel(self)
        self.result_model.setHorizontalHeaderLabels(["Method"])
        self.view.setModel(self.result_model)
        self.view.setItemDelegate(ItemDelegate())

        box = gui.widgetBox(self.mainArea, "Evaluation Results")
        box.layout().addWidget(self.view)

    def sizeHint(self):
        return QSize(780, 1)

    def set_learner(self, learner, key):
        """
        Set the input `learner` for `key`.
        """
        if key in self.learners and learner is None:
            # Removed
            del self.learners[key]
        else:
            self.learners[key] = Input(learner, None, None)
            self._invalidate([key])

    def set_train_data(self, data):
        """
        Set the input training dataset.
        """
        self.error(0)
        self.information(0)
        if data and not data.domain.class_var:
            self.error(0, "Train data input requires a class variable")
            data = None

        if isinstance(data, SqlTable):
            if data.approx_len() < AUTO_DL_LIMIT:
                data = Table(data)
            else:
                self.information(0, "Train data has been sampled")
                data_sample = data.sample_time(1, no_cache=True)
                data_sample.download_data(AUTO_DL_LIMIT, partial=True)
                data = Table(data_sample)

        self.warning(4)
        self.train_data_missing_vals = data is not None and \
                                       np.isnan(data.Y).any()
        if self.train_data_missing_vals or self.test_data_missing_vals:
            self.warning(4, self._get_missing_data_warning(
                self.train_data_missing_vals, self.test_data_missing_vals
            ))
            if data:
                data = RemoveNaNClasses(data)

        self.data = data
        self.closeContext()
        if data is not None:
            self._update_class_selection()
            self.openContext(data.domain.class_var)
        self._invalidate()

    def set_test_data(self, data):
        """
        Set the input separate testing dataset.
        """
        self.error(1)
        self.information(1)
        if data and not data.domain.class_var:
            self.error(1, "Test data input requires a class variable")
            data = None

        if isinstance(data, SqlTable):
            if data.approx_len() < AUTO_DL_LIMIT:
                data = Table(data)
            else:
                self.information(1, "Test data has been sampled")
                data_sample = data.sample_time(1, no_cache=True)
                data_sample.download_data(AUTO_DL_LIMIT, partial=True)
                data = Table(data_sample)

        self.warning(4)
        self.test_data_missing_vals = data is not None and \
                                      np.isnan(data.Y).any()
        if self.train_data_missing_vals or self.test_data_missing_vals:
            self.warning(4, self._get_missing_data_warning(
                self.train_data_missing_vals, self.test_data_missing_vals
            ))
            if data:
                data = RemoveNaNClasses(data)

        self.test_data = data
        if self.resampling == OWTestLearners.TestOnTest:
            self._invalidate()

    def _get_missing_data_warning(self, train_missing, test_missing):
        return "Instances with unknown target values were removed from{}data"\
            .format(train_missing * test_missing * " "
                    or train_missing * " train " or test_missing * " test ")

    def set_preprocessor(self, preproc):
        """
        Set the input preprocessor to apply on the training data.
        """
        self.preprocessor = preproc
        self._invalidate()

    def handleNewSignals(self):
        """Reimplemented from OWWidget.handleNewSignals."""
        self._update_class_selection()
        self.apply()

    def kfold_changed(self):
        self.resampling = OWTestLearners.KFold
        self._param_changed()

    def shuffle_split_changed(self):
        self.resampling = OWTestLearners.ShuffleSplit
        self._param_changed()

    def _param_changed(self):
        self._invalidate()

    def _update_results(self):
        """
        Run/evaluate the learners.
        """
        self.warning([1, 2])
        self.error(2)

        if self.data is None:
            return

        class_var = self.data.domain.class_var

        if self.resampling == OWTestLearners.TestOnTest:
            if self.test_data is None:
                self.warning(2, "Missing separate test data input")
                return
            elif self.test_data.domain.class_var != class_var:
                self.error(2, ("Inconsistent class variable between test " +
                               "and train data sets"))
                return

        # items in need of an update
        items = [(key, slot) for key, slot in self.learners.items()
                 if slot.results is None]
        learners = [slot.learner for _, slot in items]
        if len(items) == 0:
            return

        if self.test_data is not None and \
                self.resampling != OWTestLearners.TestOnTest:
            self.warning(1, "Test data is present but unused. "
                            "Select 'Test on test data' to use it.")

        rstate = 42
        def update_progress(finished):
            self.progressBarSet(100 * finished)
        common_args = dict(
            store_data=True,
            preprocessor=self.preprocessor,
            callback=update_progress)
        self.setStatusMessage("Running")
        self.progressBarInit()

        try:
            if self.resampling == OWTestLearners.KFold:
                warnings = []
                results = Orange.evaluation.CrossValidation(
                    self.data, learners, k=self.k_folds, random_state=rstate,
                    warnings=warnings, **common_args)
                if warnings:
                    self.warning(2, warnings[0])
            elif self.resampling == OWTestLearners.LeaveOneOut:
                results = Orange.evaluation.LeaveOneOut(
                    self.data, learners, **common_args)
            elif self.resampling == OWTestLearners.ShuffleSplit:
                train_size = self.sample_p / 100
                results = Orange.evaluation.ShuffleSplit(
                    self.data, learners, n_resamples=self.n_repeat,
                    train_size=train_size, test_size=None,
                    random_state=rstate, **common_args)
            elif self.resampling == OWTestLearners.TestOnTrain:
                results = Orange.evaluation.TestOnTrainingData(
                    self.data, learners, **common_args)
            elif self.resampling == OWTestLearners.TestOnTest:
                results = Orange.evaluation.TestOnTestData(
                    self.data, self.test_data, learners, **common_args)
            else:
                assert False
        except RuntimeError as e:
            self.error(2, str(e))
            self.setStatusMessage("")
            self.progressBarFinished()
            return

        learner_key = {slot.learner: key for key, slot in self.learners.items()}
        for learner, result in zip(learners, split_by_model(results)):
            stats = None
            if class_var.is_discrete:
                scorers = classification_stats.scores
            elif class_var.is_continuous:
                scorers = regression_stats.scores
            else:
                scorers = None
            if scorers:
                ex = result.failed[0]
                if ex:
                    stats = [Try.Fail(ex)] * len(scorers)
                    result = Try.Fail(ex)
                else:
                    stats = [Try(lambda: score(result)) for score in scorers]
                    result = Try.Success(result)
            key = learner_key[learner]
            self.learners[key] = \
                self.learners[key]._replace(results=result, stats=stats)

        self.setStatusMessage("")
        self.progressBarFinished()

    def _update_header(self):
        # Set the correct horizontal header labels on the results_model.
        headers = ["Method"]
        if self.data is not None:
            if self.data.domain.has_discrete_class:
                headers.extend(classification_stats.headers)
            else:
                headers.extend(regression_stats.headers)

        # remove possible extra columns from the model.
        for i in reversed(range(len(headers),
                                self.result_model.columnCount())):
            self.result_model.takeColumn(i)

        self.result_model.setHorizontalHeaderLabels(headers)

    def _update_stats_model(self):
        # Update the results_model with up to date scores.
        # Note: The target class specific scores (if requested) are
        # computed as needed in this method.
        model = self.view.model()
        # clear the table model, but preserving the header labels
        for r in reversed(range(model.rowCount())):
            model.takeRow(r)

        target_index = None
        if self.data is not None:
            class_var = self.data.domain.class_var
            if class_var.is_discrete and \
                    self.class_selection != self.TARGET_AVERAGE:
                target_index = class_var.values.index(self.class_selection)
        else:
            class_var = None

        errors = []
        has_missing_scores = False

        for key, slot in self.learners.items():
            name = learner_name(slot.learner)
            head = QStandardItem(name)
            head.setData(key, Qt.UserRole)
            if isinstance(slot.results, Try.Fail):
                head.setToolTip(str(slot.results.exception))
                head.setText("{} (error)".format(name))
                head.setForeground(QtGui.QBrush(Qt.red))
                errors.append("{name} failed with error:\n"
                              "{exc.__class__.__name__}: {exc!s}"
                              .format(name=name, exc=slot.results.exception))

            row = [head]

            if class_var is not None and class_var.is_discrete and \
                    target_index is not None:
                if slot.results is not None and slot.results.success:
                    ovr_results = results_one_vs_rest(
                        slot.results.value, target_index)

                    stats = [Try(lambda: score(ovr_results))
                             for score in classification_stats.scores]
                else:
                    stats = None
            else:
                stats = slot.stats

            if stats is not None:
                for stat in stats:
                    item = QStandardItem()
                    if stat.success:
                        item.setText("{:.3f}".format(stat.value[0]))
                    else:
                        item.setToolTip(str(stat.exception))
                        has_missing_scores = True
                    row.append(item)

            model.appendRow(row)

        if errors:
            self.error(3, "\n".join(errors))
        else:
            self.error(3)

        if has_missing_scores:
            self.warning(3, "Some scores could not be computed")
        else:
            self.warning(3)

    def _update_class_selection(self):
        self.class_selection_combo.setCurrentIndex(-1)
        self.class_selection_combo.clear()
        if not self.data:
            return

        if self.data.domain.has_discrete_class:
            self.cbox.setVisible(True)
            class_var = self.data.domain.class_var
            items = [self.TARGET_AVERAGE] + class_var.values
            self.class_selection_combo.addItems(items)

            class_index = 0
            if self.class_selection in class_var.values:
                class_index = class_var.values.index(self.class_selection) + 1

            self.class_selection_combo.setCurrentIndex(class_index)
            self.class_selection = items[class_index]
        else:
            self.cbox.setVisible(False)

    def _on_target_class_changed(self):
        self._update_stats_model()

    def _invalidate(self, which=None):
        # Invalidate learner results for `which` input keys
        # (if None then all learner results are invalidated)
        if which is None:
            which = self.learners.keys()

        model = self.view.model()
        statmodelkeys = [model.item(row, 0).data(Qt.UserRole)
                         for row in range(model.rowCount())]

        for key in which:
            self.learners[key] = \
                self.learners[key]._replace(results=None, stats=None)

            if key in statmodelkeys:
                row = statmodelkeys.index(key)
                for c in range(1, model.columnCount()):
                    item = model.item(row, c)
                    if item is not None:
                        item.setData(None, Qt.DisplayRole)
                        item.setData(None, Qt.ToolTipRole)

        self.apply_button.setEnabled(True)

    def apply(self):
        self.apply_button.setEnabled(False)
        self._update_header()
        # Update the view to display the model names
        self._update_stats_model()
        self._update_results()
        self._update_stats_model()
        self.commit()

    def commit(self):
        valid = [slot for slot in self.learners.values()
                 if slot.results is not None and slot.results.success]
        if valid:
            # Evaluation results
            combined = results_merge([slot.results.value for slot in valid])
            combined.learner_names = [learner_name(slot.learner)
                                      for slot in valid]

            # Predictions & Probabilities
            predictions = combined.get_augmented_data(combined.learner_names)
        else:
            combined = None
            predictions = None
        self.send("Evaluation Results", combined)
        self.send("Predictions", predictions)
Example #5
    def find_rules(self):
        if self.data is None or not len(self.data):
            return
        if self._is_running:
            return
        self._is_running = True
        data = self.data
        self.table.model().clear()

        n_examples = len(data)
        NumericItem = self.NumericItem
        StandardItem = self.StandardItem
        filterSearch = self.filterSearch
        itemsetMin = self.filterAntecedentMin + self.filterConsequentMin
        itemsetMax = self.filterAntecedentMax + self.filterConsequentMax
        isSizeMatch = self.isSizeMatch
        isRegexMatch = self.isRegexMatch

        X, mapping = OneHot.encode(data, self.classify)
        self.error(911)
        if X is None:
            self.error(911, 'Need some discrete data to work with.')

        self.onehot_mapping = mapping
        ITEM_FMT = '{}' if issparse(data.X) else '{}={}'
        names = {
            item:
            ('{}={}' if var is data.domain.class_var else ITEM_FMT).format(
                var.name, val)
            for item, var, val in OneHot.decode(mapping, data, mapping)
        }
        # Items that consequent must include if classifying
        class_items = {
            item
            for item, var, val in OneHot.decode(mapping, data, mapping)
            if var is data.domain.class_var
        } if self.classify else set()
        assert bool(class_items) == bool(self.classify)

        model = QStandardItemModel(self.table)
        for col, (label, tooltip) in enumerate([
            ("Supp", "Support"),
            ("Conf", "Confidence (support / antecedent support)"),
            ("Covr", "Coverage (antecedent support / number of examples)"),
            ("Strg", "Strength (consequent support / antecedent support)"),
            ("Lift",
             "Lift (number of examples * confidence / consequent support)"),
            ("Levr",
             "Leverage ((support * number of examples - antecedent support * consequent support) / (number of examples)²)"
             ), ("Antecedent", None), ("", None), ("Consequent", None)
        ]):
            item = QStandardItem(label)
            item.setToolTip(tooltip)
            model.setHorizontalHeaderItem(col, item)

        #~ # Aggregate rules by common (support,confidence) for scatterplot
        #~ scatter_agg = defaultdict(list)

        # Find itemsets
        nRules = 0
        itemsets = {}
        with self.progressBar(self.maxRules + 1) as progress:
            for itemset, support in frequent_itemsets(X,
                                                      self.minSupport / 100):
                itemsets[itemset] = support

                if class_items and not class_items & itemset:
                    continue

                # Filter itemset by joined filters before descending into it
                itemset_str = ' '.join(names[i] for i in itemset)
                if (filterSearch and
                    (len(itemset) < itemsetMin or itemsetMax < len(itemset)
                     or not isRegexMatch(itemset_str, itemset_str))):
                    continue

                for rule in association_rules(itemsets,
                                              self.minConfidence / 100,
                                              itemset):
                    left, right, support, confidence = rule

                    if class_items and right - class_items:
                        continue
                    if filterSearch and not isSizeMatch(len(left), len(right)):
                        continue
                    left_str = ', '.join(names[i] for i in sorted(left))
                    right_str = ', '.join(names[i] for i in sorted(right))
                    if filterSearch and not isRegexMatch(left_str, right_str):
                        continue

                    # All filters matched, calculate stats and add table row
                    _, _, _, _, coverage, strength, lift, leverage = next(
                        rules_stats((rule, ), itemsets, n_examples))

                    support_item = NumericItem(support / n_examples)
                    # Set row data on first column
                    support_item.setData(
                        (itemset - class_items, class_items and
                         (class_items & itemset).pop()), self.ROW_DATA_ROLE)
                    left_item = StandardItem(left_str, len(left))
                    left_item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                    model.appendRow([
                        support_item,
                        NumericItem(confidence),
                        NumericItem(coverage),
                        NumericItem(strength),
                        NumericItem(lift),
                        NumericItem(leverage), left_item,
                        StandardItem('→'),
                        StandardItem(right_str, len(right))
                    ])
                    #~ scatter_agg[(round(support / n_examples, 2), round(confidence, 2))].append((left, right))
                    nRules += 1
                    progress.advance()
                    if nRules >= self.maxRules:
                        break
                if nRules >= self.maxRules:
                    break

        # Populate the TableView
        table = self.table
        table.setHidden(True)
        table.setSortingEnabled(False)
        proxy_model = self.proxy_model
        proxy_model.setSourceModel(model)
        table.setModel(proxy_model)
        for i in range(model.columnCount()):
            table.resizeColumnToContents(i)
        table.setSortingEnabled(True)
        table.setHidden(False)

        self.nRules = nRules
        self.nFilteredRules = proxy_model.rowCount(
        )  # TODO: continue; also add in owitemsets
        self.nSelectedRules = 0
        self.nSelectedExamples = 0
        self._is_running = False
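
A tiny standalone run of the mining primitives this method relies on, assuming frequent_itemsets, association_rules and rules_stats come from orangecontrib.associate.fpgrowth as in the Orange Associate add-on; the boolean transaction matrix is made-up illustration data.

import numpy as np
from orangecontrib.associate.fpgrowth import (
    frequent_itemsets, association_rules, rules_stats)

X = np.array([[1, 1, 0],
              [1, 1, 1],
              [0, 1, 1]], dtype=bool)            # 3 transactions over items 0..2
itemsets = dict(frequent_itemsets(X, 2 / 3))     # frozenset itemset -> absolute support
rules = association_rules(itemsets, 0.8)         # yields (antecedent, consequent, support, confidence)
for ante, cons, supp, conf, cov, strength, lift, leverage in rules_stats(
        rules, itemsets, len(X)):
    print(sorted(ante), '->', sorted(cons), 'conf=%.2f lift=%.2f' % (conf, lift))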
Example #6
class OWTestLearners(widget.OWWidget):
    name = "Test & Score"
    description = "Cross-validation accuracy estimation."
    icon = "icons/TestLearners1.svg"
    priority = 100

    inputs = [("Learner", Learner, "set_learner", widget.Multiple),
              ("Data", Table, "set_train_data", widget.Default),
              ("Test Data", Table, "set_test_data"),
              ("Preprocessor", Preprocess, "set_preprocessor")]

    outputs = [("Predictions", Orange.data.Table),
               ("Evaluation Results", Orange.evaluation.Results)]

    settingsHandler = settings.ClassValuesContextHandler()

    #: Resampling/testing types
    KFold, ShuffleSplit, LeaveOneOut, TestOnTrain, TestOnTest = 0, 1, 2, 3, 4

    #: Selected resampling type
    resampling = settings.Setting(0)
    #: Number of folds for K-fold cross validation
    k_folds = settings.Setting(10)
    #: Stratified sampling for K-fold
    cv_stratified = settings.Setting(True)
    #: Number of repeats for ShuffleSplit sampling
    n_repeat = settings.Setting(10)
    #: ShuffleSplit sampling p
    sample_p = settings.Setting(75)
    #: Stratified sampling for Random Sampling
    shuffle_stratified = settings.Setting(True)

    TARGET_AVERAGE = "(Average over classes)"
    class_selection = settings.ContextSetting(TARGET_AVERAGE)

    def __init__(self):
        super().__init__()

        self.data = None
        self.test_data = None
        self.preprocessor = None
        self.train_data_missing_vals = False
        self.test_data_missing_vals = False

        #: An Ordered dictionary with current inputs and their testing
        #: results.
        self.learners = OrderedDict()

        sbox = gui.widgetBox(self.controlArea, "Sampling")
        rbox = gui.radioButtons(sbox,
                                self,
                                "resampling",
                                callback=self._param_changed)

        gui.appendRadioButton(rbox, "Cross validation")
        ibox = gui.indentedBox(rbox)
        gui.spin(ibox,
                 self,
                 "k_folds",
                 2,
                 50,
                 label="Number of folds:",
                 callback=self.kfold_changed)
        gui.checkBox(ibox,
                     self,
                     "cv_stratified",
                     "Stratified",
                     callback=self.kfold_changed)

        gui.appendRadioButton(rbox, "Random sampling")
        ibox = gui.indentedBox(rbox)
        gui.spin(ibox,
                 self,
                 "n_repeat",
                 2,
                 50,
                 label="Repeat train/test",
                 callback=self.shuffle_split_changed)
        gui.widgetLabel(ibox, "Relative training set size:")
        gui.hSlider(ibox,
                    self,
                    "sample_p",
                    minValue=1,
                    maxValue=99,
                    ticks=20,
                    vertical=False,
                    labelFormat="%d %%",
                    callback=self.shuffle_split_changed)
        gui.checkBox(ibox,
                     self,
                     "shuffle_stratified",
                     "Stratified",
                     callback=self.shuffle_split_changed)

        gui.appendRadioButton(rbox, "Leave one out")

        gui.appendRadioButton(rbox, "Test on train data")
        gui.appendRadioButton(rbox, "Test on test data")

        rbox.layout().addSpacing(5)
        self.apply_button = gui.button(rbox,
                                       self,
                                       "Apply",
                                       callback=self.apply,
                                       default=True)

        self.cbox = gui.widgetBox(self.controlArea, "Target class")
        self.class_selection_combo = gui.comboBox(
            self.cbox,
            self,
            "class_selection",
            items=[],
            sendSelectedValue=True,
            valueType=str,
            callback=self._on_target_class_changed,
            contentsLength=8)

        gui.rubber(self.controlArea)

        self.view = QTreeView(rootIsDecorated=False,
                              uniformRowHeights=True,
                              wordWrap=True,
                              editTriggers=QTreeView.NoEditTriggers)
        header = self.view.header()
        header.setResizeMode(QHeaderView.ResizeToContents)
        header.setDefaultAlignment(Qt.AlignCenter)
        header.setStretchLastSection(False)

        self.result_model = QStandardItemModel(self)
        self.result_model.setHorizontalHeaderLabels(["Method"])
        self.view.setModel(self.result_model)
        self.view.setItemDelegate(ItemDelegate())

        box = gui.widgetBox(self.mainArea, "Evaluation Results")
        box.layout().addWidget(self.view)

    def sizeHint(self):
        return QSize(780, 1)

    def set_learner(self, learner, key):
        """
        Set the input `learner` for `key`.
        """
        if key in self.learners and learner is None:
            # Removed
            del self.learners[key]
        else:
            self.learners[key] = Input(learner, None, None)
            self._invalidate([key])

    def set_train_data(self, data):
        """
        Set the input training dataset.
        """
        self.error(0)
        self.information(0)
        if data and not data.domain.class_var:
            self.error(0, "Train data input requires a class variable")
            data = None

        if isinstance(data, SqlTable):
            if data.approx_len() < AUTO_DL_LIMIT:
                data = Table(data)
            else:
                self.information(0, "Train data has been sampled")
                data_sample = data.sample_time(1, no_cache=True)
                data_sample.download_data(AUTO_DL_LIMIT, partial=True)
                data = Table(data_sample)

        self.warning(4)
        self.train_data_missing_vals = data is not None and \
                                       np.isnan(data.Y).any()
        if self.train_data_missing_vals or self.test_data_missing_vals:
            self.warning(
                4,
                self._get_missing_data_warning(self.train_data_missing_vals,
                                               self.test_data_missing_vals))
            if data:
                data = RemoveNaNClasses(data)

        self.data = data
        self.closeContext()
        if data is not None:
            self._update_class_selection()
            self.openContext(data.domain.class_var)
        self._invalidate()

    def set_test_data(self, data):
        """
        Set the input separate testing dataset.
        """
        self.error(1)
        self.information(1)
        if data and not data.domain.class_var:
            self.error(1, "Test data input requires a class variable")
            data = None

        if isinstance(data, SqlTable):
            if data.approx_len() < AUTO_DL_LIMIT:
                data = Table(data)
            else:
                self.information(1, "Test data has been sampled")
                data_sample = data.sample_time(1, no_cache=True)
                data_sample.download_data(AUTO_DL_LIMIT, partial=True)
                data = Table(data_sample)

        self.warning(4)
        self.test_data_missing_vals = data is not None and \
                                      np.isnan(data.Y).any()
        if self.train_data_missing_vals or self.test_data_missing_vals:
            self.warning(
                4,
                self._get_missing_data_warning(self.train_data_missing_vals,
                                               self.test_data_missing_vals))
            if data:
                data = RemoveNaNClasses(data)

        self.test_data = data
        if self.resampling == OWTestLearners.TestOnTest:
            self._invalidate()

    def _get_missing_data_warning(self, train_missing, test_missing):
        return "Instances with unknown target values were removed from{}data"\
            .format(train_missing * test_missing * " "
                    or train_missing * " train " or test_missing * " test ")

    def set_preprocessor(self, preproc):
        """
        Set the input preprocessor to apply on the training data.
        """
        self.preprocessor = preproc
        self._invalidate()

    def handleNewSignals(self):
        """Reimplemented from OWWidget.handleNewSignals."""
        self._update_class_selection()
        self.apply()

    def kfold_changed(self):
        self.resampling = OWTestLearners.KFold
        self._param_changed()

    def shuffle_split_changed(self):
        self.resampling = OWTestLearners.ShuffleSplit
        self._param_changed()

    def _param_changed(self):
        self._invalidate()

    def _update_results(self):
        """
        Run/evaluate the learners.
        """
        self.warning([1, 2])
        self.error([2, 4])
        if self.data is None:
            return

        class_var = self.data.domain.class_var

        if self.resampling == OWTestLearners.TestOnTest:
            if self.test_data is None:
                self.warning(2, "Missing separate test data input")
                return
            elif self.test_data.domain.class_var != class_var:
                self.error(2, ("Inconsistent class variable between test " +
                               "and train data sets"))
                return

        # items in need of an update
        items = [(key, slot) for key, slot in self.learners.items()
                 if slot.results is None]
        learners = [slot.learner for _, slot in items]
        if len(items) == 0:
            return

        if self.test_data is not None and \
                self.resampling != OWTestLearners.TestOnTest:
            self.warning(
                1, "Test data is present but unused. "
                "Select 'Test on test data' to use it.")

        rstate = 42

        def update_progress(finished):
            self.progressBarSet(100 * finished)

        common_args = dict(store_data=True,
                           preprocessor=self.preprocessor,
                           callback=update_progress)
        self.setStatusMessage("Running")

        with self.progressBar():
            try:
                if self.resampling == OWTestLearners.KFold:
                    if len(self.data) < self.k_folds:
                        self.error(4, "Number of folds exceeds the data size")
                        return

                    warnings = []
                    results = Orange.evaluation.CrossValidation(
                        self.data,
                        learners,
                        k=self.k_folds,
                        random_state=rstate,
                        warnings=warnings,
                        **common_args)
                    if warnings:
                        self.warning(2, warnings[0])
                elif self.resampling == OWTestLearners.LeaveOneOut:
                    results = Orange.evaluation.LeaveOneOut(
                        self.data, learners, **common_args)
                elif self.resampling == OWTestLearners.ShuffleSplit:
                    train_size = self.sample_p / 100
                    results = Orange.evaluation.ShuffleSplit(
                        self.data,
                        learners,
                        n_resamples=self.n_repeat,
                        train_size=train_size,
                        test_size=None,
                        stratified=self.shuffle_stratified,
                        random_state=rstate,
                        **common_args)
                elif self.resampling == OWTestLearners.TestOnTrain:
                    results = Orange.evaluation.TestOnTrainingData(
                        self.data, learners, **common_args)
                elif self.resampling == OWTestLearners.TestOnTest:
                    results = Orange.evaluation.TestOnTestData(
                        self.data, self.test_data, learners, **common_args)
                else:
                    assert False
            except RuntimeError as e:
                self.error(2, str(e))
                self.setStatusMessage("")
                return

        learner_key = {
            slot.learner: key
            for key, slot in self.learners.items()
        }
        for learner, result in zip(learners, split_by_model(results)):
            stats = None
            if class_var.is_discrete:
                scorers = classification_stats.scores
            elif class_var.is_continuous:
                scorers = regression_stats.scores
            else:
                scorers = None
            if scorers:
                ex = result.failed[0]
                if ex:
                    stats = [Try.Fail(ex)] * len(scorers)
                    result = Try.Fail(ex)
                else:
                    stats = [Try(lambda: score(result)) for score in scorers]
                    result = Try.Success(result)
            key = learner_key[learner]
            self.learners[key] = \
                self.learners[key]._replace(results=result, stats=stats)

        self.setStatusMessage("")

    def _update_header(self):
        # Set the correct horizontal header labels on the results_model.
        headers = ["Method"]
        if self.data is not None:
            if self.data.domain.has_discrete_class:
                headers.extend(classification_stats.headers)
            else:
                headers.extend(regression_stats.headers)

        # remove possible extra columns from the model.
        for i in reversed(range(len(headers),
                                self.result_model.columnCount())):
            self.result_model.takeColumn(i)

        self.result_model.setHorizontalHeaderLabels(headers)

    def _update_stats_model(self):
        # Update the results_model with up to date scores.
        # Note: The target class specific scores (if requested) are
        # computed as needed in this method.
        model = self.view.model()
        # clear the table model, but preserving the header labels
        for r in reversed(range(model.rowCount())):
            model.takeRow(r)

        target_index = None
        if self.data is not None:
            class_var = self.data.domain.class_var
            if self.data.domain.has_discrete_class and \
                            self.class_selection != self.TARGET_AVERAGE:
                target_index = class_var.values.index(self.class_selection)
        else:
            class_var = None

        errors = []
        has_missing_scores = False

        for key, slot in self.learners.items():
            name = learner_name(slot.learner)
            head = QStandardItem(name)
            head.setData(key, Qt.UserRole)
            if isinstance(slot.results, Try.Fail):
                head.setToolTip(str(slot.results.exception))
                head.setText("{} (error)".format(name))
                head.setForeground(QtGui.QBrush(Qt.red))
                errors.append("{name} failed with error:\n"
                              "{exc.__class__.__name__}: {exc!s}".format(
                                  name=name, exc=slot.results.exception))

            row = [head]

            if class_var is not None and class_var.is_discrete and \
                    target_index is not None:
                if slot.results is not None and slot.results.success:
                    ovr_results = results_one_vs_rest(slot.results.value,
                                                      target_index)

                    stats = [
                        Try(lambda: score(ovr_results))
                        for score in classification_stats.scores
                    ]
                else:
                    stats = None
            else:
                stats = slot.stats

            if stats is not None:
                for stat in stats:
                    item = QStandardItem()
                    if stat.success:
                        item.setText("{:.3f}".format(stat.value[0]))
                    else:
                        item.setToolTip(str(stat.exception))
                        has_missing_scores = True
                    row.append(item)

            model.appendRow(row)

        if errors:
            self.error(3, "\n".join(errors))
        else:
            self.error(3)

        if has_missing_scores:
            self.warning(3, "Some scores could not be computed")
        else:
            self.warning(3)

    def _update_class_selection(self):
        self.class_selection_combo.setCurrentIndex(-1)
        self.class_selection_combo.clear()
        if not self.data:
            return

        if self.data.domain.has_discrete_class:
            self.cbox.setVisible(True)
            class_var = self.data.domain.class_var
            items = [self.TARGET_AVERAGE] + class_var.values
            self.class_selection_combo.addItems(items)

            class_index = 0
            if self.class_selection in class_var.values:
                class_index = class_var.values.index(self.class_selection) + 1

            self.class_selection_combo.setCurrentIndex(class_index)
            self.class_selection = items[class_index]
        else:
            self.cbox.setVisible(False)

    def _on_target_class_changed(self):
        self._update_stats_model()

    def _invalidate(self, which=None):
        # Invalidate learner results for `which` input keys
        # (if None then all learner results are invalidated)
        if which is None:
            which = self.learners.keys()

        model = self.view.model()
        statmodelkeys = [
            model.item(row, 0).data(Qt.UserRole)
            for row in range(model.rowCount())
        ]

        for key in which:
            self.learners[key] = \
                self.learners[key]._replace(results=None, stats=None)

            if key in statmodelkeys:
                row = statmodelkeys.index(key)
                for c in range(1, model.columnCount()):
                    item = model.item(row, c)
                    if item is not None:
                        item.setData(None, Qt.DisplayRole)
                        item.setData(None, Qt.ToolTipRole)

        self.apply_button.setEnabled(True)

    def apply(self):
        self.apply_button.setEnabled(False)
        self._update_header()
        # Update the view to display the model names
        self._update_stats_model()
        self._update_results()
        self._update_stats_model()
        self.commit()

    def commit(self):
        valid = [
            slot for slot in self.learners.values()
            if slot.results is not None and slot.results.success
        ]
        if valid:
            # Evaluation results
            combined = results_merge([slot.results.value for slot in valid])
            combined.learner_names = [
                learner_name(slot.learner) for slot in valid
            ]

            # Predictions & Probabilities
            predictions = combined.get_augmented_data(combined.learner_names)
        else:
            combined = None
            predictions = None
        self.send("Evaluation Results", combined)
        self.send("Predictions", predictions)

    def send_report(self):
        if not self.data or not self.learners:
            return
        if self.resampling == self.KFold:
            stratified = 'Stratified ' if self.cv_stratified else ''
            items = [
                ("Sampling type",
                 "{}{}-fold Cross validation".format(stratified, self.k_folds))
            ]
        elif self.resampling == self.LeaveOneOut:
            items = [("Sampling type", "Leave one out")]
        elif self.resampling == self.ShuffleSplit:
            stratified = 'Stratified ' if self.shuffle_stratified else ''
            items = [
                ("Sampling type",
                 "{}Shuffle split, {} random samples with {}% data ".format(
                     stratified, self.n_repeat, self.sample_p))
            ]
        elif self.resampling == self.TestOnTrain:
            items = [("Sampling type", "No sampling, test on training data")]
        elif self.resampling == self.TestOnTest:
            items = [("Sampling type", "No sampling, test on testing data")]
        else:
            items = []
        if self.data.domain.has_discrete_class:
            items += [("Target class", self.class_selection.strip("()"))]
        if items:
            self.report_items("Settings", items)
        self.report_table("Scores", self.view)
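Note: the widget code above leans on a small Try helper (Try(fn) returning an object with success, value and exception attributes, plus the Try.Fail / Try.Success marker classes) that is defined elsewhere in the module and not shown here. A minimal sketch compatible with the calls above, for illustration only, might look like this; the module's actual implementation may differ in detail:

class Try:
    """Run a callable and wrap its outcome (illustrative sketch only)."""

    class Success:
        success = True

        def __init__(self, value):
            self.value = value

    class Fail:
        success = False

        def __init__(self, exception):
            self.exception = exception

    def __new__(cls, fn):
        # evaluate immediately, so wrapping scores inside list comprehensions
        # captures the current value of the loop variable
        try:
            return Try.Success(fn())
        except Exception as ex:
            return Try.Fail(ex)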
Exemple #7
0
class OWTestLearners(widget.OWWidget):
    name = "Test Learners"
    description = "Cross-validation accuracy estimation."
    icon = "icons/TestLearners1.svg"
    priority = 100

    inputs = [("Learner", Orange.classification.Learner,
               "set_learner", widget.Multiple),
              ("Data", Orange.data.Table, "set_train_data", widget.Default),
              ("Test Data", Orange.data.Table, "set_test_data")]

    outputs = [("Evaluation Results", Orange.evaluation.Results)]

    settingsHandler = settings.ClassValuesContextHandler()

    #: Resampling/testing types
    KFold, LeaveOneOut, Bootstrap, TestOnTrain, TestOnTest = 0, 1, 2, 3, 4

    #: Selected resampling type
    resampling = settings.Setting(0)
    #: Number of folds for K-fold cross validation
    k_folds = settings.Setting(10)
    #: Number of repeats for bootstrap sampling
    n_repeat = settings.Setting(10)
    #: Bootstrap sampling p
    sample_p = settings.Setting(75)

    class_selection = settings.ContextSetting("(None)")

    def __init__(self, parent=None):
        super().__init__(parent)

        self.train_data = None
        self.test_data = None
        # initialized here so _select_class() can run before the first evaluation
        self.results = None
        self.previous_class_selection = "(None)"

        #: An Ordered dictionary with current inputs and their testing
        #: results.
        self.learners = OrderedDict()

        sbox = gui.widgetBox(self.controlArea, "Sampling")
        rbox = gui.radioButtons(
            sbox, self, "resampling", callback=self._param_changed
        )
        gui.appendRadioButton(rbox, "Cross validation")
        ibox = gui.indentedBox(rbox)
        gui.spin(ibox, self, "k_folds", 2, 50, label="Number of folds:",
                 callback=self.kfold_changed)
        gui.appendRadioButton(rbox, "Leave one out")
        gui.appendRadioButton(rbox, "Random sampling")
        ibox = gui.indentedBox(rbox)
        gui.spin(ibox, self, "n_repeat", 2, 50, label="Repeat train/test",
                 callback=self.bootstrap_changed)
        gui.widgetLabel(ibox, "Relative training set size:")
        gui.hSlider(ibox, self, "sample_p", minValue=1, maxValue=100,
                    ticks=20, vertical=False, labelFormat="%d %%",
                    callback=self.bootstrap_changed)

        gui.appendRadioButton(rbox, "Test on train data")
        gui.appendRadioButton(rbox, "Test on test data")

        rbox.layout().addSpacing(5)
        gui.button(rbox, self, "Apply", callback=self.apply)

        self.cbox = gui.widgetBox(self.controlArea, "Target class")
        self.class_selection_combo = gui.comboBox(
            self.cbox, self, "class_selection", items=[],
            callback=self._select_class,
            sendSelectedValue=True, valueType=str)

        gui.rubber(self.controlArea)


        self.view = QTreeView(
            rootIsDecorated=False,
            uniformRowHeights=True,
            wordWrap=True,
            editTriggers=QTreeView.NoEditTriggers
        )
        header = self.view.header()
        header.setResizeMode(QHeaderView.ResizeToContents)
        header.setDefaultAlignment(Qt.AlignCenter)
        header.setStretchLastSection(False)

        self.result_model = QStandardItemModel()
        self.view.setModel(self.result_model)
        self.view.setItemDelegate(ItemDelegate())
        self._update_header()
        box = gui.widgetBox(self.mainArea, "Evaluation Results")
        box.layout().addWidget(self.view)

    def set_learner(self, learner, key):
        if key in self.learners and learner is None:
            del self.learners[key]
        else:
            self.learners[key] = Input(learner, None, ())
        self._update_stats_model()

    def set_train_data(self, data):
        self.error(0)
        if data and not data.domain.class_var:
            self.error(0, "Train data input requires a class variable")
            data = None

        self.train_data = data
        self.closeContext()
        if data is not None:
            self.openContext(data.domain.class_var)
            self._update_header()
        self._update_class_selection()
        self._invalidate()

    def set_test_data(self, data):
        self.error(1)
        if data and not data.domain.class_var:
            self.error(1, "Test data input requires a class variable")
            data = None

        self.test_data = data
        if self.resampling == OWTestLearners.TestOnTest:
            self._invalidate()

    def handleNewSignals(self):
        self.update_results()
        self.commit()

    def kfold_changed(self):
        self.resampling = OWTestLearners.KFold
        self._param_changed()

    def bootstrap_changed(self):
        self.resampling = OWTestLearners.Bootstrap
        self._param_changed()

    def _param_changed(self):
        self._invalidate()

    def update_results(self):
        self.warning([1, 2])
        self.error(2)

        if self.train_data is None:
            return

        if self.resampling == OWTestLearners.TestOnTest:
            if self.test_data is None:
                self.warning(2, "Missing separate test data input")
                return

            elif self.test_data.domain.class_var != \
                    self.train_data.domain.class_var:
                self.error(2, ("Inconsistent class variable between test " +
                               "and train data sets"))
                return

        # items in need of an update
        items = [(key, input) for key, input in self.learners.items()
                 if input.results is None]
        learners = [input.learner for _, input in items]

        self.setStatusMessage("Running")
        if self.test_data is not None and \
                self.resampling != OWTestLearners.TestOnTest:
            self.warning(1, "Test data is present but unused. "
                            "Select 'Test on test data' to use it.")

        # TODO: Test each learner individually
        try:
            if self.resampling == OWTestLearners.KFold:
                results = Orange.evaluation.CrossValidation(
                    self.train_data, learners, k=self.k_folds, store_data=True
                )
            elif self.resampling == OWTestLearners.LeaveOneOut:
                results = Orange.evaluation.LeaveOneOut(
                    self.train_data, learners, store_data=True
                )
            elif self.resampling == OWTestLearners.Bootstrap:
                p = self.sample_p / 100.0
                results = Orange.evaluation.Bootstrap(
                    self.train_data, learners, n_resamples=self.n_repeat, p=p,
                    store_data=True
                )
            elif self.resampling == OWTestLearners.TestOnTrain:
                results = Orange.evaluation.TestOnTrainingData(
                    self.train_data, learners, store_data=True
                )
            elif self.resampling == OWTestLearners.TestOnTest:
                if self.test_data is None:
                    return
                results = Orange.evaluation.TestOnTestData(
                    self.train_data, self.test_data, learners, store_data=True
                )
            else:
                assert False
        except Exception as e:
            self.error(2, str(e))
            return

        self.results = results
        results = list(split_by_model(results))

        class_var = self.train_data.domain.class_var

        if class_var.is_discrete:
            stats = [classification_stats(self.one_vs_rest(res)) for res in results]
        else:
            stats = [regression_stats(res) for res in results]

        self._update_header()

        for (key, input), res, stat in zip(items, results, stats):
            self.learners[key] = input._replace(results=res, stats=stat)

        self.setStatusMessage("")

        self._update_stats_model()


    def _update_header(self):
        headers = ["Method"]
        if self.train_data is not None:
            if self.train_data.domain.class_var.is_discrete:
                headers.extend(classification_stats.headers)
            else:
                headers.extend(regression_stats.headers)
        for i in reversed(range(len(headers),
                                self.result_model.columnCount())):
            self.result_model.takeColumn(i)

        self.result_model.setHorizontalHeaderLabels(headers)

    def _update_stats_model(self):
        model = self.view.model()

        for r in reversed(range(model.rowCount())):
            model.takeRow(r)

        for input in self.learners.values():
            name = learner_name(input.learner)
            row = []
            head = QStandardItem()
            head.setData(name, Qt.DisplayRole)
            row.append(head)
            for stat in input.stats or ():  # stats may be None after invalidation
                item = QStandardItem()
                item.setData(" {:.3f} ".format(stat[0]), Qt.DisplayRole)
                row.append(item)
            model.appendRow(row)

    def _update_class_selection(self):
        self.class_selection_combo.clear()
        if not self.train_data:
            return
        if self.train_data.domain.class_var.is_discrete:
            self.cbox.setVisible(True)
            values = self.train_data.domain.class_var.values
            self.class_selection_combo.addItem("(None)")
            self.class_selection_combo.addItems(values)

            class_index = 0
            if self.class_selection in values:
                class_index = values.index(self.class_selection) + 1
            else:
                self.class_selection = '(None)'

            self.class_selection_combo.setCurrentIndex(class_index)
            self.previous_class_selection = "(None)"
        else:
            self.cbox.setVisible(False)

    def one_vs_rest(self, res):
        if self.class_selection != '(None)':
            class_ = self.train_data.domain.class_var.values.index(self.class_selection)
            actual = res.actual == class_
            predicted = res.predicted == class_
            return Results(
                nmethods=1, domain=self.train_data.domain,
                actual=actual, predicted=predicted)
        else:
            return res

    def _select_class(self):
        if self.previous_class_selection == self.class_selection:
            return
        if self.results is None:
            # no evaluation has been run yet, nothing to re-score
            return

        results = list(split_by_model(self.results))

        items = [(key, input) for key, input in self.learners.items()]
        learners = [input.learner for _, input in items]

        class_var = self.train_data.domain.class_var
        if class_var.is_discrete:
            stats = [classification_stats(self.one_vs_rest(res)) for res in results]
        else:
            stats = [regression_stats(res) for res in results]

        for (key, input), res, stat in zip(items, results, stats):
            self.learners[key] = input._replace(results=res, stats=stat)

        self.setStatusMessage("")

        self._update_stats_model()
        self.previous_class_selection = self.class_selection

    def _invalidate(self, which=None):
        if which is None:
            which = self.learners.keys()

        all_keys = list(self.learners.keys())
        model = self.view.model()

        for key in which:
            self.learners[key] = \
                self.learners[key]._replace(results=None, stats=None)

            # rows in the stats model follow the order of self.learners
            row = all_keys.index(key)
            if row < model.rowCount():
                for c in range(1, model.columnCount()):
                    item = model.item(row, c)
                    if item is not None:
                        item.setData(None, Qt.DisplayRole)

    def apply(self):
        self.update_results()
        self.commit()

    def commit(self):
        # only merge learners that actually produced results, so the merged
        # results and the learner names stay aligned
        valid = [val for val in self.learners.values()
                 if val.results is not None]
        if valid:
            combined = results_merge([val.results for val in valid])
            combined.learner_names = [learner_name(val.learner)
                                      for val in valid]
        else:
            combined = None
        self.send("Evaluation Results", combined)
Exemple #8
0
class OWTestLearners(OWWidget):
    name = "Test & Score"
    description = "Cross-validation accuracy estimation."
    icon = "icons/TestLearners1.svg"
    priority = 100

    inputs = [
        ("Learner", Learner, "set_learner", widget.Multiple),
        ("Data", Table, "set_train_data", widget.Default),
        ("Test Data", Table, "set_test_data"),
        ("Preprocessor", Preprocess, "set_preprocessor"),
    ]

    outputs = [("Predictions", Table), ("Evaluation Results", Results)]

    settingsHandler = settings.ClassValuesContextHandler()

    #: Resampling/testing types
    KFold, ShuffleSplit, LeaveOneOut, TestOnTrain, TestOnTest = 0, 1, 2, 3, 4
    #: Numbers of folds
    NFolds = [2, 3, 5, 10, 20]
    #: Number of repetitions
    NRepeats = [2, 3, 5, 10, 20, 50, 100]
    #: Sample sizes
    SampleSizes = [5, 10, 20, 25, 30, 33, 40, 50, 60, 66, 70, 75, 80, 90, 95]

    #: Selected resampling type
    resampling = settings.Setting(0)
    #: Number of folds for K-fold cross validation
    n_folds = settings.Setting(3)
    #: Stratified sampling for K-fold
    cv_stratified = settings.Setting(True)
    #: Number of repeats for ShuffleSplit sampling
    n_repeats = settings.Setting(3)
    #: ShuffleSplit sample size
    sample_size = settings.Setting(9)
    #: Stratified sampling for Random Sampling
    shuffle_stratified = settings.Setting(True)

    TARGET_AVERAGE = "(Average over classes)"
    class_selection = settings.ContextSetting(TARGET_AVERAGE)

    class Error(OWWidget.Error):
        class_required = Msg("Train data input requires a class variable")
        class_required_test = Msg("Test data input requires a class variable")
        too_many_folds = Msg("Number of folds exceeds the data size")
        class_inconsistent = Msg("Test and train data sets have different classes")

    class Warning(OWWidget.Warning):
        missing_data = Msg("Instances with unknown target values were removed from{}data")
        test_data_missing = Msg("Missing separate test data input")
        scores_not_computed = Msg("Some scores could not be computed")
        test_data_unused = Msg("Test data is present but unused. "
                               "Select 'Test on test data' to use it.")

    class Information(OWWidget.Information):
        data_sampled = Msg("Train data has been sampled")
        test_data_sampled = Msg("Test data has been sampled")

    def __init__(self):
        super().__init__()

        self.data = None
        self.test_data = None
        self.preprocessor = None
        self.train_data_missing_vals = False
        self.test_data_missing_vals = False

        #: An Ordered dictionary with current inputs and their testing results.
        self.learners = OrderedDict()

        sbox = gui.vBox(self.controlArea, "Sampling")
        rbox = gui.radioButtons(sbox, self, "resampling", callback=self._param_changed)

        gui.appendRadioButton(rbox, "Cross validation")
        ibox = gui.indentedBox(rbox)
        gui.comboBox(
            ibox,
            self,
            "n_folds",
            label="Number of folds: ",
            items=[str(x) for x in self.NFolds],
            maximumContentsLength=3,
            orientation=Qt.Horizontal,
            callback=self.kfold_changed,
        )
        gui.checkBox(ibox, self, "cv_stratified", "Stratified", callback=self.kfold_changed)

        gui.appendRadioButton(rbox, "Random sampling")
        ibox = gui.indentedBox(rbox)
        gui.comboBox(
            ibox,
            self,
            "n_repeats",
            label="Repeat train/test: ",
            items=[str(x) for x in self.NRepeats],
            maximumContentsLength=3,
            orientation=Qt.Horizontal,
            callback=self.shuffle_split_changed,
        )
        gui.comboBox(
            ibox,
            self,
            "sample_size",
            label="Training set size: ",
            items=["{} %".format(x) for x in self.SampleSizes],
            maximumContentsLength=5,
            orientation=Qt.Horizontal,
            callback=self.shuffle_split_changed,
        )
        gui.checkBox(ibox, self, "shuffle_stratified", "Stratified", callback=self.shuffle_split_changed)

        gui.appendRadioButton(rbox, "Leave one out")

        gui.appendRadioButton(rbox, "Test on train data")
        gui.appendRadioButton(rbox, "Test on test data")

        self.cbox = gui.vBox(self.controlArea, "Target Class")
        self.class_selection_combo = gui.comboBox(
            self.cbox,
            self,
            "class_selection",
            items=[],
            sendSelectedValue=True,
            valueType=str,
            callback=self._on_target_class_changed,
            contentsLength=8,
        )

        gui.rubber(self.controlArea)

        self.view = gui.TableView(wordWrap=True)
        header = self.view.horizontalHeader()
        header.setResizeMode(QHeaderView.ResizeToContents)
        header.setDefaultAlignment(Qt.AlignCenter)
        header.setStretchLastSection(False)

        self.result_model = QStandardItemModel(self)
        self.result_model.setHorizontalHeaderLabels(["Method"])
        self.view.setModel(self.result_model)
        self.view.setItemDelegate(ItemDelegate())

        box = gui.vBox(self.mainArea, "Evaluation Results")
        box.layout().addWidget(self.view)

    def sizeHint(self):
        return QSize(780, 1)

    def set_learner(self, learner, key):
        """
        Set the input `learner` for `key`.
        """
        if learner is None:
            # Input removed; drop the slot if it exists
            if key in self.learners:
                del self.learners[key]
        else:
            self.learners[key] = Input(learner, None, None)
            self._invalidate([key])

    def set_train_data(self, data):
        """
        Set the input training dataset.
        """
        self.Information.data_sampled.clear()
        if data and not data.domain.class_var:
            self.Error.class_required()
            data = None
        else:
            self.Error.class_required.clear()

        if isinstance(data, SqlTable):
            if data.approx_len() < AUTO_DL_LIMIT:
                data = Table(data)
            else:
                self.Information.data_sampled()
                data_sample = data.sample_time(1, no_cache=True)
                data_sample.download_data(AUTO_DL_LIMIT, partial=True)
                data = Table(data_sample)

        self.train_data_missing_vals = data is not None and np.isnan(data.Y).any()
        if self.train_data_missing_vals or self.test_data_missing_vals:
            self.Warning.missing_data(self._which_missing_data())
            if data:
                data = RemoveNaNClasses(data)
        else:
            self.Warning.missing_data.clear()

        self.data = data
        self.closeContext()
        if data is not None:
            self._update_class_selection()
            self.openContext(data.domain.class_var)
        self._invalidate()

    def set_test_data(self, data):
        """
        Set the input separate testing dataset.
        """
        self.Information.test_data_sampled.clear()
        if data and not data.domain.class_var:
            self.Error.class_required_test()
            data = None
        else:
            self.Error.class_required_test.clear()

        if isinstance(data, SqlTable):
            if data.approx_len() < AUTO_DL_LIMIT:
                data = Table(data)
            else:
                self.Information.test_data_sampled()
                data_sample = data.sample_time(1, no_cache=True)
                data_sample.download_data(AUTO_DL_LIMIT, partial=True)
                data = Table(data_sample)

        self.test_data_missing_vals = data is not None and np.isnan(data.Y).any()
        if self.train_data_missing_vals or self.test_data_missing_vals:
            self.Warning.missing_data(self._which_missing_data())
            if data:
                data = RemoveNaNClasses(data)
        else:
            self.Warning.missing_data.clear()

        self.test_data = data
        if self.resampling == OWTestLearners.TestOnTest:
            self._invalidate()

    def _which_missing_data(self):
        # maps (train missing, test missing) to the word inserted into the
        # missing_data warning; when both are affected no data set is named
        return {(True, True): " ",
                (True, False): " train ",
                (False, True): " test "}[
                    (self.train_data_missing_vals, self.test_data_missing_vals)]

    def set_preprocessor(self, preproc):
        """
        Set the input preprocessor to apply on the training data.
        """
        self.preprocessor = preproc
        self._invalidate()

    def handleNewSignals(self):
        """Reimplemented from OWWidget.handleNewSignals."""
        self._update_class_selection()
        self.commit()

    def kfold_changed(self):
        self.resampling = OWTestLearners.KFold
        self._param_changed()

    def shuffle_split_changed(self):
        self.resampling = OWTestLearners.ShuffleSplit
        self._param_changed()

    def _param_changed(self):
        self._invalidate()

    def _update_results(self):
        """
        Run/evaluate the learners.
        """
        self.Warning.test_data_unused.clear()
        self.Warning.test_data_missing.clear()
        self.warning()
        self.Error.class_inconsistent.clear()
        self.Error.too_many_folds.clear()
        self.error()
        if self.data is None:
            return

        class_var = self.data.domain.class_var

        if self.resampling == OWTestLearners.TestOnTest:
            if self.test_data is None:
                self.Warning.test_data_missing()
                return
            elif self.test_data.domain.class_var != class_var:
                self.Error.class_inconsistent()
                return

        # items in need of an update
        items = [(key, slot) for key, slot in self.learners.items() if slot.results is None]
        learners = [slot.learner for _, slot in items]
        if len(items) == 0:
            return

        if self.test_data is not None and self.resampling != OWTestLearners.TestOnTest:
            self.Warning.test_data_unused()

        rstate = 42

        def update_progress(finished):
            self.progressBarSet(100 * finished)

        common_args = dict(store_data=True, preprocessor=self.preprocessor, callback=update_progress, n_jobs=-1)
        self.setStatusMessage("Running")

        with self.progressBar():
            try:
                folds = self.NFolds[self.n_folds]
                if self.resampling == OWTestLearners.KFold:
                    if len(self.data) < folds:
                        self.Error.too_many_folds()
                        return
                    warnings = []
                    results = Orange.evaluation.CrossValidation(
                        self.data, learners, k=folds, random_state=rstate, warnings=warnings, **common_args
                    )
                    if warnings:
                        self.warning(warnings[0])
                elif self.resampling == OWTestLearners.LeaveOneOut:
                    results = Orange.evaluation.LeaveOneOut(self.data, learners, **common_args)
                elif self.resampling == OWTestLearners.ShuffleSplit:
                    train_size = self.SampleSizes[self.sample_size] / 100
                    results = Orange.evaluation.ShuffleSplit(
                        self.data,
                        learners,
                        n_resamples=self.NRepeats[self.n_repeats],
                        train_size=train_size,
                        test_size=None,
                        stratified=self.shuffle_stratified,
                        random_state=rstate,
                        **common_args
                    )
                elif self.resampling == OWTestLearners.TestOnTrain:
                    results = Orange.evaluation.TestOnTrainingData(self.data, learners, **common_args)
                elif self.resampling == OWTestLearners.TestOnTest:
                    results = Orange.evaluation.TestOnTestData(self.data, self.test_data, learners, **common_args)
                else:
                    assert False
            except (RuntimeError, ValueError) as e:
                self.error(str(e))
                self.setStatusMessage("")
                return
            else:
                self.error()

        learner_key = {slot.learner: key for key, slot in self.learners.items()}
        for learner, result in zip(learners, results.split_by_model()):
            stats = None
            if class_var.is_discrete:
                scorers = classification_stats.scores
            elif class_var.is_continuous:
                scorers = regression_stats.scores
            else:
                scorers = None
            if scorers:
                ex = result.failed[0]
                if ex:
                    stats = [Try.Fail(ex)] * len(scorers)
                    result = Try.Fail(ex)
                else:
                    stats = [Try(lambda: score(result)) for score in scorers]
                    result = Try.Success(result)
            key = learner_key[learner]
            self.learners[key] = self.learners[key]._replace(results=result, stats=stats)

        self.setStatusMessage("")

    def _update_header(self):
        # Set the correct horizontal header labels on the results_model.
        headers = ["Method"]
        if self.data is not None:
            if self.data.domain.has_discrete_class:
                headers.extend(classification_stats.headers)
            else:
                headers.extend(regression_stats.headers)

        # remove possible extra columns from the model.
        for i in reversed(range(len(headers), self.result_model.columnCount())):
            self.result_model.takeColumn(i)

        self.result_model.setHorizontalHeaderLabels(headers)

    def _update_stats_model(self):
        # Update the results_model with up to date scores.
        # Note: The target class specific scores (if requested) are
        # computed as needed in this method.
        model = self.view.model()
        # clear the table model, but preserving the header labels
        for r in reversed(range(model.rowCount())):
            model.takeRow(r)

        target_index = None
        if self.data is not None:
            class_var = self.data.domain.class_var
            if self.data.domain.has_discrete_class and self.class_selection != self.TARGET_AVERAGE:
                target_index = class_var.values.index(self.class_selection)
        else:
            class_var = None

        errors = []
        has_missing_scores = False

        for key, slot in self.learners.items():
            name = learner_name(slot.learner)
            head = QStandardItem(name)
            head.setData(key, Qt.UserRole)
            if isinstance(slot.results, Try.Fail):
                head.setToolTip(str(slot.results.exception))
                head.setText("{} (error)".format(name))
                head.setForeground(QtGui.QBrush(Qt.red))
                errors.append(
                    "{name} failed with error:\n"
                    "{exc.__class__.__name__}: {exc!s}".format(name=name, exc=slot.results.exception)
                )

            row = [head]

            if class_var is not None and class_var.is_discrete and target_index is not None:
                if slot.results is not None and slot.results.success:
                    ovr_results = results_one_vs_rest(slot.results.value, target_index)

                    stats = [Try(lambda: score(ovr_results)) for score in classification_stats.scores]
                else:
                    stats = None
            else:
                stats = slot.stats

            if stats is not None:
                for stat in stats:
                    item = QStandardItem()
                    if stat.success:
                        item.setText("{:.3f}".format(stat.value[0]))
                    else:
                        item.setToolTip(str(stat.exception))
                        has_missing_scores = True
                    row.append(item)

            model.appendRow(row)

        self.error("\n".join(errors), shown=bool(errors))
        self.Warning.scores_not_computed(shown=has_missing_scores)

    def _update_class_selection(self):
        self.class_selection_combo.setCurrentIndex(-1)
        self.class_selection_combo.clear()
        if not self.data:
            return

        if self.data.domain.has_discrete_class:
            self.cbox.setVisible(True)
            class_var = self.data.domain.class_var
            items = [self.TARGET_AVERAGE] + class_var.values
            self.class_selection_combo.addItems(items)

            class_index = 0
            if self.class_selection in class_var.values:
                class_index = class_var.values.index(self.class_selection) + 1

            self.class_selection_combo.setCurrentIndex(class_index)
            self.class_selection = items[class_index]
        else:
            self.cbox.setVisible(False)

    def _on_target_class_changed(self):
        self._update_stats_model()

    def _invalidate(self, which=None):
        # Invalidate learner results for `which` input keys
        # (if None then all learner results are invalidated)
        if which is None:
            which = self.learners.keys()

        model = self.view.model()
        statmodelkeys = [model.item(row, 0).data(Qt.UserRole) for row in range(model.rowCount())]

        for key in which:
            self.learners[key] = self.learners[key]._replace(results=None, stats=None)

            if key in statmodelkeys:
                row = statmodelkeys.index(key)
                for c in range(1, model.columnCount()):
                    item = model.item(row, c)
                    if item is not None:
                        item.setData(None, Qt.DisplayRole)
                        item.setData(None, Qt.ToolTipRole)

        self.commit()

    def commit(self):
        """Recompute and output the results"""
        self._update_header()
        # Update the view to display the model names
        self._update_stats_model()
        self._update_results()
        self._update_stats_model()
        valid = [slot for slot in self.learners.values() if slot.results is not None and slot.results.success]
        if valid:
            # Evaluation results
            combined = results_merge([slot.results.value for slot in valid])
            combined.learner_names = [learner_name(slot.learner) for slot in valid]

            # Predictions & Probabilities
            predictions = combined.get_augmented_data(combined.learner_names)
        else:
            combined = None
            predictions = None
        self.send("Evaluation Results", combined)
        self.send("Predictions", predictions)

    def send_report(self):
        """Report on the testing schema and results"""
        if not self.data or not self.learners:
            return
        if self.resampling == self.KFold:
            stratified = "Stratified " if self.cv_stratified else ""
            items = [("Sampling type", "{}{}-fold Cross validation".format(stratified, self.NFolds[self.n_folds]))]
        elif self.resampling == self.LeaveOneOut:
            items = [("Sampling type", "Leave one out")]
        elif self.resampling == self.ShuffleSplit:
            stratified = "Stratified " if self.shuffle_stratified else ""
            items = [
                (
                    "Sampling type",
                    "{}Shuffle split, {} random samples with {}% data ".format(
                        stratified, self.NRepeats[self.n_repeats], self.SampleSizes[self.sample_size]
                    ),
                )
            ]
        elif self.resampling == self.TestOnTrain:
            items = [("Sampling type", "No sampling, test on training data")]
        elif self.resampling == self.TestOnTest:
            items = [("Sampling type", "No sampling, test on testing data")]
        else:
            items = []
        if self.data.domain.has_discrete_class:
            items += [("Target class", self.class_selection.strip("()"))]
        if items:
            self.report_items("Settings", items)
        self.report_table("Scores", self.view)
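The per-target-class scores in the example above come from reducing the multi-class results to a binary problem (results_one_vs_rest, not shown). Exemple #7's one_vs_rest method shows the core idea; below is a standalone sketch of that reduction on plain arrays, with a hypothetical name (one_vs_rest_arrays) and without the probability-column remapping the real helper also performs:

import numpy as np


def one_vs_rest_arrays(actual, predicted, target_index):
    # Binarise: 1 where the value equals the selected class, 0 elsewhere.
    actual = (np.asarray(actual) == target_index).astype(int)
    predicted = (np.asarray(predicted) == target_index).astype(int)
    return actual, predicted

For instance, one_vs_rest_arrays([0, 1, 2, 1], [0, 2, 2, 1], target_index=1) returns (array([0, 1, 0, 1]), array([0, 0, 0, 1])), which can then be fed to ordinary binary classification scores.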
Exemple #9
0
class QgsAttributeTableDialog(QDialog):
    '''
    Attribute table dialog for viewing and editing the attributes of a
    vector layer.
    '''
    def __init__(self, parent, vectorlayer):
        QDialog.__init__(self, parent)
        #         self.w = QDialog(self)
        self.baseLayer = vectorlayer
        self.canChangeAttributes = self.validate(
            QgsVectorDataProvider.ChangeAttributeValues)
        self.canDeleteFeatures = self.validate(
            QgsVectorDataProvider.DeleteFeatures)
        self.canAddAttributes = self.validate(
            QgsVectorDataProvider.AddAttributes)
        self.canDeleteAttributes = self.validate(
            QgsVectorDataProvider.DeleteAttributes)
        self.canAddFeatures = self.validate(QgsVectorDataProvider.AddFeatures)

        gridLayout = QGridLayout(self)
        self.setLayout(gridLayout)

        self.setWindowTitle("Attribute Table")
        self.setFixedSize(QSize(800, 600))

        editorToolbar = QToolBar()
        editorToolbar.setFixedSize(QSize(768, 48))
        self.actionToggleEditing = QAction(QIcon("Resource\\edit.png"),
                                           "ToggleEditing", self)
        self.actionToggleEditing.triggered.connect(self.toggleEditing)
        if (self.canChangeAttributes or self.canDeleteFeatures
                or self.canAddAttributes or self.canDeleteAttributes
                or self.canAddFeatures) and (not self.baseLayer.isReadOnly()):
            self.actionToggleEditing.setEnabled(True)
        else:
            self.actionToggleEditing.setEnabled(False)
        self.actionToggleEditing.setCheckable(True)
        editorToolbar.addAction(self.actionToggleEditing)

        self.actionSave = QAction(QIcon("Resource\\filesave.png"),
                                  "Save Edits", self)
        self.actionSave.triggered.connect(self.saveEditing)
        self.actionSave.setCheckable(False)
        self.actionSave.setEnabled(False)
        editorToolbar.addAction(self.actionSave)

        self.actiondeleteRows = QAction(
            QIcon("Resource\\mActionDeleteSelected.png"),
            "Delete selected features", self)
        self.actiondeleteRows.triggered.connect(self.deleteRows)
        self.actiondeleteRows.setCheckable(False)
        self.actiondeleteRows.setEnabled(False)
        editorToolbar.addAction(self.actiondeleteRows)

        self.actionUnselectAll = QAction(
            QIcon("Resource\\mActionDeselectAll.png"), "Unselect All", self)
        self.actionUnselectAll.triggered.connect(self.unselectAll)
        self.actionUnselectAll.setCheckable(False)
        self.actionUnselectAll.setEnabled(True)
        editorToolbar.addAction(self.actionUnselectAll)

        self.actionSelectedToZoom = QAction(
            QIcon("Resource\\mActionZoomToSelected.png"),
            "Zoom map to the selected rows", self)
        self.actionSelectedToZoom.triggered.connect(self.selectedToZoom)
        self.actionSelectedToZoom.setCheckable(False)
        self.actionSelectedToZoom.setEnabled(True)
        editorToolbar.addAction(self.actionSelectedToZoom)

        gridLayout.addWidget(editorToolbar, 0, 0, 1, 1)

        self.model = QStandardItemModel()

        #         self.model.itemChanged.connect(self.attributeChanged)
        self.attributeTable = QTableView(self)
        self.attributeTable.setModel(self.model)
        self.attributeTable.setColumnWidth(0, 200)
        self.attributeTable.setColumnWidth(1, 160)
        self.attributeTable.clicked.connect(self.tableSelectionChanged)
        self.attributeTable.setSortingEnabled(True)

        self.changeItemList = []
        self.selectRows = []
        self.isSave = True
        self.initTable()
        gridLayout.addWidget(self.attributeTable, 1, 0, 1, 1)
#         self.attributeTable.selectionChanged.connect(self.selectFeature)

    def tableSelectionChanged(self):
        idxList = self.attributeTable.selectedIndexes()
        if idxList:
            self.baseLayer.removeSelection()
            fidList = []
            for idx in idxList:
                fid = int(self.model.item(idx.row()).text())
                fidList.append(fid)
            self.baseLayer.setSelectedFeatures(fidList)

    def toggleEditing(self):
        if self.baseLayer is not None:

            if not self.actionToggleEditing.isChecked():

                if self.isSave:
                    self.baseLayer.commitChanges()
                    self.actionSave.setEnabled(False)
                    self.actiondeleteRows.setEnabled(False)
                    self.model.itemChanged.disconnect(self.attributeChanged)
                    self.toggleEditingTable(False)
                else:
                    button = QMessageBox.warning(
                        self, "Stop Editing",
                        "Do you want to save the changes to layer?",
                        QMessageBox.Save | QMessageBox.Discard
                        | QMessageBox.Cancel)
                    if (button == QMessageBox.Cancel):
                        self.actionToggleEditing.setChecked(True)

                    elif button == QMessageBox.Save:
                        self.saveEditing()
                        self.baseLayer.commitChanges()
                        self.actionSave.setEnabled(False)
                        self.actiondeleteRows.setEnabled(False)
                        self.model.itemChanged.disconnect(
                            self.attributeChanged)
                        self.toggleEditingTable(False)
                    else:
                        self.initTable()
                        self.baseLayer.commitChanges()
                        self.actionSave.setEnabled(False)
                        self.actiondeleteRows.setEnabled(False)
                        self.model.itemChanged.disconnect(
                            self.attributeChanged)
                        self.toggleEditingTable(False)
#                 self.isEditable = False
            else:
                self.actionSave.setEnabled(True)
                self.actiondeleteRows.setEnabled(True)
                self.baseLayer.startEditing()
                self.toggleEditingTable(True)
                self.model.itemChanged.connect(self.attributeChanged)
#                 self.isEditable = True

    def toggleEditingTable(self, isEditable):
        columnCount = self.model.columnCount()
        rowCount = self.model.rowCount()
        col = 0
        while col < columnCount:
            row = 0
            while row < rowCount:
                self.model.item(row, col).setEditable(isEditable)
                row += 1
            col += 1

    def attributeChanged(self, standardItem):
        self.isSave = False
        #         if not self.isDelete:
        self.changeItemList.append(standardItem)
#         featureId = standardItem.row()
#         self.baseLayer.changeAttributeValue(featureId,
#                                        standardItem.column(), standardItem.text())

    def saveEditing(self):
        self.isSave = True
        if len(self.changeItemList) > 0:
            for standardItem in self.changeItemList:
                # column 0 holds the feature id; attribute columns are shifted
                # by one with respect to the layer's field indices
                featureId = int(self.model.item(standardItem.row(), 0).text())
                self.baseLayer.changeAttributeValue(featureId,
                                                    standardItem.column() - 1,
                                                    standardItem.text())
            self.changeItemList = []
        if len(self.selectRows) > 0:
            for fid in self.selectRows:
                self.baseLayer.deleteFeature(fid)
            self.selectRows = []

    def initTable(self):
        self.model.clear()
        #         header = QHeaderView(Qt.Horizontal)
        #         headerModel = QStandardItemModel()

        layer = self.baseLayer
        fields = layer.pendingFields()
        headersList = ["fid"]
        for field in fields:
            headersList.append(field.name())
        self.model.setHorizontalHeaderLabels(headersList)

        #         headerModel.setHorizontalHeaderLabels(headersList)
        #         header.setModel(headerModel)
        #         self.attributeTable.setHorizontalHeader(header)

        if len(layer.selectedFeatures()) > 0:
            features = layer.selectedFeatures()
        else:
            features = layer.getFeatures()
        for feature in features:
            record = [QStandardItem(str(feature.id()))]

            for field in feature.fields():
                name = field.name()
                attribute = feature.attribute(name).toString()

                stdItemValue = QStandardItem(attribute)
                stdItemValue.setEditable(False)
                record.append(stdItemValue)
            self.model.appendRow(record)

    def deleteRows(self):
        if len(self.attributeTable.selectedIndexes()) > 0:
            self.isSave = False
            selectedIndexs = self.attributeTable.selectedIndexes()
            rows = sorted({index.row() for index in selectedIndexs})
            # remember the feature ids (column 0) so saveEditing() can delete
            # them from the layer later
            self.selectRows = [int(self.model.item(row, 0).text())
                               for row in rows]
            # remove rows from the bottom up so the remaining indices stay valid
            for row in reversed(rows):
                self.model.takeRow(row)

    def unselectAll(self):
        if len(self.attributeTable.selectedIndexes()) > 0:
            self.attributeTable.clearSelection()

    def selectedToZoom(self):
        if len(self.attributeTable.selectedIndexes()) > 0:
            self.baseLayer.removeSelection()

            selectedIndexs = self.attributeTable.selectedIndexes()
            rows = sorted({index.row() for index in selectedIndexs})
            # select by feature id (column 0), not by row index, and keep the
            # pending-deletion list (self.selectRows) untouched
            fids = [int(self.model.item(row, 0).text()) for row in rows]
            self.baseLayer.setSelectedFeatures(fids)
        define._canvas.zoomToSelected(self.baseLayer)

    def validate(self, condition):
        # True if the layer's data provider supports the given capability flag
        return bool(self.baseLayer.dataProvider().capabilities() & condition)
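The dialog above maps selections back to layer features through the first model column, which stores the feature id as text. A self-contained, PyQt-only sketch of that layout with a hypothetical helper (PyQt4-style imports, matching the era of the snippet; adjust for PyQt5 if needed):

from PyQt4.QtGui import QStandardItem, QStandardItemModel


def build_attribute_model(field_names, features):
    # field_names: attribute column names; features: (fid, [values]) pairs.
    model = QStandardItemModel()
    model.setHorizontalHeaderLabels(["fid"] + list(field_names))
    for fid, values in features:
        # column 0 carries the feature id so a selected row can be mapped back
        # to the layer via model.item(row, 0)
        record = [QStandardItem(str(fid))]
        record += [QStandardItem(str(value)) for value in values]
        model.appendRow(record)
    return model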
Exemple #10
0
class OWTestLearners(widget.OWWidget):
    name = "Test Learners"
    description = ""
    icon = "icons/TestLearners1.svg"
    priority = 100

    inputs = [("Learner", Orange.classification.Fitter,
               "set_learner", widget.Multiple),
              ("Data", Orange.data.Table, "set_train_data", widget.Default),
              ("Test Data", Orange.data.Table, "set_test_data")]

    outputs = [("Evaluation Results", testing.Results)]

    #: Resampling/testing types
    KFold, LeaveOneOut, Bootstrap, TestOnTrain, TestOnTest = 0, 1, 2, 3, 4

    #: Selected resampling type
    resampling = settings.Setting(0)
    #: Number of folds for K-fold cross validation
    k_folds = settings.Setting(10)
    #: Number of repeats for bootstrap sampling
    n_repeat = settings.Setting(10)
    #: Bootstrap sampling p
    sample_p = settings.Setting(75)

    def __init__(self, parent=None):
        super().__init__(parent)

        self.train_data = None
        self.test_data = None

        #: An Ordered dictionary with current inputs and their testing
        #: results.
        self.learners = OrderedDict()

        sbox = gui.widgetBox(self.controlArea, "Sampling")
        rbox = gui.radioButtons(
            sbox, self, "resampling", callback=self._param_changed
        )
        gui.appendRadioButton(rbox, "Cross validation")
        ibox = gui.indentedBox(rbox)
        gui.spin(ibox, self, "k_folds", 2, 50, label="Number of folds:",
                 callback=self._param_changed)
        gui.appendRadioButton(rbox, "Leave one out")
        gui.appendRadioButton(rbox, "Random sampling")
        ibox = gui.indentedBox(rbox)
        gui.spin(ibox, self, "n_repeat", 2, 50, label="Repeat train/test",
                 callback=self._param_changed)
        gui.widgetLabel(ibox, "Relative training set size:")
        gui.hSlider(ibox, self, "sample_p", minValue=1, maxValue=100,
                    ticks=20, vertical=False,
                    callback=self._param_changed)

        gui.appendRadioButton(rbox, "Test on train data")
        gui.appendRadioButton(rbox, "Test on test data")

        rbox.layout().addSpacing(5)
        gui.button(rbox, self, "Apply", callback=self.apply)

        gui.rubber(self.controlArea)

        self.view = QTreeView(
            rootIsDecorated=False,
            uniformRowHeights=True,
            wordWrap=True,
            editTriggers=QTreeView.NoEditTriggers
        )

        self.result_model = QStandardItemModel()
        self.view.setModel(self.result_model)
        self._update_header()
        box = gui.widgetBox(self.mainArea, "Evaluation Results")
        box.layout().addWidget(self.view)

    def set_learner(self, learner, key):
        if key in self.learners and learner is None:
            del self.learners[key]
        else:
            self.learners[key] = Input(learner, None, ())
        self._update_stats_model()

    def set_train_data(self, data):
        self.train_data = data
        self._update_header()
        self._invalidate()

    def set_test_data(self, data):
        self.test_data = data
        if self.resampling == OWTestLearners.TestOnTest:
            self._invalidate()

    def handleNewSignals(self):
        self.update_results()
        self.commit()

    def _param_changed(self):
        self._invalidate()

    def update_results(self):
        self.warning(1, "")
        if self.train_data is None:
            return

        # items in need of an update
        items = [(key, input) for key, input in self.learners.items()
                 if input.results is None]
        learners = [input.learner for _, input in items]

        self.setStatusMessage("Running")
        if self.test_data is not None and \
                self.resampling != OWTestLearners.TestOnTest:
            self.warning(1, "Select 'Test on test data' to use the test data")

        # TODO: Test each learner individually

        if self.resampling == OWTestLearners.KFold:
            results = testing.CrossValidation(
                self.train_data, learners, k=self.k_folds, store_data=True
            )
        elif self.resampling == OWTestLearners.LeaveOneOut:
            results = testing.LeaveOneOut(
                self.train_data, learners, store_data=True
            )
        elif self.resampling == OWTestLearners.Bootstrap:
            p = self.sample_p / 100.0
            results = testing.Bootstrap(
                self.train_data, learners, n_resamples=self.n_repeat, p=p,
                store_data=True
            )
        elif self.resampling == OWTestLearners.TestOnTrain:
            results = testing.TestOnTrainingData(
                self.train_data, learners, store_data=True
            )
        elif self.resampling == OWTestLearners.TestOnTest:
            if self.test_data is None:
                return
            results = testing.TestOnTestData(
                self.train_data, self.test_data, learners, store_data=True
            )
        else:
            assert False

        results = list(split_by_model(results))
        class_var = self.train_data.domain.class_var
        
        if is_discrete(class_var):
            test_stats = classification_stats
        else:
            test_stats = regression_stats
        
        self._update_header()
        
        stats = [test_stats(res) for res in results]
        for (key, input), res, stat in zip(items, results, stats):
            self.learners[key] = input._replace(results=res, stats=stat)

        self.setStatusMessage("")
        
        self._update_stats_model()

    def _update_header(self):
        headers = ["Method"]
        if self.train_data is not None:
            if is_discrete(self.train_data.domain.class_var):
                if len(self.train_data.domain.class_var.values) == 2:
                    headers.extend(classification_stats.headers_binary)
                headers.extend(classification_stats.headers)
            else:
                headers.extend(regression_stats.headers)
        for i in reversed(range(len(headers),
                                self.result_model.columnCount())):
            self.result_model.takeColumn(i)
        
        self.result_model.setHorizontalHeaderLabels(headers)

    def _update_stats_model(self):
        model = self.view.model()

        for r in reversed(range(model.rowCount())):
            model.takeRow(r)

        for input in self.learners.values():
            name = learner_name(input.learner)
            row = []
            head = QStandardItem()
            head.setData(name, Qt.DisplayRole)
            row.append(head)
            for stat in input.stats or ():  # stats may be None after invalidation
                item = QStandardItem()
                item.setData(float(stat[0]), Qt.DisplayRole)
                row.append(item)
            model.appendRow(row)

        self.view.resizeColumnToContents(0)

    def _invalidate(self, which=None):
        if which is None:
            which = self.learners.keys()

        all_keys = list(self.learners.keys())
        model = self.view.model()

        for key in which:
            self.learners[key] = \
                self.learners[key]._replace(results=None, stats=None)

            # rows in the stats model follow the order of self.learners
            row = all_keys.index(key)
            if row < model.rowCount():
                for c in range(1, model.columnCount()):
                    item = model.item(row, c)
                    if item is not None:
                        item.setData(None, Qt.DisplayRole)

    def apply(self):
        self.update_results()
        self.commit()

    def commit(self):
        # keep fitter_names aligned with the results that were actually merged
        valid = [val for val in self.learners.values()
                 if val.results is not None]
        if valid:
            combined = results_merge([val.results for val in valid])
            combined.fitter_names = [learner_name(val.learner)
                                     for val in valid]
        else:
            combined = None
        self.send("Evaluation Results", combined)