class DataModel(object):
    """Holds the server list plus the current selection and autoconnect
    flag, persisted through QSettings.

    The server table is a QStandardItemModel with three columns per entry
    (column meaning is defined by the callers that fill it).
    """

    def __init__(self):
        super(DataModel, self).__init__()
        # 0 rows, 3 columns per server entry.
        self.servers = QStandardItemModel(0, 3)
        self.selected = ''
        self.autoconnect = False

    def loadSettings(self):
        """Restore servers, selection and autoconnect flag from QSettings."""
        settings = QSettings()
        servers = settings.value("servers", None)
        self.servers.clear()
        if servers:
            for s in servers:
                self.servers.appendRow([QStandardItem(str(i)) for i in s])
        self.selected = str(settings.value("selected", self.selected))
        # QSettings may hand the stored value back as a string; coerce to int.
        self.autoconnect = int(settings.value("autoconnect", self.autoconnect))

    def saveSettings(self):
        """Persist servers, selection and autoconnect flag to QSettings."""
        settings = QSettings()
        servers = []
        if self.servers:
            # `range` instead of the Python-2-only `xrange` used elsewhere;
            # build each row with a comprehension rather than repeated
            # `entry += [...]`.
            for row in range(self.servers.rowCount()):
                servers.append(
                    [self.servers.item(row, col).text()
                     for col in range(self.servers.columnCount())])
        # Store None for an empty list so loadSettings() sees "no servers".
        settings.setValue("servers", servers if servers else None)
        settings.setValue("selected", str(self.selected))
        settings.setValue("autoconnect", int(self.autoconnect))

    def selectedServer(self):
        """Return the row (list of column texts) whose first column matches
        `self.selected`, or an empty list when nothing matches."""
        selectedServer = []
        if self.selected:
            # findItems() searches column 0 with an exact match by default.
            result = self.servers.findItems(self.selected)
            if result:
                row = result[0].row()
                selectedServer = [
                    self.servers.item(row, col).text()
                    for col in range(self.servers.columnCount())]
        return selectedServer
def find_rules(self):
    """Mine frequent itemsets and association rules from `self.data` and
    fill the table view with one row per rule (support, confidence,
    coverage, strength, lift, leverage, antecedent, consequent).

    Honors the widget's filter settings (size and regex filters) and stops
    after `self.maxRules` rules.  Does nothing when no data is present.
    """
    if self.data is None:
        return
    data = self.data
    self.table.model().clear()
    n_examples = len(data)
    # Bind frequently used attributes to locals for the hot mining loop.
    NumericItem = self.NumericItem
    StandardItem = self.StandardItem
    filterSearch = self.filterSearch
    itemsetMin = self.filterAntecedentMin + self.filterConsequentMin
    itemsetMax = self.filterAntecedentMax + self.filterConsequentMax
    isSizeMatch = self.isSizeMatch
    isRegexMatch = self.isRegexMatch
    # One-hot encode the data for the itemset miner.
    X, mapping = OneHot.encode(data, self.classify)
    self.onehot_mapping = mapping
    # Map each one-hot item id to a human-readable "var=value" label.
    names = {item: '{}={}'.format(var.name, val)
             for item, var, val in OneHot.decode(mapping, data, mapping)}
    # Items that consequent must include if classifying
    class_items = {item
                   for item, var, val in OneHot.decode(mapping, data, mapping)
                   if var is data.domain.class_var} if self.classify else set()
    assert bool(class_items) == bool(self.classify)
    model = QStandardItemModel(self.table)
    # Column headers with explanatory tooltips for the score columns.
    for col, (label, tooltip) in enumerate([
            ("Supp", "Support"),
            ("Conf", "Confidence (support / antecedent support)"),
            ("Covr", "Coverage (antecedent support / number of examples)"),
            ("Strg", "Strength (consequent support / antecedent support)"),
            ("Lift", "Lift (number of examples * confidence / consequent support)"),
            ("Levr", "Leverage ((support * number of examples - antecedent support * consequent support) / (number of examples)²)"),
            ("Antecedent", None),
            ("", None),
            ("Consequent", None)]):
        item = QStandardItem(label)
        # NOTE(review): tooltip is None for the last three columns;
        # presumably setToolTip accepts that here — confirm with the Qt
        # binding in use.
        item.setToolTip(tooltip)
        model.setHorizontalHeaderItem(col, item)
    #~ # Aggregate rules by common (support,confidence) for scatterplot
    #~ scatter_agg = defaultdict(list)
    # Find itemsets
    nRules = 0
    itemsets = {}
    progress = gui.ProgressBar(self, self.maxRules + 1)
    for itemset, support in frequent_itemsets(X, self.minSupport / 100):
        itemsets[itemset] = support
        # When classifying, only consider itemsets containing a class item.
        if class_items and not class_items & itemset:
            continue
        # Filter itemset by joined filters before descending into it
        itemset_str = ' '.join(names[i] for i in itemset)
        if (filterSearch and
                (len(itemset) < itemsetMin or
                 itemsetMax < len(itemset) or
                 not isRegexMatch(itemset_str, itemset_str))):
            continue
        for rule in gen_assoc_rules(itemsets,
                                    self.minConfidence / 100,
                                    itemset):
            (left, right), support, confidence = rule
            # When classifying, the consequent must consist of class items.
            if class_items and right - class_items:
                continue
            if filterSearch and not isSizeMatch(len(left), len(right)):
                continue
            left_str = ' '.join(names[i] for i in sorted(left))
            right_str = ' '.join(names[i] for i in sorted(right))
            if filterSearch and not isRegexMatch(left_str, right_str):
                continue
            # All filters matched, calculate stats and add table row
            _, _, _, coverage, strength, lift, leverage = next(
                rules_stats((rule,), itemsets, n_examples))
            support_item = NumericItem(support / n_examples)
            # Set row data on first column
            support_item.setData((itemset - class_items,
                                  class_items and
                                  (class_items & itemset).pop()),
                                 self.ROW_DATA_ROLE)
            left_item = StandardItem(left_str, len(left))
            left_item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
            model.appendRow([support_item,
                             NumericItem(confidence),
                             NumericItem(coverage),
                             NumericItem(strength),
                             NumericItem(lift),
                             NumericItem(leverage),
                             left_item,
                             StandardItem('→'),
                             StandardItem(right_str, len(right))])
            #~ scatter_agg[(round(support / n_examples, 2), round(confidence, 2))].append((left, right))
            nRules += 1
            progress.advance()
            if nRules >= self.maxRules:
                break
        if nRules >= self.maxRules:
            break
    # Populate the TableView
    table = self.table
    # Hide and disable sorting while repopulating to avoid repeated
    # resorts/repaints.
    table.setHidden(True)
    table.setSortingEnabled(False)
    proxy_model = self.proxy_model
    proxy_model.setSourceModel(model)
    table.setModel(proxy_model)
    for i in range(model.columnCount()):
        table.resizeColumnToContents(i)
    table.setSortingEnabled(True)
    table.setHidden(False)
    progress.finish()
    self.nRules = nRules
    self.nFilteredRules = proxy_model.rowCount()  # TODO: continue; also add in owitemsets
    self.nSelectedRules = 0
    self.nSelectedExamples = 0
class OWTestLearners(widget.OWWidget):
    """Widget that evaluates input learners on the train (and optionally
    separate test) data with a selectable resampling scheme and reports
    per-learner scores in a table."""

    name = "Test Learners"
    description = ""
    icon = "icons/TestLearners1.svg"
    priority = 100

    inputs = [("Learner", Orange.classification.Learner,
               "set_learner", widget.Multiple),
              ("Data", Orange.data.Table, "set_train_data", widget.Default),
              ("Test Data", Orange.data.Table, "set_test_data")]
    outputs = [("Evaluation Results", testing.Results)]

    #: Resampling/testing types
    KFold, LeaveOneOut, Bootstrap, TestOnTrain, TestOnTest = 0, 1, 2, 3, 4
    #: Selected resampling type
    resampling = settings.Setting(0)
    #: Number of folds for K-fold cross validation
    k_folds = settings.Setting(10)
    #: Number of repeats for bootstrap sampling
    n_repeat = settings.Setting(10)
    #: Bootstrap sampling p
    sample_p = settings.Setting(75)

    def __init__(self, parent=None):
        super().__init__(parent)

        self.train_data = None
        self.test_data = None

        #: An Ordered dictionary with current inputs and their testing
        #: results.
        self.learners = OrderedDict()

        # --- Control area: resampling options ---
        sbox = gui.widgetBox(self.controlArea, "Sampling")
        rbox = gui.radioButtons(
            sbox, self, "resampling", callback=self._param_changed
        )
        gui.appendRadioButton(rbox, "Cross validation")
        ibox = gui.indentedBox(rbox)
        gui.spin(ibox, self, "k_folds", 2, 50, label="Number of folds:",
                 callback=self.kfold_changed)
        gui.appendRadioButton(rbox, "Leave one out")
        gui.appendRadioButton(rbox, "Random sampling")
        ibox = gui.indentedBox(rbox)
        gui.spin(ibox, self, "n_repeat", 2, 50, label="Repeat train/test",
                 callback=self.bootstrap_changed)
        gui.widgetLabel(ibox, "Relative training set size:")
        gui.hSlider(ibox, self, "sample_p", minValue=1, maxValue=100,
                    ticks=20, vertical=False, labelFormat="%d %%",
                    callback=self.bootstrap_changed)
        gui.appendRadioButton(rbox, "Test on train data")
        gui.appendRadioButton(rbox, "Test on test data")
        rbox.layout().addSpacing(5)
        gui.button(rbox, self, "Apply", callback=self.apply)
        gui.rubber(self.controlArea)

        # --- Main area: results table ---
        self.view = QTreeView(
            rootIsDecorated=False,
            uniformRowHeights=True,
            wordWrap=True,
            editTriggers=QTreeView.NoEditTriggers
        )
        header = self.view.header()
        header.setResizeMode(QHeaderView.ResizeToContents)
        header.setDefaultAlignment(Qt.AlignCenter)
        header.setStretchLastSection(False)

        self.result_model = QStandardItemModel()
        self.view.setModel(self.result_model)
        self.view.setItemDelegate(ItemDelegate())
        self._update_header()
        box = gui.widgetBox(self.mainArea, "Evaluation Results")
        box.layout().addWidget(self.view)

    def set_learner(self, learner, key):
        """Set/remove the input learner for channel `key`.

        NOTE(review): when `learner` is None for an unseen `key`, the else
        branch still stores an Input with a None learner — presumably the
        framework never sends None for an unknown key; confirm.
        """
        if key in self.learners and learner is None:
            del self.learners[key]
        else:
            self.learners[key] = Input(learner, None, ())
        self._update_stats_model()

    def set_train_data(self, data):
        """Set the training dataset; rejects data without a class variable."""
        self.error(0)
        if data is not None:
            if data.domain.class_var is None:
                self.error(0, "Train data input requires a class variable")
                data = None
        self.train_data = data
        self._update_header()
        self._invalidate()

    def set_test_data(self, data):
        """Set the separate test dataset; rejects data without a class
        variable.  Only invalidates results if they depend on test data."""
        self.error(1)
        if data is not None:
            if data.domain.class_var is None:
                self.error(1, "Test data input requires a class variable")
                data = None
        self.test_data = data
        if self.resampling == OWTestLearners.TestOnTest:
            self._invalidate()

    def handleNewSignals(self):
        """Re-run evaluation and emit results after all inputs settle."""
        self.update_results()
        self.commit()

    def kfold_changed(self):
        # Editing the fold count implicitly selects K-fold resampling.
        self.resampling = OWTestLearners.KFold
        self._param_changed()

    def bootstrap_changed(self):
        # Editing bootstrap parameters implicitly selects bootstrap sampling.
        self.resampling = OWTestLearners.Bootstrap
        self._param_changed()

    def _param_changed(self):
        self._invalidate()

    def update_results(self):
        """Evaluate all learners whose results are stale and store the
        per-learner results and scores."""
        self.warning([1, 2])
        self.error(2)
        if self.train_data is None:
            return
        if self.resampling == OWTestLearners.TestOnTest:
            if self.test_data is None:
                self.warning(2, "Missing separate test data input")
                return
            elif self.test_data.domain.class_var != \
                    self.train_data.domain.class_var:
                self.error(2, ("Inconsistent class variable between test " +
                               "and train data sets"))
                return

        # items in need of an update
        items = [(key, input) for key, input in self.learners.items()
                 if input.results is None]
        learners = [input.learner for _, input in items]
        self.setStatusMessage("Running")
        if self.test_data is not None and \
                self.resampling != OWTestLearners.TestOnTest:
            self.warning(1, "Test data is present but unused. "
                            "Select 'Test on test data' to use it.")

        # TODO: Test each learner individually
        # Dispatch on the selected resampling scheme.
        if self.resampling == OWTestLearners.KFold:
            results = testing.CrossValidation(
                self.train_data, learners, k=self.k_folds, store_data=True
            )
        elif self.resampling == OWTestLearners.LeaveOneOut:
            results = testing.LeaveOneOut(
                self.train_data, learners, store_data=True
            )
        elif self.resampling == OWTestLearners.Bootstrap:
            p = self.sample_p / 100.0
            results = testing.Bootstrap(
                self.train_data, learners, n_resamples=self.n_repeat, p=p,
                store_data=True
            )
        elif self.resampling == OWTestLearners.TestOnTrain:
            results = testing.TestOnTrainingData(
                self.train_data, learners, store_data=True
            )
        elif self.resampling == OWTestLearners.TestOnTest:
            if self.test_data is None:
                return
            results = testing.TestOnTestData(
                self.train_data, self.test_data, learners, store_data=True
            )
        else:
            assert False

        # Split the combined results back into one Results per learner.
        results = list(split_by_model(results))

        class_var = self.train_data.domain.class_var

        if is_discrete(class_var):
            test_stats = classification_stats
        else:
            test_stats = regression_stats

        self._update_header()

        stats = [test_stats(res) for res in results]
        for (key, input), res, stat in zip(items, results, stats):
            self.learners[key] = input._replace(results=res, stats=stat)

        self.setStatusMessage("")

        self._update_stats_model()

    def _update_header(self):
        # Rebuild horizontal header labels to match the current task type
        # (classification vs. regression), dropping any surplus columns.
        headers = ["Method"]
        if self.train_data is not None:
            if is_discrete(self.train_data.domain.class_var):
                headers.extend(classification_stats.headers)
            else:
                headers.extend(regression_stats.headers)
        for i in reversed(range(len(headers),
                                self.result_model.columnCount())):
            self.result_model.takeColumn(i)
        self.result_model.setHorizontalHeaderLabels(headers)

    def _update_stats_model(self):
        # Rebuild the score table: one row per learner with its stats.
        model = self.view.model()
        for r in reversed(range(model.rowCount())):
            model.takeRow(r)

        for input in self.learners.values():
            name = learner_name(input.learner)
            row = []
            head = QStandardItem()
            head.setData(name, Qt.DisplayRole)
            row.append(head)
            for stat in input.stats:
                item = QStandardItem()
                item.setData(" {:.3f} ".format(stat[0]), Qt.DisplayRole)
                row.append(item)
            model.appendRow(row)

    def _invalidate(self, which=None):
        """Mark results for keys in `which` (all keys when None) as stale
        and blank their score cells in the view."""
        if which is None:
            which = self.learners.keys()

        all_keys = list(self.learners.keys())
        model = self.view.model()

        for key in which:
            self.learners[key] = \
                self.learners[key]._replace(results=None, stats=None)

            # NOTE(review): this check is always true right after the
            # assignment above — likely intended to test membership in the
            # *model* rows (cf. the statmodelkeys approach used by the other
            # OWTestLearners class in this file); confirm.
            if key in self.learners:
                row = all_keys.index(key)
                for c in range(1, model.columnCount()):
                    item = model.item(row, c)
                    if item is not None:
                        item.setData(None, Qt.DisplayRole)

    def apply(self):
        self.update_results()
        self.commit()

    def commit(self):
        """Merge all available per-learner results and send them downstream."""
        results = [val.results for val in self.learners.values()
                   if val.results is not None]
        if results:
            combined = results_merge(results)
            combined.learner_names = [learner_name(val.learner)
                                      for val in self.learners.values()]
        else:
            combined = None
        self.send("Evaluation Results", combined)
class OWTestLearners(widget.OWWidget):
    """Test & Score widget: evaluates input learners with a selectable
    resampling scheme, shows per-learner scores (optionally one-vs-rest for
    a chosen target class) and outputs merged evaluation results and
    augmented predictions."""

    name = "Test & Score"
    description = "Cross-validation accuracy estimation."
    icon = "icons/TestLearners1.svg"
    priority = 100

    inputs = [("Learner", Learner, "set_learner", widget.Multiple),
              ("Data", Table, "set_train_data", widget.Default),
              ("Test Data", Table, "set_test_data"),
              ("Preprocessor", Preprocess, "set_preprocessor")]
    outputs = [("Predictions", Orange.data.Table),
               ("Evaluation Results", Orange.evaluation.Results)]

    settingsHandler = settings.ClassValuesContextHandler()

    #: Resampling/testing types
    KFold, LeaveOneOut, ShuffleSplit, TestOnTrain, TestOnTest = 0, 1, 2, 3, 4
    #: Selected resampling type
    resampling = settings.Setting(0)
    #: Number of folds for K-fold cross validation
    k_folds = settings.Setting(10)
    #: Number of repeats for ShuffleSplit sampling
    n_repeat = settings.Setting(10)
    #: ShuffleSplit sampling p
    sample_p = settings.Setting(75)

    TARGET_AVERAGE = "(Average over classes)"
    class_selection = settings.ContextSetting(TARGET_AVERAGE)

    def __init__(self):
        super().__init__()

        self.data = None
        self.test_data = None
        self.preprocessor = None
        # Flags tracking whether either input had unknown target values,
        # used to compose the shared warning message.
        self.train_data_missing_vals = False
        self.test_data_missing_vals = False

        #: An Ordered dictionary with current inputs and their testing
        #: results.
        self.learners = OrderedDict()

        # --- Control area: resampling options ---
        sbox = gui.widgetBox(self.controlArea, "Sampling")
        rbox = gui.radioButtons(
            sbox, self, "resampling", callback=self._param_changed
        )
        gui.appendRadioButton(rbox, "Cross validation")
        ibox = gui.indentedBox(rbox)
        gui.spin(ibox, self, "k_folds", 2, 50, label="Number of folds:",
                 callback=self.kfold_changed)
        gui.appendRadioButton(rbox, "Leave one out")
        gui.appendRadioButton(rbox, "Random sampling")
        ibox = gui.indentedBox(rbox)
        gui.spin(ibox, self, "n_repeat", 2, 50, label="Repeat train/test",
                 callback=self.shuffle_split_changed)
        gui.widgetLabel(ibox, "Relative training set size:")
        gui.hSlider(ibox, self, "sample_p", minValue=1, maxValue=99,
                    ticks=20, vertical=False, labelFormat="%d %%",
                    callback=self.shuffle_split_changed)
        gui.appendRadioButton(rbox, "Test on train data")
        gui.appendRadioButton(rbox, "Test on test data")

        rbox.layout().addSpacing(5)
        self.apply_button = gui.button(
            rbox, self, "Apply", callback=self.apply, default=True)

        # --- Control area: target class selection for per-class scores ---
        self.cbox = gui.widgetBox(self.controlArea, "Target class")
        self.class_selection_combo = gui.comboBox(
            self.cbox, self, "class_selection", items=[],
            sendSelectedValue=True, valueType=str,
            callback=self._on_target_class_changed,
            contentsLength=8)

        gui.rubber(self.controlArea)

        # --- Main area: results table ---
        self.view = QTreeView(
            rootIsDecorated=False,
            uniformRowHeights=True,
            wordWrap=True,
            editTriggers=QTreeView.NoEditTriggers
        )
        header = self.view.header()
        header.setResizeMode(QHeaderView.ResizeToContents)
        header.setDefaultAlignment(Qt.AlignCenter)
        header.setStretchLastSection(False)

        self.result_model = QStandardItemModel(self)
        self.result_model.setHorizontalHeaderLabels(["Method"])
        self.view.setModel(self.result_model)
        self.view.setItemDelegate(ItemDelegate())

        box = gui.widgetBox(self.mainArea, "Evaluation Results")
        box.layout().addWidget(self.view)

    def sizeHint(self):
        return QSize(780, 1)

    def set_learner(self, learner, key):
        """
        Set the input `learner` for `key`.
        """
        if key in self.learners and learner is None:
            # Removed
            del self.learners[key]
        else:
            self.learners[key] = Input(learner, None, None)
        self._invalidate([key])

    def set_train_data(self, data):
        """
        Set the input training dataset.
        """
        self.error(0)
        self.information(0)
        if data and not data.domain.class_var:
            self.error(0, "Train data input requires a class variable")
            data = None

        # For SQL tables, materialize small tables and sample large ones
        # (downloading at most AUTO_DL_LIMIT rows).
        if isinstance(data, SqlTable):
            if data.approx_len() < AUTO_DL_LIMIT:
                data = Table(data)
            else:
                self.information(0, "Train data has been sampled")
                data_sample = data.sample_time(1, no_cache=True)
                data_sample.download_data(AUTO_DL_LIMIT, partial=True)
                data = Table(data_sample)

        # Drop instances with unknown targets and warn about it (warning 4
        # is shared with set_test_data, so recompute from both flags).
        self.warning(4)
        self.train_data_missing_vals = data is not None and \
            np.isnan(data.Y).any()
        if self.train_data_missing_vals or self.test_data_missing_vals:
            self.warning(4, self._get_missing_data_warning(
                self.train_data_missing_vals, self.test_data_missing_vals
            ))
            if data:
                data = RemoveNaNClasses(data)

        self.data = data
        self.closeContext()
        if data is not None:
            self._update_class_selection()
            self.openContext(data.domain.class_var)
        self._invalidate()

    def set_test_data(self, data):
        """
        Set the input separate testing dataset.
        """
        self.error(1)
        self.information(1)
        if data and not data.domain.class_var:
            self.error(1, "Test data input requires a class variable")
            data = None

        # Same SQL materialize/sample policy as for the training data.
        if isinstance(data, SqlTable):
            if data.approx_len() < AUTO_DL_LIMIT:
                data = Table(data)
            else:
                self.information(1, "Test data has been sampled")
                data_sample = data.sample_time(1, no_cache=True)
                data_sample.download_data(AUTO_DL_LIMIT, partial=True)
                data = Table(data_sample)

        self.warning(4)
        self.test_data_missing_vals = data is not None and \
            np.isnan(data.Y).any()
        if self.train_data_missing_vals or self.test_data_missing_vals:
            self.warning(4, self._get_missing_data_warning(
                self.train_data_missing_vals, self.test_data_missing_vals
            ))
            if data:
                data = RemoveNaNClasses(data)

        self.test_data = data
        # Results only depend on test data in TestOnTest mode.
        if self.resampling == OWTestLearners.TestOnTest:
            self._invalidate()

    def _get_missing_data_warning(self, train_missing, test_missing):
        # Compose " ", " train " or " test " depending on which inputs had
        # missing targets (bool * str yields "" or the string).
        return "Instances with unknown target values were removed from{}data"\
            .format(train_missing * test_missing * " " or
                    train_missing * " train " or
                    test_missing * " test ")

    def set_preprocessor(self, preproc):
        """
        Set the input preprocessor to apply on the training data.
        """
        self.preprocessor = preproc
        self._invalidate()

    def handleNewSignals(self):
        """Reimplemented from OWWidget.handleNewSignals."""
        self._update_class_selection()
        self.apply()

    def kfold_changed(self):
        # Editing the fold count implicitly selects K-fold resampling.
        self.resampling = OWTestLearners.KFold
        self._param_changed()

    def shuffle_split_changed(self):
        # Editing sampling parameters implicitly selects ShuffleSplit.
        self.resampling = OWTestLearners.ShuffleSplit
        self._param_changed()

    def _param_changed(self):
        self._invalidate()

    def _update_results(self):
        """
        Run/evaluate the learners.
        """
        self.warning([1, 2])
        self.error(2)

        if self.data is None:
            return

        class_var = self.data.domain.class_var

        if self.resampling == OWTestLearners.TestOnTest:
            if self.test_data is None:
                self.warning(2, "Missing separate test data input")
                return
            elif self.test_data.domain.class_var != class_var:
                self.error(2, ("Inconsistent class variable between test " +
                               "and train data sets"))
                return

        # items in need of an update
        items = [(key, slot) for key, slot in self.learners.items()
                 if slot.results is None]
        learners = [slot.learner for _, slot in items]
        if len(items) == 0:
            return

        if self.test_data is not None and \
                self.resampling != OWTestLearners.TestOnTest:
            self.warning(1, "Test data is present but unused. "
                            "Select 'Test on test data' to use it.")

        # Fixed random state keeps resampling reproducible across runs.
        rstate = 42
        def update_progress(finished):
            self.progressBarSet(100 * finished)
        common_args = dict(
            store_data=True, preprocessor=self.preprocessor,
            callback=update_progress)
        self.setStatusMessage("Running")

        self.progressBarInit()
        try:
            # Dispatch on the selected resampling scheme.
            if self.resampling == OWTestLearners.KFold:
                warnings = []
                results = Orange.evaluation.CrossValidation(
                    self.data, learners, k=self.k_folds,
                    random_state=rstate, warnings=warnings, **common_args)
                if warnings:
                    self.warning(2, warnings[0])
            elif self.resampling == OWTestLearners.LeaveOneOut:
                results = Orange.evaluation.LeaveOneOut(
                    self.data, learners, **common_args)
            elif self.resampling == OWTestLearners.ShuffleSplit:
                train_size = self.sample_p / 100
                results = Orange.evaluation.ShuffleSplit(
                    self.data, learners, n_resamples=self.n_repeat,
                    train_size=train_size, test_size=None,
                    random_state=rstate, **common_args)
            elif self.resampling == OWTestLearners.TestOnTrain:
                results = Orange.evaluation.TestOnTrainingData(
                    self.data, learners, **common_args)
            elif self.resampling == OWTestLearners.TestOnTest:
                results = Orange.evaluation.TestOnTestData(
                    self.data, self.test_data, learners, **common_args)
            else:
                assert False
        except RuntimeError as e:
            self.error(2, str(e))
            self.setStatusMessage("")
            self.progressBarFinished()
            return

        # Store per-learner results/scores, wrapped in Try for error display.
        learner_key = {slot.learner: key for key, slot in
                       self.learners.items()}
        for learner, result in zip(learners, split_by_model(results)):
            stats = None
            if class_var.is_discrete:
                scorers = classification_stats.scores
            elif class_var.is_continuous:
                scorers = regression_stats.scores
            else:
                scorers = None
            if scorers:
                ex = result.failed[0]
                if ex:
                    stats = [Try.Fail(ex)] * len(scorers)
                    result = Try.Fail(ex)
                else:
                    # NOTE(review): assumes Try(...) invokes the callable
                    # immediately (capturing the current `score`/`result`);
                    # if evaluation were deferred, the late-bound loop
                    # variables would be wrong — confirm Try's semantics.
                    stats = [Try(lambda: score(result)) for score in scorers]
                    result = Try.Success(result)
            key = learner_key[learner]
            self.learners[key] = \
                self.learners[key]._replace(results=result, stats=stats)

        self.setStatusMessage("")
        self.progressBarFinished()

    def _update_header(self):
        # Set the correct horizontal header labels on the results_model.
        headers = ["Method"]
        if self.data is not None:
            if self.data.domain.has_discrete_class:
                headers.extend(classification_stats.headers)
            else:
                headers.extend(regression_stats.headers)

        # remove possible extra columns from the model.
        for i in reversed(range(len(headers),
                                self.result_model.columnCount())):
            self.result_model.takeColumn(i)

        self.result_model.setHorizontalHeaderLabels(headers)

    def _update_stats_model(self):
        # Update the results_model with up to date scores.
        # Note: The target class specific scores (if requested) are
        # computed as needed in this method.
        model = self.view.model()
        # clear the table model, but preserving the header labels
        for r in reversed(range(model.rowCount())):
            model.takeRow(r)

        target_index = None
        if self.data is not None:
            class_var = self.data.domain.class_var
            if class_var.is_discrete and \
                    self.class_selection != self.TARGET_AVERAGE:
                target_index = class_var.values.index(self.class_selection)
        else:
            class_var = None

        errors = []
        has_missing_scores = False

        for key, slot in self.learners.items():
            name = learner_name(slot.learner)
            head = QStandardItem(name)
            head.setData(key, Qt.UserRole)
            if isinstance(slot.results, Try.Fail):
                # Evaluation failed for this learner: mark the row red and
                # collect the error for the shared error message.
                head.setToolTip(str(slot.results.exception))
                head.setText("{} (error)".format(name))
                head.setForeground(QtGui.QBrush(Qt.red))
                errors.append("{name} failed with error:\n"
                              "{exc.__class__.__name__}: {exc!s}"
                              .format(name=name, exc=slot.results.exception))

            row = [head]

            if class_var is not None and class_var.is_discrete and \
                    target_index is not None:
                # Compute one-vs-rest scores for the selected target class.
                if slot.results is not None and slot.results.success:
                    ovr_results = results_one_vs_rest(
                        slot.results.value, target_index)

                    stats = [Try(lambda: score(ovr_results))
                             for score in classification_stats.scores]
                else:
                    stats = None
            else:
                stats = slot.stats

            if stats is not None:
                for stat in stats:
                    item = QStandardItem()
                    if stat.success:
                        item.setText("{:.3f}".format(stat.value[0]))
                    else:
                        item.setToolTip(str(stat.exception))
                        has_missing_scores = True
                    row.append(item)

            model.appendRow(row)

        if errors:
            self.error(3, "\n".join(errors))
        else:
            self.error(3)
        if has_missing_scores:
            self.warning(3, "Some scores could not be computed")
        else:
            self.warning(3)

    def _update_class_selection(self):
        # Rebuild the target-class combo for the current data's class
        # variable, preserving the previous selection when still valid.
        self.class_selection_combo.setCurrentIndex(-1)
        self.class_selection_combo.clear()
        if not self.data:
            return

        if self.data.domain.has_discrete_class:
            self.cbox.setVisible(True)
            class_var = self.data.domain.class_var
            items = [self.TARGET_AVERAGE] + class_var.values
            self.class_selection_combo.addItems(items)

            class_index = 0
            if self.class_selection in class_var.values:
                # +1 for the leading TARGET_AVERAGE entry.
                class_index = class_var.values.index(self.class_selection) + 1

            self.class_selection_combo.setCurrentIndex(class_index)
            self.class_selection = items[class_index]
        else:
            self.cbox.setVisible(False)

    def _on_target_class_changed(self):
        self._update_stats_model()

    def _invalidate(self, which=None):
        # Invalidate learner results for `which` input keys
        # (if None then all learner results are invalidated)
        if which is None:
            which = self.learners.keys()

        model = self.view.model()
        # Map model rows back to learner keys via the UserRole data set on
        # the head items in _update_stats_model.
        statmodelkeys = [model.item(row, 0).data(Qt.UserRole)
                         for row in range(model.rowCount())]

        for key in which:
            self.learners[key] = \
                self.learners[key]._replace(results=None, stats=None)

            if key in statmodelkeys:
                row = statmodelkeys.index(key)
                for c in range(1, model.columnCount()):
                    item = model.item(row, c)
                    if item is not None:
                        item.setData(None, Qt.DisplayRole)
                        item.setData(None, Qt.ToolTipRole)

        self.apply_button.setEnabled(True)

    def apply(self):
        self.apply_button.setEnabled(False)
        self._update_header()
        # Update the view to display the model names
        self._update_stats_model()
        self._update_results()
        self._update_stats_model()
        self.commit()

    def commit(self):
        """Send merged evaluation results and augmented predictions for all
        learners whose evaluation succeeded."""
        valid = [slot for slot in self.learners.values()
                 if slot.results is not None and slot.results.success]
        if valid:
            # Evaluation results
            combined = results_merge([slot.results.value for slot in valid])
            combined.learner_names = [learner_name(slot.learner)
                                      for slot in valid]

            # Predictions & Probabilities
            predictions = combined.get_augmented_data(combined.learner_names)
        else:
            combined = None
            predictions = None

        self.send("Evaluation Results", combined)
        self.send("Predictions", predictions)
def find_rules(self):
    """Mine frequent itemsets and association rules from `self.data` and
    fill the table view with one row per rule (support, confidence,
    coverage, strength, lift, leverage, antecedent, consequent).

    Honors the widget's size/regex filters, stops after `self.maxRules`
    rules, and guards against re-entrancy via `self._is_running`.
    """
    if self.data is None or not len(self.data):
        return
    if self._is_running:
        return
    self._is_running = True

    data = self.data
    self.table.model().clear()
    n_examples = len(data)
    # Bind frequently used attributes to locals for the hot mining loop.
    NumericItem = self.NumericItem
    StandardItem = self.StandardItem
    filterSearch = self.filterSearch
    itemsetMin = self.filterAntecedentMin + self.filterConsequentMin
    itemsetMax = self.filterAntecedentMax + self.filterConsequentMax
    isSizeMatch = self.isSizeMatch
    isRegexMatch = self.isRegexMatch

    X, mapping = OneHot.encode(data, self.classify)
    self.error(911)
    if X is None:
        self.error(911, 'Need some discrete data to work with.')
        # BUGFIX: previously fell through and crashed inside
        # frequent_itemsets(None, ...); bail out and release the guard.
        self._is_running = False
        return

    self.onehot_mapping = mapping
    # For sparse (transaction) data the value carries no information, so
    # show only the variable name; class items always show "var=value".
    ITEM_FMT = '{}' if issparse(data.X) else '{}={}'
    names = {
        item: ('{}={}' if var is data.domain.class_var
               else ITEM_FMT).format(var.name, val)
        for item, var, val in OneHot.decode(mapping, data, mapping)
    }
    # Items that consequent must include if classifying
    class_items = {
        item
        for item, var, val in OneHot.decode(mapping, data, mapping)
        if var is data.domain.class_var
    } if self.classify else set()
    assert bool(class_items) == bool(self.classify)

    # Ensure the re-entrancy guard is released even if mining raises.
    try:
        model = QStandardItemModel(self.table)
        for col, (label, tooltip) in enumerate([
                ("Supp", "Support"),
                ("Conf", "Confidence (support / antecedent support)"),
                ("Covr", "Coverage (antecedent support / number of examples)"),
                ("Strg", "Strength (consequent support / antecedent support)"),
                ("Lift",
                 "Lift (number of examples * confidence / consequent support)"),
                ("Levr",
                 "Leverage ((support * number of examples - antecedent support * consequent support) / (number of examples)²)"),
                ("Antecedent", None),
                ("", None),
                ("Consequent", None)]):
            item = QStandardItem(label)
            item.setToolTip(tooltip)
            model.setHorizontalHeaderItem(col, item)

        #~ # Aggregate rules by common (support,confidence) for scatterplot
        #~ scatter_agg = defaultdict(list)
        # Find itemsets
        nRules = 0
        itemsets = {}
        with self.progressBar(self.maxRules + 1) as progress:
            for itemset, support in frequent_itemsets(X, self.minSupport / 100):
                itemsets[itemset] = support
                # When classifying, only consider itemsets with a class item.
                if class_items and not class_items & itemset:
                    continue
                # Filter itemset by joined filters before descending into it
                itemset_str = ' '.join(names[i] for i in itemset)
                if (filterSearch and
                        (len(itemset) < itemsetMin or
                         itemsetMax < len(itemset) or
                         not isRegexMatch(itemset_str, itemset_str))):
                    continue
                for rule in association_rules(itemsets,
                                              self.minConfidence / 100,
                                              itemset):
                    left, right, support, confidence = rule
                    # Consequent must consist of class items when classifying.
                    if class_items and right - class_items:
                        continue
                    if filterSearch and not isSizeMatch(len(left), len(right)):
                        continue
                    left_str = ', '.join(names[i] for i in sorted(left))
                    right_str = ', '.join(names[i] for i in sorted(right))
                    if filterSearch and not isRegexMatch(left_str, right_str):
                        continue
                    # All filters matched, calculate stats and add table row
                    _, _, _, _, coverage, strength, lift, leverage = next(
                        rules_stats((rule, ), itemsets, n_examples))
                    support_item = NumericItem(support / n_examples)
                    # Set row data on first column
                    support_item.setData(
                        (itemset - class_items,
                         class_items and (class_items & itemset).pop()),
                        self.ROW_DATA_ROLE)
                    left_item = StandardItem(left_str, len(left))
                    left_item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                    model.appendRow([
                        support_item,
                        NumericItem(confidence),
                        NumericItem(coverage),
                        NumericItem(strength),
                        NumericItem(lift),
                        NumericItem(leverage),
                        left_item,
                        StandardItem('→'),
                        StandardItem(right_str, len(right))
                    ])
                    #~ scatter_agg[(round(support / n_examples, 2), round(confidence, 2))].append((left, right))
                    nRules += 1
                    progress.advance()
                    if nRules >= self.maxRules:
                        break
                if nRules >= self.maxRules:
                    break

        # Populate the TableView; hide and disable sorting while
        # repopulating to avoid repeated resorts/repaints.
        table = self.table
        table.setHidden(True)
        table.setSortingEnabled(False)
        proxy_model = self.proxy_model
        proxy_model.setSourceModel(model)
        table.setModel(proxy_model)
        for i in range(model.columnCount()):
            table.resizeColumnToContents(i)
        table.setSortingEnabled(True)
        table.setHidden(False)

        self.nRules = nRules
        self.nFilteredRules = proxy_model.rowCount()  # TODO: continue; also add in owitemsets
        self.nSelectedRules = 0
        self.nSelectedExamples = 0
    finally:
        self._is_running = False
class OWTestLearners(widget.OWWidget):
    """
    Widget that evaluates a set of input learners on a dataset using a
    selectable resampling scheme and shows per-learner scores in a table.

    Inputs: multiple learners, train data, optional separate test data,
    optional preprocessor.  Outputs: predictions and combined evaluation
    results.
    """
    name = "Test & Score"
    description = "Cross-validation accuracy estimation."
    icon = "icons/TestLearners1.svg"
    priority = 100

    inputs = [("Learner", Learner, "set_learner", widget.Multiple),
              ("Data", Table, "set_train_data", widget.Default),
              ("Test Data", Table, "set_test_data"),
              ("Preprocessor", Preprocess, "set_preprocessor")]

    outputs = [("Predictions", Orange.data.Table),
               ("Evaluation Results", Orange.evaluation.Results)]

    settingsHandler = settings.ClassValuesContextHandler()

    #: Resampling/testing types
    KFold, ShuffleSplit, LeaveOneOut, TestOnTrain, TestOnTest = 0, 1, 2, 3, 4

    #: Selected resampling type
    resampling = settings.Setting(0)
    #: Number of folds for K-fold cross validation
    k_folds = settings.Setting(10)
    #: Stratified sampling for K-fold
    cv_stratified = settings.Setting(True)
    #: Number of repeats for ShuffleSplit sampling
    n_repeat = settings.Setting(10)
    #: ShuffleSplit sampling p
    sample_p = settings.Setting(75)
    #: Stratified sampling for Random Sampling
    shuffle_stratified = settings.Setting(True)

    TARGET_AVERAGE = "(Average over classes)"
    class_selection = settings.ContextSetting(TARGET_AVERAGE)

    def __init__(self):
        """Build the control panel (sampling options, target class) and the
        results table view."""
        super().__init__()

        self.data = None
        self.test_data = None
        self.preprocessor = None
        self.train_data_missing_vals = False
        self.test_data_missing_vals = False

        #: An Ordered dictionary with current inputs and their testing
        #: results.
        self.learners = OrderedDict()

        sbox = gui.widgetBox(self.controlArea, "Sampling")
        rbox = gui.radioButtons(
            sbox, self, "resampling", callback=self._param_changed)

        gui.appendRadioButton(rbox, "Cross validation")
        ibox = gui.indentedBox(rbox)
        gui.spin(ibox, self, "k_folds", 2, 50, label="Number of folds:",
                 callback=self.kfold_changed)
        gui.checkBox(ibox, self, "cv_stratified", "Stratified",
                     callback=self.kfold_changed)

        gui.appendRadioButton(rbox, "Random sampling")
        ibox = gui.indentedBox(rbox)
        gui.spin(ibox, self, "n_repeat", 2, 50, label="Repeat train/test",
                 callback=self.shuffle_split_changed)
        gui.widgetLabel(ibox, "Relative training set size:")
        gui.hSlider(ibox, self, "sample_p", minValue=1, maxValue=99,
                    ticks=20, vertical=False, labelFormat="%d %%",
                    callback=self.shuffle_split_changed)
        gui.checkBox(ibox, self, "shuffle_stratified", "Stratified",
                     callback=self.shuffle_split_changed)

        gui.appendRadioButton(rbox, "Leave one out")
        gui.appendRadioButton(rbox, "Test on train data")
        gui.appendRadioButton(rbox, "Test on test data")

        rbox.layout().addSpacing(5)
        self.apply_button = gui.button(
            rbox, self, "Apply", callback=self.apply, default=True)

        self.cbox = gui.widgetBox(self.controlArea, "Target class")
        self.class_selection_combo = gui.comboBox(
            self.cbox, self, "class_selection", items=[],
            sendSelectedValue=True, valueType=str,
            callback=self._on_target_class_changed,
            contentsLength=8)

        gui.rubber(self.controlArea)

        self.view = QTreeView(
            rootIsDecorated=False,
            uniformRowHeights=True,
            wordWrap=True,
            editTriggers=QTreeView.NoEditTriggers)
        header = self.view.header()
        header.setResizeMode(QHeaderView.ResizeToContents)
        header.setDefaultAlignment(Qt.AlignCenter)
        header.setStretchLastSection(False)

        self.result_model = QStandardItemModel(self)
        self.result_model.setHorizontalHeaderLabels(["Method"])
        self.view.setModel(self.result_model)
        self.view.setItemDelegate(ItemDelegate())

        box = gui.widgetBox(self.mainArea, "Evaluation Results")
        box.layout().addWidget(self.view)

    def sizeHint(self):
        # Minimal height; let the layout grow the results table as needed.
        return QSize(780, 1)

    def set_learner(self, learner, key):
        """
        Set the input `learner` for `key`.
        """
        if key in self.learners and learner is None:
            # Removed
            del self.learners[key]
        else:
            self.learners[key] = Input(learner, None, None)
        self._invalidate([key])

    def set_train_data(self, data):
        """
        Set the input training dataset.
        """
        # Clear any previous train-data error/information messages (slot 0).
        self.error(0)
        self.information(0)
        if data and not data.domain.class_var:
            self.error(0, "Train data input requires a class variable")
            data = None

        # Large SQL tables are downsampled before being materialized.
        if isinstance(data, SqlTable):
            if data.approx_len() < AUTO_DL_LIMIT:
                data = Table(data)
            else:
                self.information(0, "Train data has been sampled")
                data_sample = data.sample_time(1, no_cache=True)
                data_sample.download_data(AUTO_DL_LIMIT, partial=True)
                data = Table(data_sample)

        # Warning slot 4 is shared between train and test missing-value
        # notices; clear it, then re-raise if either input has missing Y.
        self.warning(4)
        self.train_data_missing_vals = data is not None and \
            np.isnan(data.Y).any()
        if self.train_data_missing_vals or self.test_data_missing_vals:
            self.warning(4, self._get_missing_data_warning(
                self.train_data_missing_vals, self.test_data_missing_vals))
            if data:
                data = RemoveNaNClasses(data)

        self.data = data
        self.closeContext()
        if data is not None:
            self._update_class_selection()
            self.openContext(data.domain.class_var)
        self._invalidate()

    def set_test_data(self, data):
        """
        Set the input separate testing dataset.
        """
        # Clear any previous test-data error/information messages (slot 1).
        self.error(1)
        self.information(1)
        if data and not data.domain.class_var:
            self.error(1, "Test data input requires a class variable")
            data = None

        if isinstance(data, SqlTable):
            if data.approx_len() < AUTO_DL_LIMIT:
                data = Table(data)
            else:
                self.information(1, "Test data has been sampled")
                data_sample = data.sample_time(1, no_cache=True)
                data_sample.download_data(AUTO_DL_LIMIT, partial=True)
                data = Table(data_sample)

        # Same shared warning slot 4 as in set_train_data.
        self.warning(4)
        self.test_data_missing_vals = data is not None and \
            np.isnan(data.Y).any()
        if self.train_data_missing_vals or self.test_data_missing_vals:
            self.warning(4, self._get_missing_data_warning(
                self.train_data_missing_vals, self.test_data_missing_vals))
            if data:
                data = RemoveNaNClasses(data)

        self.test_data = data
        # Only the "test on test data" mode depends on this input.
        if self.resampling == OWTestLearners.TestOnTest:
            self._invalidate()

    def _get_missing_data_warning(self, train_missing, test_missing):
        # Chooses " ", " train " or " test " via bool arithmetic:
        # both missing -> " " (don't specify which input).
        return "Instances with unknown target values were removed from{}data"\
            .format(train_missing * test_missing * " " or
                    train_missing * " train " or
                    test_missing * " test ")

    def set_preprocessor(self, preproc):
        """
        Set the input preprocessor to apply on the training data.
        """
        self.preprocessor = preproc
        self._invalidate()

    def handleNewSignals(self):
        """Reimplemented from OWWidget.handleNewSignals."""
        self._update_class_selection()
        self.apply()

    def kfold_changed(self):
        # Editing the K-fold controls implicitly selects K-fold resampling.
        self.resampling = OWTestLearners.KFold
        self._param_changed()

    def shuffle_split_changed(self):
        # Editing the random-sampling controls selects ShuffleSplit.
        self.resampling = OWTestLearners.ShuffleSplit
        self._param_changed()

    def _param_changed(self):
        self._invalidate()

    def _update_results(self):
        """
        Run/evaluate the learners.
        """
        self.warning([1, 2])
        self.error([2, 4])

        if self.data is None:
            return

        class_var = self.data.domain.class_var

        if self.resampling == OWTestLearners.TestOnTest:
            if self.test_data is None:
                self.warning(2, "Missing separate test data input")
                return
            elif self.test_data.domain.class_var != class_var:
                self.error(2, ("Inconsistent class variable between test " +
                               "and train data sets"))
                return

        # items in need of an update
        items = [(key, slot) for key, slot in self.learners.items()
                 if slot.results is None]
        learners = [slot.learner for _, slot in items]
        if len(items) == 0:
            return

        if self.test_data is not None and \
                self.resampling != OWTestLearners.TestOnTest:
            self.warning(1, "Test data is present but unused. "
                            "Select 'Test on test data' to use it.")

        # Fixed seed so repeated runs with unchanged settings agree.
        rstate = 42

        def update_progress(finished):
            self.progressBarSet(100 * finished)

        common_args = dict(
            store_data=True, preprocessor=self.preprocessor,
            callback=update_progress)
        self.setStatusMessage("Running")

        with self.progressBar():
            try:
                if self.resampling == OWTestLearners.KFold:
                    if len(self.data) < self.k_folds:
                        self.error(4, "Number of folds exceeds the data size")
                        return
                    warnings = []
                    results = Orange.evaluation.CrossValidation(
                        self.data, learners, k=self.k_folds,
                        random_state=rstate, warnings=warnings,
                        **common_args)
                    if warnings:
                        self.warning(2, warnings[0])
                elif self.resampling == OWTestLearners.LeaveOneOut:
                    results = Orange.evaluation.LeaveOneOut(
                        self.data, learners, **common_args)
                elif self.resampling == OWTestLearners.ShuffleSplit:
                    train_size = self.sample_p / 100
                    results = Orange.evaluation.ShuffleSplit(
                        self.data, learners,
                        n_resamples=self.n_repeat, train_size=train_size,
                        test_size=None, stratified=self.shuffle_stratified,
                        random_state=rstate, **common_args)
                elif self.resampling == OWTestLearners.TestOnTrain:
                    results = Orange.evaluation.TestOnTrainingData(
                        self.data, learners, **common_args)
                elif self.resampling == OWTestLearners.TestOnTest:
                    results = Orange.evaluation.TestOnTestData(
                        self.data, self.test_data, learners, **common_args)
                else:
                    assert False
            except RuntimeError as e:
                self.error(2, str(e))
                self.setStatusMessage("")
                return

        # Map each learner back to its input key and store per-learner
        # results/stats, wrapped in Try so failures can be shown per row.
        learner_key = {slot.learner: key for key, slot in
                       self.learners.items()}
        for learner, result in zip(learners, split_by_model(results)):
            stats = None
            if class_var.is_discrete:
                scorers = classification_stats.scores
            elif class_var.is_continuous:
                scorers = regression_stats.scores
            else:
                scorers = None
            if scorers:
                ex = result.failed[0]
                if ex:
                    stats = [Try.Fail(ex)] * len(scorers)
                    result = Try.Fail(ex)
                else:
                    stats = [Try(lambda: score(result)) for score in scorers]
                    result = Try.Success(result)
            key = learner_key[learner]
            self.learners[key] = \
                self.learners[key]._replace(results=result, stats=stats)

        self.setStatusMessage("")

    def _update_header(self):
        # Set the correct horizontal header labels on the results_model.
        headers = ["Method"]
        if self.data is not None:
            if self.data.domain.has_discrete_class:
                headers.extend(classification_stats.headers)
            else:
                headers.extend(regression_stats.headers)

        # remove possible extra columns from the model.
        for i in reversed(range(len(headers),
                                self.result_model.columnCount())):
            self.result_model.takeColumn(i)

        self.result_model.setHorizontalHeaderLabels(headers)

    def _update_stats_model(self):
        # Update the results_model with up to date scores.
        # Note: The target class specific scores (if requested) are
        # computed as needed in this method.
        model = self.view.model()
        # clear the table model, but preserving the header labels
        for r in reversed(range(model.rowCount())):
            model.takeRow(r)

        target_index = None
        if self.data is not None:
            class_var = self.data.domain.class_var
            if self.data.domain.has_discrete_class and \
                    self.class_selection != self.TARGET_AVERAGE:
                target_index = class_var.values.index(self.class_selection)
        else:
            class_var = None

        errors = []
        has_missing_scores = False

        for key, slot in self.learners.items():
            name = learner_name(slot.learner)
            head = QStandardItem(name)
            head.setData(key, Qt.UserRole)
            if isinstance(slot.results, Try.Fail):
                head.setToolTip(str(slot.results.exception))
                head.setText("{} (error)".format(name))
                head.setForeground(QtGui.QBrush(Qt.red))
                errors.append("{name} failed with error:\n"
                              "{exc.__class__.__name__}: {exc!s}"
                              .format(name=name,
                                      exc=slot.results.exception))

            row = [head]

            if class_var is not None and class_var.is_discrete and \
                    target_index is not None:
                # A specific target class is selected: recompute scores
                # one-vs-rest on the fly from the stored results.
                if slot.results is not None and slot.results.success:
                    ovr_results = results_one_vs_rest(
                        slot.results.value, target_index)
                    stats = [Try(lambda: score(ovr_results))
                             for score in classification_stats.scores]
                else:
                    stats = None
            else:
                stats = slot.stats

            if stats is not None:
                for stat in stats:
                    item = QStandardItem()
                    if stat.success:
                        item.setText("{:.3f}".format(stat.value[0]))
                    else:
                        item.setToolTip(str(stat.exception))
                        has_missing_scores = True
                    row.append(item)

            model.appendRow(row)

        if errors:
            self.error(3, "\n".join(errors))
        else:
            self.error(3)
        if has_missing_scores:
            self.warning(3, "Some scores could not be computed")
        else:
            self.warning(3)

    def _update_class_selection(self):
        """Repopulate the target-class combo from the current data's
        discrete class values (hidden for regression data)."""
        self.class_selection_combo.setCurrentIndex(-1)
        self.class_selection_combo.clear()
        if not self.data:
            return

        if self.data.domain.has_discrete_class:
            self.cbox.setVisible(True)
            class_var = self.data.domain.class_var
            items = [self.TARGET_AVERAGE] + class_var.values
            self.class_selection_combo.addItems(items)

            class_index = 0
            if self.class_selection in class_var.values:
                class_index = class_var.values.index(self.class_selection) + 1

            self.class_selection_combo.setCurrentIndex(class_index)
            self.class_selection = items[class_index]
        else:
            self.cbox.setVisible(False)

    def _on_target_class_changed(self):
        self._update_stats_model()

    def _invalidate(self, which=None):
        # Invalidate learner results for `which` input keys
        # (if None then all learner results are invalidated)
        if which is None:
            which = self.learners.keys()

        model = self.view.model()
        # Keys currently shown in the table, by row (stored in UserRole).
        statmodelkeys = [model.item(row, 0).data(Qt.UserRole)
                         for row in range(model.rowCount())]

        for key in which:
            self.learners[key] = \
                self.learners[key]._replace(results=None, stats=None)

            if key in statmodelkeys:
                row = statmodelkeys.index(key)
                # Blank out the stale score cells (keep the name column).
                for c in range(1, model.columnCount()):
                    item = model.item(row, c)
                    if item is not None:
                        item.setData(None, Qt.DisplayRole)
                        item.setData(None, Qt.ToolTipRole)

        self.apply_button.setEnabled(True)

    def apply(self):
        """Re-run the evaluation and push results to the outputs."""
        self.apply_button.setEnabled(False)
        self._update_header()
        # Update the view to display the model names
        self._update_stats_model()
        self._update_results()
        self._update_stats_model()
        self.commit()

    def commit(self):
        """Send merged evaluation results and augmented predictions for all
        learners that evaluated successfully (or None if none did)."""
        valid = [slot for slot in self.learners.values()
                 if slot.results is not None and slot.results.success]
        if valid:
            # Evaluation results
            combined = results_merge([slot.results.value for slot in valid])
            combined.learner_names = [learner_name(slot.learner)
                                      for slot in valid]
            # Predictions & Probabilities
            predictions = combined.get_augmented_data(combined.learner_names)
        else:
            combined = None
            predictions = None
        self.send("Evaluation Results", combined)
        self.send("Predictions", predictions)

    def send_report(self):
        """Compose the report: sampling settings, target class, score table."""
        if not self.data or not self.learners:
            return
        if self.resampling == self.KFold:
            stratified = 'Stratified ' if self.cv_stratified else ''
            items = [("Sampling type",
                      "{}{}-fold Cross validation".format(stratified,
                                                          self.k_folds))]
        elif self.resampling == self.LeaveOneOut:
            items = [("Sampling type", "Leave one out")]
        elif self.resampling == self.ShuffleSplit:
            stratified = 'Stratified ' if self.shuffle_stratified else ''
            items = [("Sampling type",
                      "{}Shuffle split, {} random samples with {}% data "
                      .format(stratified, self.n_repeat, self.sample_p))]
        elif self.resampling == self.TestOnTrain:
            items = [("Sampling type", "No sampling, test on training data")]
        elif self.resampling == self.TestOnTest:
            items = [("Sampling type", "No sampling, test on testing data")]
        else:
            items = []
        if self.data.domain.has_discrete_class:
            items += [("Target class", self.class_selection.strip("()"))]
        if items:
            self.report_items("Settings", items)
        self.report_table("Scores", self.view)
class OWTestLearners(widget.OWWidget):
    """
    Earlier "Test Learners" widget: evaluates input learners on train
    (and optionally separate test) data with K-fold / leave-one-out /
    bootstrap resampling and shows per-learner scores.

    NOTE(review): this appears to be an older revision of the same widget
    as above, kept in the file — confirm whether it is still needed.
    """
    name = "Test Learners"
    description = "Cross-validation accuracy estimation."
    icon = "icons/TestLearners1.svg"
    priority = 100

    inputs = [("Learner", Orange.classification.Learner,
               "set_learner", widget.Multiple),
              ("Data", Orange.data.Table, "set_train_data", widget.Default),
              ("Test Data", Orange.data.Table, "set_test_data")]

    outputs = [("Evaluation Results", Orange.evaluation.Results)]

    settingsHandler = settings.ClassValuesContextHandler()

    #: Resampling/testing types
    KFold, LeaveOneOut, Bootstrap, TestOnTrain, TestOnTest = 0, 1, 2, 3, 4

    #: Selected resampling type
    resampling = settings.Setting(0)
    #: Number of folds for K-fold cross validation
    k_folds = settings.Setting(10)
    #: Number of repeats for bootstrap sampling
    n_repeat = settings.Setting(10)
    #: Bootstrap sampling p
    sample_p = settings.Setting(75)

    class_selection = settings.ContextSetting("(None)")

    def __init__(self, parent=None):
        """Build the sampling controls, target-class combo and score view."""
        super().__init__(parent)

        self.train_data = None
        self.test_data = None

        #: An Ordered dictionary with current inputs and their testing
        #: results.
        self.learners = OrderedDict()

        sbox = gui.widgetBox(self.controlArea, "Sampling")
        rbox = gui.radioButtons(
            sbox, self, "resampling", callback=self._param_changed
        )
        gui.appendRadioButton(rbox, "Cross validation")
        ibox = gui.indentedBox(rbox)
        gui.spin(ibox, self, "k_folds", 2, 50, label="Number of folds:",
                 callback=self.kfold_changed)
        gui.appendRadioButton(rbox, "Leave one out")
        gui.appendRadioButton(rbox, "Random sampling")
        ibox = gui.indentedBox(rbox)
        gui.spin(ibox, self, "n_repeat", 2, 50, label="Repeat train/test",
                 callback=self.bootstrap_changed)
        gui.widgetLabel(ibox, "Relative training set size:")
        gui.hSlider(ibox, self, "sample_p", minValue=1, maxValue=100,
                    ticks=20, vertical=False, labelFormat="%d %%",
                    callback=self.bootstrap_changed)
        gui.appendRadioButton(rbox, "Test on train data")
        gui.appendRadioButton(rbox, "Test on test data")

        rbox.layout().addSpacing(5)
        gui.button(rbox, self, "Apply", callback=self.apply)

        self.cbox = gui.widgetBox(self.controlArea, "Target class")
        self.class_selection_combo = gui.comboBox(
            self.cbox, self, "class_selection", items=[],
            callback=self._select_class,
            sendSelectedValue=True, valueType=str)

        gui.rubber(self.controlArea)

        self.view = QTreeView(
            rootIsDecorated=False,
            uniformRowHeights=True,
            wordWrap=True,
            editTriggers=QTreeView.NoEditTriggers
        )
        header = self.view.header()
        header.setResizeMode(QHeaderView.ResizeToContents)
        header.setDefaultAlignment(Qt.AlignCenter)
        header.setStretchLastSection(False)

        self.result_model = QStandardItemModel()
        self.view.setModel(self.result_model)
        self.view.setItemDelegate(ItemDelegate())
        self._update_header()

        box = gui.widgetBox(self.mainArea, "Evaluation Results")
        box.layout().addWidget(self.view)

    def set_learner(self, learner, key):
        """Add/replace the learner for input `key`; None removes it."""
        if key in self.learners and learner is None:
            del self.learners[key]
        else:
            self.learners[key] = Input(learner, None, ())
        self._update_stats_model()

    def set_train_data(self, data):
        """Set the training dataset (must have a class variable)."""
        self.error(0)
        if data and not data.domain.class_var:
            self.error(0, "Train data input requires a class variable")
            data = None
        self.train_data = data
        self.closeContext()
        if data is not None:
            self.openContext(data.domain.class_var)
        self._update_header()
        self._update_class_selection()
        self._invalidate()

    def set_test_data(self, data):
        """Set the separate testing dataset (must have a class variable)."""
        self.error(1)
        if data and not data.domain.class_var:
            self.error(1, "Test data input requires a class variable")
            data = None
        self.test_data = data
        # Only "test on test data" mode depends on this input.
        if self.resampling == OWTestLearners.TestOnTest:
            self._invalidate()

    def handleNewSignals(self):
        self.update_results()
        self.commit()

    def kfold_changed(self):
        # Editing the fold spinner implicitly selects K-fold resampling.
        self.resampling = OWTestLearners.KFold
        self._param_changed()

    def bootstrap_changed(self):
        # Editing the random-sampling controls selects Bootstrap.
        self.resampling = OWTestLearners.Bootstrap
        self._param_changed()

    def _param_changed(self):
        self._invalidate()

    def update_results(self):
        """Evaluate learners whose results are stale and refresh the table."""
        self.warning([1, 2])
        self.error(2)

        if self.train_data is None:
            return

        if self.resampling == OWTestLearners.TestOnTest:
            if self.test_data is None:
                self.warning(2, "Missing separate test data input")
                return
            elif self.test_data.domain.class_var != \
                    self.train_data.domain.class_var:
                self.error(2, ("Inconsistent class variable between test " +
                               "and train data sets"))
                return

        # items in need of an update
        items = [(key, input) for key, input in self.learners.items()
                 if input.results is None]
        # NOTE(review): `input` shadows the builtin here and below.
        learners = [input.learner for _, input in items]

        self.setStatusMessage("Running")

        if self.test_data is not None and \
                self.resampling != OWTestLearners.TestOnTest:
            self.warning(1, "Test data is present but unused. "
                            "Select 'Test on test data' to use it.")

        # TODO: Test each learner individually
        try:
            if self.resampling == OWTestLearners.KFold:
                results = Orange.evaluation.CrossValidation(
                    self.train_data, learners, k=self.k_folds,
                    store_data=True
                )
            elif self.resampling == OWTestLearners.LeaveOneOut:
                results = Orange.evaluation.LeaveOneOut(
                    self.train_data, learners, store_data=True
                )
            elif self.resampling == OWTestLearners.Bootstrap:
                p = self.sample_p / 100.0
                results = Orange.evaluation.Bootstrap(
                    self.train_data, learners,
                    n_resamples=self.n_repeat, p=p, store_data=True
                )
            elif self.resampling == OWTestLearners.TestOnTrain:
                results = Orange.evaluation.TestOnTrainingData(
                    self.train_data, learners, store_data=True
                )
            elif self.resampling == OWTestLearners.TestOnTest:
                if self.test_data is None:
                    return
                results = Orange.evaluation.TestOnTestData(
                    self.train_data, self.test_data, learners,
                    store_data=True
                )
            else:
                assert False
        except Exception as e:
            self.error(2, str(e))
            return

        # Keep combined results for later re-scoring on class change.
        self.results = results
        results = list(split_by_model(results))

        class_var = self.train_data.domain.class_var

        if class_var.is_discrete:
            stats = [classification_stats(self.one_vs_rest(res))
                     for res in results]
        else:
            stats = [regression_stats(res) for res in results]

        self._update_header()

        for (key, input), res, stat in zip(items, results, stats):
            self.learners[key] = input._replace(results=res, stats=stat)

        self.setStatusMessage("")

        self._update_stats_model()

    def _update_header(self):
        # Header labels depend on whether the class is discrete or numeric.
        headers = ["Method"]
        if self.train_data is not None:
            if self.train_data.domain.class_var.is_discrete:
                headers.extend(classification_stats.headers)
            else:
                headers.extend(regression_stats.headers)
        # Drop columns left over from a previous (wider) header set.
        for i in reversed(range(len(headers),
                                self.result_model.columnCount())):
            self.result_model.takeColumn(i)
        self.result_model.setHorizontalHeaderLabels(headers)

    def _update_stats_model(self):
        """Rebuild the score table rows from the stored per-learner stats."""
        model = self.view.model()

        for r in reversed(range(model.rowCount())):
            model.takeRow(r)

        for input in self.learners.values():
            name = learner_name(input.learner)
            row = []
            head = QStandardItem()
            head.setData(name, Qt.DisplayRole)
            row.append(head)
            for stat in input.stats:
                item = QStandardItem()
                item.setData(" {:.3f} ".format(stat[0]), Qt.DisplayRole)
                row.append(item)
            model.appendRow(row)

    def _update_class_selection(self):
        """Repopulate the target-class combo (hidden for regression data)."""
        self.class_selection_combo.clear()
        if not self.train_data:
            return
        if self.train_data.domain.class_var.is_discrete:
            self.cbox.setVisible(True)
            values = self.train_data.domain.class_var.values
            self.class_selection_combo.addItem("(None)")
            self.class_selection_combo.addItems(values)

            class_index = 0
            if self.class_selection in self.train_data.domain.class_var.values:
                # +1 because index 0 is the "(None)" entry.
                class_index = self.train_data.domain.class_var.values.index(self.class_selection)+1
            else:
                self.class_selection = '(None)'

            self.class_selection_combo.setCurrentIndex(class_index)
            self.previous_class_selection = "(None)"
        else:
            self.cbox.setVisible(False)

    def one_vs_rest(self, res):
        """Reduce multiclass results to binary (selected class vs rest);
        pass results through unchanged when no class is selected."""
        if self.class_selection != '(None)' and self.class_selection != 0:
            class_ = self.train_data.domain.class_var.values.index(self.class_selection)
            actual = res.actual == class_
            predicted = res.predicted == class_
            return Results(
                nmethods=1, domain=self.train_data.domain,
                actual=actual, predicted=predicted)
        else:
            return res

    def _select_class(self):
        """Recompute and redisplay scores after the target class changes."""
        if self.previous_class_selection == self.class_selection:
            return

        results = list(split_by_model(self.results))

        items = [(key, input) for key, input in self.learners.items()]
        learners = [input.learner for _, input in items]

        class_var = self.train_data.domain.class_var
        if class_var.is_discrete:
            stats = [classification_stats(self.one_vs_rest(res))
                     for res in results]
        else:
            stats = [regression_stats(res) for res in results]

        for (key, input), res, stat in zip(items, results, stats):
            self.learners[key] = input._replace(results=res, stats=stat)

        self.setStatusMessage("")
        self._update_stats_model()
        self.previous_class_selection = self.class_selection

    def _invalidate(self, which=None):
        """Drop cached results/stats for `which` keys (all keys if None)
        and blank the corresponding score cells in the view."""
        if which is None:
            which = self.learners.keys()

        all_keys = list(self.learners.keys())
        model = self.view.model()

        for key in which:
            self.learners[key] = \
                self.learners[key]._replace(results=None, stats=None)

            # NOTE(review): `key in self.learners` is always true here
            # (key was just assigned above); `key in all_keys` was likely
            # intended — confirm before changing.
            if key in self.learners:
                row = all_keys.index(key)
                for c in range(1, model.columnCount()):
                    item = model.item(row, c)
                    if item is not None:
                        item.setData(None, Qt.DisplayRole)

    def apply(self):
        self.update_results()
        self.commit()

    def commit(self):
        """Send merged evaluation results for all learners with results
        (or None when there are none)."""
        results = [val.results for val in self.learners.values()
                   if val.results is not None]
        if results:
            combined = results_merge(results)
            combined.learner_names = [learner_name(val.learner)
                                      for val in self.learners.values()]
        else:
            combined = None
        self.send("Evaluation Results", combined)
class OWTestLearners(OWWidget): name = "Test & Score" description = "Cross-validation accuracy estimation." icon = "icons/TestLearners1.svg" priority = 100 inputs = [ ("Learner", Learner, "set_learner", widget.Multiple), ("Data", Table, "set_train_data", widget.Default), ("Test Data", Table, "set_test_data"), ("Preprocessor", Preprocess, "set_preprocessor"), ] outputs = [("Predictions", Table), ("Evaluation Results", Results)] settingsHandler = settings.ClassValuesContextHandler() #: Resampling/testing types KFold, ShuffleSplit, LeaveOneOut, TestOnTrain, TestOnTest = 0, 1, 2, 3, 4 #: Numbers of folds NFolds = [2, 3, 5, 10, 20] #: Number of repetitions NRepeats = [2, 3, 5, 10, 20, 50, 100] #: Sample sizes SampleSizes = [5, 10, 20, 25, 30, 33, 40, 50, 60, 66, 70, 75, 80, 90, 95] #: Selected resampling type resampling = settings.Setting(0) #: Number of folds for K-fold cross validation n_folds = settings.Setting(3) #: Stratified sampling for K-fold cv_stratified = settings.Setting(True) #: Number of repeats for ShuffleSplit sampling n_repeats = settings.Setting(3) #: ShuffleSplit sample size sample_size = settings.Setting(9) #: Stratified sampling for Random Sampling shuffle_stratified = settings.Setting(True) TARGET_AVERAGE = "(Average over classes)" class_selection = settings.ContextSetting(TARGET_AVERAGE) class Error(OWWidget.Error): class_required = Msg("Train data input requires a class variable") class_required_test = Msg("Test data input requires a class variable") too_many_folds = Msg("Number of folds exceeds the data size") class_inconsistent = Msg("Test and train data sets " "have different classes") class Warning(OWWidget.Warning): missing_data = Msg("Instances with unknown target values were removed from{}data") test_data_missing = Msg("Missing separate test data input") scores_not_computed = Msg("Some scores could not be computed") test_data_unused = Msg("Test data is present but unused. 
" "Select 'Test on test data' to use it.") class Information(OWWidget.Information): data_sampled = Msg("Train data has been sampled") test_data_sampled = Msg("Test data has been sampled") def __init__(self): super().__init__() self.data = None self.test_data = None self.preprocessor = None self.train_data_missing_vals = False self.test_data_missing_vals = False #: An Ordered dictionary with current inputs and their testing results. self.learners = OrderedDict() sbox = gui.vBox(self.controlArea, "Sampling") rbox = gui.radioButtons(sbox, self, "resampling", callback=self._param_changed) gui.appendRadioButton(rbox, "Cross validation") ibox = gui.indentedBox(rbox) gui.comboBox( ibox, self, "n_folds", label="Number of folds: ", items=[str(x) for x in self.NFolds], maximumContentsLength=3, orientation=Qt.Horizontal, callback=self.kfold_changed, ) gui.checkBox(ibox, self, "cv_stratified", "Stratified", callback=self.kfold_changed) gui.appendRadioButton(rbox, "Random sampling") ibox = gui.indentedBox(rbox) gui.comboBox( ibox, self, "n_repeats", label="Repeat train/test: ", items=[str(x) for x in self.NRepeats], maximumContentsLength=3, orientation=Qt.Horizontal, callback=self.shuffle_split_changed, ) gui.comboBox( ibox, self, "sample_size", label="Training set size: ", items=["{} %".format(x) for x in self.SampleSizes], maximumContentsLength=5, orientation=Qt.Horizontal, callback=self.shuffle_split_changed, ) gui.checkBox(ibox, self, "shuffle_stratified", "Stratified", callback=self.shuffle_split_changed) gui.appendRadioButton(rbox, "Leave one out") gui.appendRadioButton(rbox, "Test on train data") gui.appendRadioButton(rbox, "Test on test data") self.cbox = gui.vBox(self.controlArea, "Target Class") self.class_selection_combo = gui.comboBox( self.cbox, self, "class_selection", items=[], sendSelectedValue=True, valueType=str, callback=self._on_target_class_changed, contentsLength=8, ) gui.rubber(self.controlArea) self.view = gui.TableView(wordWrap=True) header = 
self.view.horizontalHeader() header.setResizeMode(QHeaderView.ResizeToContents) header.setDefaultAlignment(Qt.AlignCenter) header.setStretchLastSection(False) self.result_model = QStandardItemModel(self) self.result_model.setHorizontalHeaderLabels(["Method"]) self.view.setModel(self.result_model) self.view.setItemDelegate(ItemDelegate()) box = gui.vBox(self.mainArea, "Evaluation Results") box.layout().addWidget(self.view) def sizeHint(self): return QSize(780, 1) def set_learner(self, learner, key): """ Set the input `learner` for `key`. """ if key in self.learners and learner is None: # Removed del self.learners[key] else: self.learners[key] = Input(learner, None, None) self._invalidate([key]) def set_train_data(self, data): """ Set the input training dataset. """ self.Information.data_sampled.clear() if data and not data.domain.class_var: self.Error.class_required() data = None else: self.Error.class_required.clear() if isinstance(data, SqlTable): if data.approx_len() < AUTO_DL_LIMIT: data = Table(data) else: self.Information.data_sampled() data_sample = data.sample_time(1, no_cache=True) data_sample.download_data(AUTO_DL_LIMIT, partial=True) data = Table(data_sample) self.train_data_missing_vals = data is not None and np.isnan(data.Y).any() if self.train_data_missing_vals or self.test_data_missing_vals: self.Warning.missing_data(self._which_missing_data()) if data: data = RemoveNaNClasses(data) else: self.Warning.missing_data.clear() self.data = data self.closeContext() if data is not None: self._update_class_selection() self.openContext(data.domain.class_var) self._invalidate() def set_test_data(self, data): """ Set the input separate testing dataset. 
""" self.Information.test_data_sampled.clear() if data and not data.domain.class_var: self.Error.class_required() data = None else: self.Error.class_required_test.clear() if isinstance(data, SqlTable): if data.approx_len() < AUTO_DL_LIMIT: data = Table(data) else: self.Information.test_data_sampled() data_sample = data.sample_time(1, no_cache=True) data_sample.download_data(AUTO_DL_LIMIT, partial=True) data = Table(data_sample) self.test_data_missing_vals = data is not None and np.isnan(data.Y).any() if self.train_data_missing_vals or self.test_data_missing_vals: self.Warning.missing_data(self._which_missing_data()) if data: data = RemoveNaNClasses(data) else: self.Warning.missing_data.clear() self.test_data = data if self.resampling == OWTestLearners.TestOnTest: self._invalidate() def _which_missing_data(self): return {(True, True): " ", (True, False): " train ", (False, True): " test "}[ # both, don't specify (self.train_data_missing_vals, self.test_data_missing_vals) ] def set_preprocessor(self, preproc): """ Set the input preprocessor to apply on the training data. """ self.preprocessor = preproc self._invalidate() def handleNewSignals(self): """Reimplemented from OWWidget.handleNewSignals.""" self._update_class_selection() self.commit() def kfold_changed(self): self.resampling = OWTestLearners.KFold self._param_changed() def shuffle_split_changed(self): self.resampling = OWTestLearners.ShuffleSplit self._param_changed() def _param_changed(self): self._invalidate() def _update_results(self): """ Run/evaluate the learners. 
""" self.Warning.test_data_unused.clear() self.Warning.test_data_missing.clear() self.warning() self.Error.class_inconsistent.clear() self.Error.too_many_folds.clear() self.error() if self.data is None: return class_var = self.data.domain.class_var if self.resampling == OWTestLearners.TestOnTest: if self.test_data is None: self.Warning.test_data_missing() return elif self.test_data.domain.class_var != class_var: self.Error.class_inconsistent() return # items in need of an update items = [(key, slot) for key, slot in self.learners.items() if slot.results is None] learners = [slot.learner for _, slot in items] if len(items) == 0: return if self.test_data is not None and self.resampling != OWTestLearners.TestOnTest: self.Warning.test_data_unused() rstate = 42 def update_progress(finished): self.progressBarSet(100 * finished) common_args = dict(store_data=True, preprocessor=self.preprocessor, callback=update_progress, n_jobs=-1) self.setStatusMessage("Running") with self.progressBar(): try: folds = self.NFolds[self.n_folds] if self.resampling == OWTestLearners.KFold: if len(self.data) < folds: self.Error.too_many_folds() return warnings = [] results = Orange.evaluation.CrossValidation( self.data, learners, k=folds, random_state=rstate, warnings=warnings, **common_args ) if warnings: self.warning(warnings[0]) elif self.resampling == OWTestLearners.LeaveOneOut: results = Orange.evaluation.LeaveOneOut(self.data, learners, **common_args) elif self.resampling == OWTestLearners.ShuffleSplit: train_size = self.SampleSizes[self.sample_size] / 100 results = Orange.evaluation.ShuffleSplit( self.data, learners, n_resamples=self.NRepeats[self.n_repeats], train_size=train_size, test_size=None, stratified=self.shuffle_stratified, random_state=rstate, **common_args ) elif self.resampling == OWTestLearners.TestOnTrain: results = Orange.evaluation.TestOnTrainingData(self.data, learners, **common_args) elif self.resampling == OWTestLearners.TestOnTest: results = 
Orange.evaluation.TestOnTestData(self.data, self.test_data, learners, **common_args) else: assert False except (RuntimeError, ValueError) as e: self.error(str(e)) self.setStatusMessage("") return else: self.error() learner_key = {slot.learner: key for key, slot in self.learners.items()} for learner, result in zip(learners, results.split_by_model()): stats = None if class_var.is_discrete: scorers = classification_stats.scores elif class_var.is_continuous: scorers = regression_stats.scores else: scorers = None if scorers: ex = result.failed[0] if ex: stats = [Try.Fail(ex)] * len(scorers) result = Try.Fail(ex) else: stats = [Try(lambda: score(result)) for score in scorers] result = Try.Success(result) key = learner_key[learner] self.learners[key] = self.learners[key]._replace(results=result, stats=stats) self.setStatusMessage("") def _update_header(self): # Set the correct horizontal header labels on the results_model. headers = ["Method"] if self.data is not None: if self.data.domain.has_discrete_class: headers.extend(classification_stats.headers) else: headers.extend(regression_stats.headers) # remove possible extra columns from the model. for i in reversed(range(len(headers), self.result_model.columnCount())): self.result_model.takeColumn(i) self.result_model.setHorizontalHeaderLabels(headers) def _update_stats_model(self): # Update the results_model with up to date scores. # Note: The target class specific scores (if requested) are # computed as needed in this method. 
model = self.view.model() # clear the table model, but preserving the header labels for r in reversed(range(model.rowCount())): model.takeRow(r) target_index = None if self.data is not None: class_var = self.data.domain.class_var if self.data.domain.has_discrete_class and self.class_selection != self.TARGET_AVERAGE: target_index = class_var.values.index(self.class_selection) else: class_var = None errors = [] has_missing_scores = False for key, slot in self.learners.items(): name = learner_name(slot.learner) head = QStandardItem(name) head.setData(key, Qt.UserRole) if isinstance(slot.results, Try.Fail): head.setToolTip(str(slot.results.exception)) head.setText("{} (error)".format(name)) head.setForeground(QtGui.QBrush(Qt.red)) errors.append( "{name} failed with error:\n" "{exc.__class__.__name__}: {exc!s}".format(name=name, exc=slot.results.exception) ) row = [head] if class_var is not None and class_var.is_discrete and target_index is not None: if slot.results is not None and slot.results.success: ovr_results = results_one_vs_rest(slot.results.value, target_index) stats = [Try(lambda: score(ovr_results)) for score in classification_stats.scores] else: stats = None else: stats = slot.stats if stats is not None: for stat in stats: item = QStandardItem() if stat.success: item.setText("{:.3f}".format(stat.value[0])) else: item.setToolTip(str(stat.exception)) has_missing_scores = True row.append(item) model.appendRow(row) self.error("\n".join(errors), shown=bool(errors)) self.Warning.scores_not_computed(shown=has_missing_scores) def _update_class_selection(self): self.class_selection_combo.setCurrentIndex(-1) self.class_selection_combo.clear() if not self.data: return if self.data.domain.has_discrete_class: self.cbox.setVisible(True) class_var = self.data.domain.class_var items = [self.TARGET_AVERAGE] + class_var.values self.class_selection_combo.addItems(items) class_index = 0 if self.class_selection in class_var.values: class_index = 
class_var.values.index(self.class_selection) + 1 self.class_selection_combo.setCurrentIndex(class_index) self.class_selection = items[class_index] else: self.cbox.setVisible(False) def _on_target_class_changed(self): self._update_stats_model() def _invalidate(self, which=None): # Invalidate learner results for `which` input keys # (if None then all learner results are invalidated) if which is None: which = self.learners.keys() model = self.view.model() statmodelkeys = [model.item(row, 0).data(Qt.UserRole) for row in range(model.rowCount())] for key in which: self.learners[key] = self.learners[key]._replace(results=None, stats=None) if key in statmodelkeys: row = statmodelkeys.index(key) for c in range(1, model.columnCount()): item = model.item(row, c) if item is not None: item.setData(None, Qt.DisplayRole) item.setData(None, Qt.ToolTipRole) self.commit() def commit(self): """Recompute and output the results""" self._update_header() # Update the view to display the model names self._update_stats_model() self._update_results() self._update_stats_model() valid = [slot for slot in self.learners.values() if slot.results is not None and slot.results.success] if valid: # Evaluation results combined = results_merge([slot.results.value for slot in valid]) combined.learner_names = [learner_name(slot.learner) for slot in valid] # Predictions & Probabilities predictions = combined.get_augmented_data(combined.learner_names) else: combined = None predictions = None self.send("Evaluation Results", combined) self.send("Predictions", predictions) def send_report(self): """Report on the testing schema and results""" if not self.data or not self.learners: return if self.resampling == self.KFold: stratified = "Stratified " if self.cv_stratified else "" items = [("Sampling type", "{}{}-fold Cross validation".format(stratified, self.NFolds[self.n_folds]))] elif self.resampling == self.LeaveOneOut: items = [("Sampling type", "Leave one out")] elif self.resampling == self.ShuffleSplit: 
stratified = "Stratified " if self.shuffle_stratified else "" items = [ ( "Sampling type", "{}Shuffle split, {} random samples with {}% data ".format( stratified, self.NRepeats[self.n_repeats], self.SampleSizes[self.sample_size] ), ) ] elif self.resampling == self.TestOnTrain: items = [("Sampling type", "No sampling, test on training data")] elif self.resampling == self.TestOnTest: items = [("Sampling type", "No sampling, test on testing data")] else: items = [] if self.data.domain.has_discrete_class: items += [("Target class", self.class_selection.strip("()"))] if items: self.report_items("Settings", items) self.report_table("Scores", self.view)
class QgsAttributeTableDialog(QDialog):
    """Dialog showing (and optionally editing) a vector layer's attribute
    table in a QTableView backed by a QStandardItemModel.

    Model layout: column 0 holds the feature id ("fid"); the layer's
    attribute fields follow in columns 1..n.  Cell edits and row deletions
    are buffered (``changeItemList`` / ``selectRows``) and only pushed into
    the layer's edit buffer by :meth:`saveEditing`.
    """

    def __init__(self, parent, vectorlayer):
        """Build the toolbar, the table view and populate it from *vectorlayer*."""
        QDialog.__init__(self, parent)
        self.baseLayer = vectorlayer

        # Capability flags of the layer's data provider.
        self.canChangeAttributes = self.validate(
            QgsVectorDataProvider.ChangeAttributeValues)
        self.canDeleteFeatures = self.validate(
            QgsVectorDataProvider.DeleteFeatures)
        self.canAddAttributes = self.validate(
            QgsVectorDataProvider.AddAttributes)
        self.canDeleteAttributes = self.validate(
            QgsVectorDataProvider.DeleteAttributes)
        self.canAddFeatures = self.validate(QgsVectorDataProvider.AddFeatures)

        gridLayout = QGridLayout(self)
        self.setLayout(gridLayout)
        self.setWindowTitle("Attribute Table")
        self.setFixedSize(QSize(800, 600))

        editorToolbar = QToolBar()
        editorToolbar.setFixedSize(QSize(768, 48))

        self.actionToggleEditing = QAction(QIcon("Resource\\edit.png"),
                                           "ToggleEditing", self)
        self.actionToggleEditing.triggered.connect(self.toggleEditing)
        # Editing is offered only when the provider supports at least one
        # editing capability and the layer itself is not read-only.
        canEdit = (self.canChangeAttributes or self.canDeleteFeatures
                   or self.canAddAttributes or self.canDeleteAttributes
                   or self.canAddFeatures) and not self.baseLayer.isReadOnly()
        self.actionToggleEditing.setEnabled(canEdit)
        self.actionToggleEditing.setCheckable(True)
        editorToolbar.addAction(self.actionToggleEditing)

        self.actionSave = QAction(QIcon("Resource\\filesave.png"),
                                  "Save Edits", self)
        self.actionSave.triggered.connect(self.saveEditing)
        self.actionSave.setCheckable(False)
        self.actionSave.setEnabled(False)
        editorToolbar.addAction(self.actionSave)

        self.actiondeleteRows = QAction(
            QIcon("Resource\\mActionDeleteSelected.png"),
            "Delete selected features", self)
        self.actiondeleteRows.triggered.connect(self.deleteRows)
        self.actiondeleteRows.setCheckable(False)
        self.actiondeleteRows.setEnabled(False)
        editorToolbar.addAction(self.actiondeleteRows)

        self.actionUnselectAll = QAction(
            QIcon("Resource\\mActionDeselectAll.png"), "Unselect All", self)
        self.actionUnselectAll.triggered.connect(self.unselectAll)
        self.actionUnselectAll.setCheckable(False)
        self.actionUnselectAll.setEnabled(True)
        editorToolbar.addAction(self.actionUnselectAll)

        self.actionSelectedToZoom = QAction(
            QIcon("Resource\\mActionZoomToSelected.png"),
            "Zoom map to the selected rows", self)
        self.actionSelectedToZoom.triggered.connect(self.selectedToZoom)
        self.actionSelectedToZoom.setCheckable(False)
        self.actionSelectedToZoom.setEnabled(True)
        editorToolbar.addAction(self.actionSelectedToZoom)

        gridLayout.addWidget(editorToolbar, 0, 0, 1, 1)

        self.model = QStandardItemModel()
        self.attributeTable = QTableView(self)
        self.attributeTable.setModel(self.model)
        self.attributeTable.setColumnWidth(0, 200)
        self.attributeTable.setColumnWidth(1, 160)
        self.attributeTable.clicked.connect(self.tableSelectionChanged)
        self.attributeTable.setSortingEnabled(True)

        self.changeItemList = []  # QStandardItems edited since last save
        self.selectRows = []      # feature ids queued for deletion on save
        self.isSave = True        # True when there are no unsaved edits
        self.initTable()
        gridLayout.addWidget(self.attributeTable, 1, 0, 1, 1)

    def _selectedRows(self):
        """Return the distinct row numbers of the current table selection."""
        rows = []
        for index in self.attributeTable.selectedIndexes():
            if index.row() not in rows:
                rows.append(index.row())
        return rows

    def _rowFeatureId(self, row):
        """Return the feature id stored in column 0 of *row*."""
        return int(self.model.item(row, 0).text())

    def tableSelectionChanged(self):
        """Mirror the table selection onto the map layer selection."""
        idxList = self.attributeTable.selectedIndexes()
        if idxList is not None and len(idxList) > 0:
            self.baseLayer.removeSelection()
            fidList = [self._rowFeatureId(idx.row()) for idx in idxList]
            self.baseLayer.setSelectedFeatures(fidList)

    def toggleEditing(self):
        """Enter or leave the layer's editing session.

        Leaving with unsaved edits pops up a Save/Discard/Cancel prompt.
        """
        if self.baseLayer is None:
            return
        if not self.actionToggleEditing.isChecked():
            # Leaving edit mode.
            if self.isSave:
                self.baseLayer.commitChanges()
                self._leaveEditMode()
            else:
                button = QMessageBox.warning(
                    self, "Stop Editing",
                    "Do you want to save the changes to layer?",
                    QMessageBox.Save | QMessageBox.Discard |
                    QMessageBox.Cancel)
                if button == QMessageBox.Cancel:
                    # Keep the toggle checked: we stay in edit mode.
                    self.actionToggleEditing.setChecked(True)
                elif button == QMessageBox.Save:
                    self.saveEditing()
                    self.baseLayer.commitChanges()
                    self._leaveEditMode()
                else:
                    # Discard: reload original values from the layer before
                    # committing the (now effectively empty) edit buffer.
                    self.initTable()
                    self.baseLayer.commitChanges()
                    self._leaveEditMode()
        else:
            # Entering edit mode.
            self.actionSave.setEnabled(True)
            self.actiondeleteRows.setEnabled(True)
            self.baseLayer.startEditing()
            self.toggleEditingTable(True)
            self.model.itemChanged.connect(self.attributeChanged)

    def _leaveEditMode(self):
        """Disable editing actions and stop listening for cell changes."""
        self.actionSave.setEnabled(False)
        self.actiondeleteRows.setEnabled(False)
        self.model.itemChanged.disconnect(self.attributeChanged)
        self.toggleEditingTable(False)

    def toggleEditingTable(self, isEditable):
        """Set the editable flag on every item of the model."""
        for col in range(self.model.columnCount()):
            for row in range(self.model.rowCount()):
                self.model.item(row, col).setEditable(isEditable)

    def attributeChanged(self, standardItem):
        """itemChanged slot: buffer the edited cell until saveEditing()."""
        self.isSave = False
        self.changeItemList.append(standardItem)

    def saveEditing(self):
        """Push buffered cell edits and queued deletions into the layer's
        edit buffer (the session commit happens in toggleEditing)."""
        self.isSave = True
        if len(self.changeItemList) > 0:
            for standardItem in self.changeItemList:
                # Fix: the original used the table row index as the feature
                # id and the raw table column as the field index.  Column 0
                # holds the fid, so attribute fields start at column 1.
                fid = self._rowFeatureId(standardItem.row())
                self.baseLayer.changeAttributeValue(
                    fid, standardItem.column() - 1, standardItem.text())
            self.changeItemList = []
        if len(self.selectRows) > 0:
            for fid in self.selectRows:
                self.baseLayer.deleteFeature(fid)
            self.selectRows = []

    def initTable(self):
        """(Re)populate the model from the layer — selected features when a
        selection exists, otherwise all features."""
        self.model.clear()
        layer = self.baseLayer
        headersList = ["fid"]
        for field in layer.pendingFields():
            headersList.append(field.name())
        self.model.setHorizontalHeaderLabels(headersList)
        if len(layer.selectedFeatures()) > 0:
            features = layer.selectedFeatures()
        else:
            features = layer.getFeatures()
        for feature in features:
            record = [QStandardItem(str(feature.id()))]
            for field in feature.fields():
                name = field.name()
                attribute = feature.attribute(name).toString()
                stdItemValue = QStandardItem(attribute)
                stdItemValue.setEditable(False)
                record.append(stdItemValue)
            self.model.appendRow(record)

    def deleteRows(self):
        """Remove the selected rows from the table and queue the matching
        feature ids for deletion on the next save.

        Fixes two defects of the original implementation: rows are now
        removed in descending order (ascending removal shifted the indexes
        of the remaining rows, deleting the wrong ones), and the *feature
        ids* from column 0 — not the row indexes — are queued for
        deleteFeature().
        """
        rows = self._selectedRows()
        if not rows:
            return
        self.isSave = False
        # Resolve fids before the rows disappear from the model.
        self.selectRows = [self._rowFeatureId(row) for row in rows]
        for row in sorted(rows, reverse=True):
            self.model.takeRow(row)

    def unselectAll(self):
        """Clear the table selection."""
        if len(self.attributeTable.selectedIndexes()) > 0:
            self.attributeTable.clearSelection()

    def selectedToZoom(self):
        """Select the features behind the selected rows on the layer and
        zoom the map canvas to them.

        Fix: the original stored row indexes into ``self.selectRows`` (which
        saveEditing() would later have deleted as features!) and passed row
        indexes instead of feature ids to setSelectedFeatures().
        """
        rows = self._selectedRows()
        if not rows:
            return
        self.baseLayer.removeSelection()
        fids = [self._rowFeatureId(row) for row in rows]
        self.baseLayer.setSelectedFeatures(fids)
        define._canvas.zoomToSelected(self.baseLayer)

    def validate(self, condition):
        """Return True if the layer's data provider has capability *condition*."""
        return bool(self.baseLayer.dataProvider().capabilities() & condition)
class OWTestLearners(widget.OWWidget):
    """Widget that evaluates the connected learners on the train (and
    optionally test) data with the chosen resampling scheme and outputs the
    merged evaluation results."""

    name = "Test Learners"
    description = ""
    icon = "icons/TestLearners1.svg"
    priority = 100

    inputs = [("Learner", Orange.classification.Fitter,
               "set_learner", widget.Multiple),
              ("Data", Orange.data.Table, "set_train_data", widget.Default),
              ("Test Data", Orange.data.Table, "set_test_data")]
    outputs = [("Evaluation Results", testing.Results)]

    #: Resampling/testing types
    KFold, LeaveOneOut, Bootstrap, TestOnTrain, TestOnTest = 0, 1, 2, 3, 4

    #: Selected resampling type
    resampling = settings.Setting(0)
    #: Number of folds for K-fold cross validation
    k_folds = settings.Setting(10)
    #: Number of repeats for bootstrap sampling
    n_repeat = settings.Setting(10)
    #: Bootstrap sampling p
    sample_p = settings.Setting(75)

    def __init__(self, parent=None):
        """Build the sampling controls and the results tree view."""
        super().__init__(parent)

        self.train_data = None
        self.test_data = None

        #: An Ordered dictionary with current inputs and their testing
        #: results.
        self.learners = OrderedDict()

        # -- Control area: resampling scheme radio buttons + parameters.
        sbox = gui.widgetBox(self.controlArea, "Sampling")
        rbox = gui.radioButtons(
            sbox, self, "resampling", callback=self._param_changed
        )
        gui.appendRadioButton(rbox, "Cross validation")
        ibox = gui.indentedBox(rbox)
        gui.spin(ibox, self, "k_folds", 2, 50, label="Number of folds:",
                 callback=self._param_changed)
        gui.appendRadioButton(rbox, "Leave one out")
        gui.appendRadioButton(rbox, "Random sampling")
        ibox = gui.indentedBox(rbox)
        gui.spin(ibox, self, "n_repeat", 2, 50, label="Repeat train/test",
                 callback=self._param_changed)
        gui.widgetLabel(ibox, "Relative training set size:")
        gui.hSlider(ibox, self, "sample_p", minValue=1, maxValue=100,
                    ticks=20, vertical=False, callback=self._param_changed)
        gui.appendRadioButton(rbox, "Test on train data")
        gui.appendRadioButton(rbox, "Test on test data")
        rbox.layout().addSpacing(5)
        gui.button(rbox, self, "Apply", callback=self.apply)
        gui.rubber(self.controlArea)

        # -- Main area: one row per learner, one column per score.
        self.view = QTreeView(
            rootIsDecorated=False,
            uniformRowHeights=True,
            wordWrap=True,
            editTriggers=QTreeView.NoEditTriggers
        )
        self.result_model = QStandardItemModel()
        self.view.setModel(self.result_model)
        self._update_header()
        box = gui.widgetBox(self.mainArea, "Evaluation Results")
        box.layout().addWidget(self.view)

    def set_learner(self, learner, key):
        """Multiple-input slot: add/replace the learner under *key*, or
        remove it when *learner* is None."""
        if key in self.learners and learner is None:
            del self.learners[key]
        else:
            self.learners[key] = Input(learner, None, ())
        self._update_stats_model()

    def set_train_data(self, data):
        """Default data input: store the training table and invalidate
        every cached result."""
        self.train_data = data
        self._update_header()
        self._invalidate()

    def set_test_data(self, data):
        """Test-data input: only re-evaluate when it is actually in use."""
        self.test_data = data
        if self.resampling == OWTestLearners.TestOnTest:
            self._invalidate()

    def handleNewSignals(self):
        """Reimplemented from OWWidget: run evaluation once per batch of
        input changes, then push the output."""
        self.update_results()
        self.commit()

    def _param_changed(self):
        self._invalidate()

    def update_results(self):
        """Evaluate learners whose results were invalidated and refresh
        the score table."""
        self.warning(1, "")
        if self.train_data is None:
            return
        # items in need of an update
        items = [(key, input) for key, input in self.learners.items()
                 if input.results is None]
        learners = [input.learner for _, input in items]
        self.setStatusMessage("Running")
        if self.test_data is not None and \
                self.resampling != OWTestLearners.TestOnTest:
            self.warning(1, "Select 'Test on test data' to use the test data")
        # TODO: Test each learner individually
        if self.resampling == OWTestLearners.KFold:
            results = testing.CrossValidation(
                self.train_data, learners, k=self.k_folds, store_data=True
            )
        elif self.resampling == OWTestLearners.LeaveOneOut:
            results = testing.LeaveOneOut(
                self.train_data, learners, store_data=True
            )
        elif self.resampling == OWTestLearners.Bootstrap:
            # sample_p is stored as a percentage.
            p = self.sample_p / 100.0
            results = testing.Bootstrap(
                self.train_data, learners, n_resamples=self.n_repeat, p=p,
                store_data=True
            )
        elif self.resampling == OWTestLearners.TestOnTrain:
            results = testing.TestOnTrainingData(
                self.train_data, learners, store_data=True
            )
        elif self.resampling == OWTestLearners.TestOnTest:
            if self.test_data is None:
                return
            results = testing.TestOnTestData(
                self.train_data, self.test_data, learners, store_data=True
            )
        else:
            assert False

        # One Results object per learner, then score each of them.
        results = list(split_by_model(results))
        class_var = self.train_data.domain.class_var
        if is_discrete(class_var):
            test_stats = classification_stats
        else:
            test_stats = regression_stats
        self._update_header()
        stats = [test_stats(res) for res in results]
        for (key, input), res, stat in zip(items, results, stats):
            self.learners[key] = input._replace(results=res, stats=stat)
        self.setStatusMessage("")
        self._update_stats_model()

    def _update_header(self):
        """Set the score-column header labels according to the task type
        (binary classification adds the binary-only score columns)."""
        headers = ["Method"]
        if self.train_data is not None:
            if is_discrete(self.train_data.domain.class_var):
                if len(self.train_data.domain.class_var.values) == 2:
                    headers.extend(classification_stats.headers_binary)
                headers.extend(classification_stats.headers)
            else:
                headers.extend(regression_stats.headers)
        # Remove possible extra columns left over from a previous dataset.
        for i in reversed(range(len(headers),
                                self.result_model.columnCount())):
            self.result_model.takeColumn(i)
        self.result_model.setHorizontalHeaderLabels(headers)

    def _update_stats_model(self):
        """Rebuild the score table: one row per learner, one numeric cell
        per score."""
        model = self.view.model()
        for r in reversed(range(model.rowCount())):
            model.takeRow(r)
        for input in self.learners.values():
            name = learner_name(input.learner)
            row = []
            head = QStandardItem()
            head.setData(name, Qt.DisplayRole)
            row.append(head)
            for stat in input.stats:
                item = QStandardItem()
                item.setData(float(stat[0]), Qt.DisplayRole)
                row.append(item)
            model.appendRow(row)
        self.view.resizeColumnToContents(0)

    def _invalidate(self, which=None):
        """Drop cached results/stats for the given input keys (all keys
        when *which* is None) and blank the matching score cells."""
        if which is None:
            which = self.learners.keys()
        all_keys = list(self.learners.keys())
        model = self.view.model()
        for key in which:
            self.learners[key] = \
                self.learners[key]._replace(results=None, stats=None)
            # NOTE(review): this check is always true for keys taken from
            # self.learners; it presumably guards keys not shown in the
            # model — confirm against how `which` is populated by callers.
            if key in self.learners:
                row = all_keys.index(key)
                for c in range(1, model.columnCount()):
                    item = model.item(row, c)
                    if item is not None:
                        item.setData(None, Qt.DisplayRole)

    def apply(self):
        """'Apply' button: force a re-run and re-send of the results."""
        self.update_results()
        self.commit()

    def commit(self):
        """Merge all available per-learner results and send them downstream
        (None when no learner has results yet)."""
        results = [val.results for val in self.learners.values()
                   if val.results is not None]
        if results:
            combined = results_merge(results)
            combined.fitter_names = [learner_name(val.learner)
                                     for val in self.learners.values()]
        else:
            combined = None
        self.send("Evaluation Results", combined)