def find_rules(self):
    if self.data is None:
        return
    data = self.data
    self.table.model().clear()

    n_examples = len(data)
    NumericItem = self.NumericItem
    StandardItem = self.StandardItem
    filterSearch = self.filterSearch
    itemsetMin = self.filterAntecedentMin + self.filterConsequentMin
    itemsetMax = self.filterAntecedentMax + self.filterConsequentMax
    isSizeMatch = self.isSizeMatch
    isRegexMatch = self.isRegexMatch

    X, mapping = OneHot.encode(data, self.classify)
    self.onehot_mapping = mapping
    names = {item: '{}={}'.format(var.name, val)
             for item, var, val in OneHot.decode(mapping, data, mapping)}
    # Items that consequent must include if classifying
    class_items = {item
                   for item, var, val in OneHot.decode(mapping, data, mapping)
                   if var is data.domain.class_var} if self.classify else set()
    assert bool(class_items) == bool(self.classify)

    model = QStandardItemModel(self.table)
    for col, (label, tooltip) in enumerate([
            ("Supp", "Support"),
            ("Conf", "Confidence (support / antecedent support)"),
            ("Covr", "Coverage (antecedent support / number of examples)"),
            ("Strg", "Strength (consequent support / antecedent support)"),
            ("Lift", "Lift (number of examples * confidence / consequent support)"),
            ("Levr", "Leverage ((support * number of examples - antecedent support * consequent support) / (number of examples)²)"),
            ("Antecedent", None),
            ("", None),
            ("Consequent", None)]):
        item = QStandardItem(label)
        item.setToolTip(tooltip)
        model.setHorizontalHeaderItem(col, item)

    #~ # Aggregate rules by common (support,confidence) for scatterplot
    #~ scatter_agg = defaultdict(list)

    # Find itemsets
    nRules = 0
    itemsets = {}
    progress = gui.ProgressBar(self, self.maxRules + 1)
    for itemset, support in frequent_itemsets(X, self.minSupport / 100):
        itemsets[itemset] = support

        if class_items and not class_items & itemset:
            continue

        # Filter itemset by joined filters before descending into it
        itemset_str = ' '.join(names[i] for i in itemset)
        if (filterSearch and
                (len(itemset) < itemsetMin or
                 itemsetMax < len(itemset) or
                 not isRegexMatch(itemset_str, itemset_str))):
            continue

        for rule in gen_assoc_rules(itemsets, self.minConfidence / 100, itemset):
            (left, right), support, confidence = rule

            if class_items and right - class_items:
                continue
            if filterSearch and not isSizeMatch(len(left), len(right)):
                continue
            left_str = ' '.join(names[i] for i in sorted(left))
            right_str = ' '.join(names[i] for i in sorted(right))
            if filterSearch and not isRegexMatch(left_str, right_str):
                continue

            # All filters matched, calculate stats and add table row
            _, _, _, coverage, strength, lift, leverage = next(
                rules_stats((rule,), itemsets, n_examples))

            support_item = NumericItem(support / n_examples)
            # Set row data on first column
            support_item.setData((itemset - class_items,
                                  class_items and (class_items & itemset).pop()),
                                 self.ROW_DATA_ROLE)
            left_item = StandardItem(left_str, len(left))
            left_item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
            model.appendRow([support_item,
                             NumericItem(confidence),
                             NumericItem(coverage),
                             NumericItem(strength),
                             NumericItem(lift),
                             NumericItem(leverage),
                             left_item,
                             StandardItem('→'),
                             StandardItem(right_str, len(right))])
            #~ scatter_agg[(round(support / n_examples, 2), round(confidence, 2))].append((left, right))
            nRules += 1
            progress.advance()
            if nRules >= self.maxRules:
                break
        if nRules >= self.maxRules:
            break

    # Populate the TableView
    table = self.table
    table.setHidden(True)
    table.setSortingEnabled(False)
    proxy_model = self.proxy_model
    proxy_model.setSourceModel(model)
    table.setModel(proxy_model)
    for i in range(model.columnCount()):
        table.resizeColumnToContents(i)
    table.setSortingEnabled(True)
    table.setHidden(False)
    progress.finish()

    self.nRules = nRules
    self.nFilteredRules = proxy_model.rowCount()  # TODO: continue; also add in owitemsets
    self.nSelectedRules = 0
    self.nSelectedExamples = 0

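
# The column tooltips above define each statistic in terms of itemset supports.
# The helper below is an illustrative sketch (not part of the widget; all names
# are hypothetical) that recomputes those same quantities from absolute counts,
# so the Supp/Conf/Covr/Strg/Lift/Levr columns can be checked independently of
# rules_stats().
def rule_metrics(support, antecedent_support, consequent_support, n_examples):
    """Return (support, confidence, coverage, strength, lift, leverage),
    with support expressed as a fraction, from absolute counts."""
    confidence = support / antecedent_support
    coverage = antecedent_support / n_examples
    strength = consequent_support / antecedent_support
    lift = n_examples * confidence / consequent_support
    leverage = (support * n_examples
                - antecedent_support * consequent_support) / n_examples ** 2
    return support / n_examples, confidence, coverage, strength, lift, leverage
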
def find_rules(self):
    if self.data is None or not len(self.data):
        return
    if self._is_running:
        return
    self._is_running = True

    data = self.data
    self.table.model().clear()

    n_examples = len(data)
    NumericItem = self.NumericItem
    StandardItem = self.StandardItem
    filterSearch = self.filterSearch
    itemsetMin = self.filterAntecedentMin + self.filterConsequentMin
    itemsetMax = self.filterAntecedentMax + self.filterConsequentMax
    isSizeMatch = self.isSizeMatch
    isRegexMatch = self.isRegexMatch

    X, mapping = OneHot.encode(data, self.classify)
    self.error(911)
    if X is None:
        self.error(911, 'Need some discrete data to work with.')

    self.onehot_mapping = mapping
    ITEM_FMT = '{}' if issparse(data.X) else '{}={}'
    names = {item: ('{}={}' if var is data.domain.class_var else ITEM_FMT).format(var.name, val)
             for item, var, val in OneHot.decode(mapping, data, mapping)}
    # Items that consequent must include if classifying
    class_items = {item
                   for item, var, val in OneHot.decode(mapping, data, mapping)
                   if var is data.domain.class_var} if self.classify else set()
    assert bool(class_items) == bool(self.classify)

    model = QStandardItemModel(self.table)
    for col, (label, tooltip) in enumerate([
            ("Supp", "Support"),
            ("Conf", "Confidence (support / antecedent support)"),
            ("Covr", "Coverage (antecedent support / number of examples)"),
            ("Strg", "Strength (consequent support / antecedent support)"),
            ("Lift", "Lift (number of examples * confidence / consequent support)"),
            ("Levr", "Leverage ((support * number of examples - antecedent support * consequent support) / (number of examples)²)"),
            ("Antecedent", None),
            ("", None),
            ("Consequent", None)]):
        item = QStandardItem(label)
        item.setToolTip(tooltip)
        model.setHorizontalHeaderItem(col, item)

    #~ # Aggregate rules by common (support,confidence) for scatterplot
    #~ scatter_agg = defaultdict(list)

    # Find itemsets
    nRules = 0
    itemsets = {}
    with self.progressBar(self.maxRules + 1) as progress:
        for itemset, support in frequent_itemsets(X, self.minSupport / 100):
            itemsets[itemset] = support

            if class_items and not class_items & itemset:
                continue

            # Filter itemset by joined filters before descending into it
            itemset_str = ' '.join(names[i] for i in itemset)
            if (filterSearch and
                    (len(itemset) < itemsetMin or
                     itemsetMax < len(itemset) or
                     not isRegexMatch(itemset_str, itemset_str))):
                continue

            for rule in association_rules(itemsets, self.minConfidence / 100, itemset):
                left, right, support, confidence = rule

                if class_items and right - class_items:
                    continue
                if filterSearch and not isSizeMatch(len(left), len(right)):
                    continue
                left_str = ', '.join(names[i] for i in sorted(left))
                right_str = ', '.join(names[i] for i in sorted(right))
                if filterSearch and not isRegexMatch(left_str, right_str):
                    continue

                # All filters matched, calculate stats and add table row
                _, _, _, _, coverage, strength, lift, leverage = next(
                    rules_stats((rule,), itemsets, n_examples))

                support_item = NumericItem(support / n_examples)
                # Set row data on first column
                support_item.setData((itemset - class_items,
                                      class_items and (class_items & itemset).pop()),
                                     self.ROW_DATA_ROLE)
                left_item = StandardItem(left_str, len(left))
                left_item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                model.appendRow([support_item,
                                 NumericItem(confidence),
                                 NumericItem(coverage),
                                 NumericItem(strength),
                                 NumericItem(lift),
                                 NumericItem(leverage),
                                 left_item,
                                 StandardItem('→'),
                                 StandardItem(right_str, len(right))])
                #~ scatter_agg[(round(support / n_examples, 2), round(confidence, 2))].append((left, right))
                nRules += 1
                progress.advance()
                if nRules >= self.maxRules:
                    break
            if nRules >= self.maxRules:
                break

    # Populate the TableView
    table = self.table
    table.setHidden(True)
    table.setSortingEnabled(False)
    proxy_model = self.proxy_model
    proxy_model.setSourceModel(model)
    table.setModel(proxy_model)
    for i in range(model.columnCount()):
        table.resizeColumnToContents(i)
    table.setSortingEnabled(True)
    table.setHidden(False)

    self.nRules = nRules
    self.nFilteredRules = proxy_model.rowCount()  # TODO: continue; also add in owitemsets
    self.nSelectedRules = 0
    self.nSelectedExamples = 0
    self._is_running = False

def find_itemsets(self):
    if self.data is None:
        return
    if self._is_running:
        return
    self._is_running = True

    data = self.data
    self.tree.clear()
    self.tree.setUpdatesEnabled(False)
    self.tree.blockSignals(True)

    class ItemDict(dict):
        def __init__(self, item):
            self.item = item

    top = ItemDict(self.tree.invisibleRootItem())
    X, mapping = OneHot.encode(data)
    self.onehot_mapping = mapping
    ITEM_FMT = '{}' if issparse(data.X) else '{}={}'
    names = {item: ITEM_FMT.format(var.name, val)
             for item, var, val in OneHot.decode(mapping.keys(), data, mapping)}
    nItemsets = 0
    filterSearch = self.filterSearch
    filterMinItems, filterMaxItems = self.filterMinItems, self.filterMaxItems
    isRegexMatch = self.isRegexMatch

    # Find itemsets and populate the TreeView
    with self.progressBar(self.maxItemsets + 1) as progress:
        for itemset, support in frequent_itemsets(X, self.minSupport / 100):

            if filterSearch and not filterMinItems <= len(itemset) <= filterMaxItems:
                continue

            parent = top
            first_new_item = None
            itemset_matches_filter = False

            for item in sorted(itemset):
                name = names[item]

                if filterSearch and not itemset_matches_filter:
                    itemset_matches_filter = isRegexMatch(name)

                child = parent.get(name)
                if child is None:
                    try:
                        wi = self.TreeWidgetItem(
                            parent.item,
                            [name, str(support),
                             '{:.4g}'.format(100 * support / len(data))])
                    except RuntimeError:
                        # FIXME: When autoFind was in effect and the support
                        # slider was moved, this line excepted with:
                        #   RuntimeError: wrapped C/C++ object of type
                        #   TreeWidgetItem has been deleted
                        return
                    wi.setData(0, self.ITEM_DATA_ROLE, item)
                    child = parent[name] = ItemDict(wi)
                    if first_new_item is None:
                        first_new_item = (parent, name)
                parent = child

            if filterSearch and not itemset_matches_filter:
                parent, name = first_new_item
                parent.item.removeChild(parent[name].item)
                del parent[name].item
                del parent[name]
            else:
                nItemsets += 1
                progress.advance()
            if nItemsets >= self.maxItemsets:
                break

    if not filterSearch:
        self.filter_change()
    self.nItemsets = nItemsets
    self.nSelectedItemsets = 0
    self.nSelectedExamples = 0
    self.tree.expandAll()
    for i in range(self.tree.columnCount()):
        self.tree.resizeColumnToContents(i)
    self.tree.setUpdatesEnabled(True)
    self.tree.blockSignals(False)
    self._is_running = False

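
# The ItemDict walk above stores itemsets in a prefix tree: itemsets that share
# a sorted prefix share tree nodes, and when a filtered itemset turns out not to
# match, removing the first newly created node discards the whole new branch.
# A minimal sketch of the same idea with plain nested dicts (all names here are
# hypothetical, no Qt involved):
def build_prefix_tree(itemsets_with_support, names):
    """Nest sorted itemsets into {name: {'support': ..., 'children': {...}}}."""
    root = {'support': None, 'children': {}}
    for itemset, support in itemsets_with_support:
        node = root
        for item in sorted(itemset):
            node = node['children'].setdefault(
                names[item], {'support': None, 'children': {}})
        node['support'] = support        # support of the full (sorted) itemset
    return root
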
def find_itemsets(self):
    if self.data is None:
        return
    data = self.data
    self.tree.clear()
    self.tree.setUpdatesEnabled(False)
    self.tree.blockSignals(True)

    class ItemDict(dict):
        def __init__(self, item):
            self.item = item

    top = ItemDict(self.tree.invisibleRootItem())
    X, mapping = OneHot.encode(data)
    self.onehot_mapping = mapping
    names = {item: '{}={}'.format(var.name, val)
             for item, var, val in OneHot.decode(mapping.keys(), data, mapping)}
    nItemsets = 0
    filterSearch = self.filterSearch
    filterMinItems, filterMaxItems = self.filterMinItems, self.filterMaxItems
    isRegexMatch = self.isRegexMatch

    # Find itemsets and populate the TreeView
    progress = gui.ProgressBar(self, self.maxItemsets + 1)
    for itemset, support in frequent_itemsets(X, self.minSupport / 100):

        if filterSearch and not filterMinItems <= len(itemset) <= filterMaxItems:
            continue

        parent = top
        first_new_item = None
        itemset_matches_filter = False

        for item in sorted(itemset):
            name = names[item]

            if filterSearch and not itemset_matches_filter:
                itemset_matches_filter = isRegexMatch(name)

            child = parent.get(name)
            if child is None:
                wi = self.TreeWidgetItem(
                    parent.item,
                    [name, str(support),
                     '{:.1f}'.format(100 * support / len(data))])
                wi.setData(0, self.ITEM_DATA_ROLE, item)
                child = parent[name] = ItemDict(wi)
                if first_new_item is None:
                    first_new_item = (parent, name)
            parent = child

        if filterSearch and not itemset_matches_filter:
            parent, name = first_new_item
            parent.item.removeChild(parent[name].item)
            del parent[name].item
            del parent[name]
        else:
            nItemsets += 1
            progress.advance()
        if nItemsets >= self.maxItemsets:
            break

    if not filterSearch:
        self.filter_change()
    self.nItemsets = nItemsets
    self.nSelectedItemsets = 0
    self.nSelectedExamples = 0
    self.tree.expandAll()
    for i in range(self.tree.columnCount()):
        self.tree.resizeColumnToContents(i)
    self.tree.setUpdatesEnabled(True)
    self.tree.blockSignals(False)
    progress.finish()

def find_itemsets(self):
    if self.data is None:
        return
    if self._is_running:
        return
    self._is_running = True

    data = self.data
    self.tree.clear()
    self.tree.setUpdatesEnabled(False)
    self.tree.blockSignals(True)

    class ItemDict(dict):
        def __init__(self, item):
            self.item = item

    top = ItemDict(self.tree.invisibleRootItem())
    X, mapping = OneHot.encode(data)
    self.onehot_mapping = mapping
    ITEM_FMT = "{}" if issparse(data.X) else "{}={}"
    names = {item: ITEM_FMT.format(var.name, val)
             for item, var, val in OneHot.decode(mapping.keys(), data, mapping)}
    nItemsets = 0
    filterSearch = self.filterSearch
    filterMinItems, filterMaxItems = self.filterMinItems, self.filterMaxItems
    isRegexMatch = self.isRegexMatch

    # Find itemsets and populate the TreeView
    with self.progressBar(self.maxItemsets + 1) as progress:
        for itemset, support in frequent_itemsets(X, self.minSupport / 100):

            if filterSearch and not filterMinItems <= len(itemset) <= filterMaxItems:
                continue

            parent = top
            first_new_item = None
            itemset_matches_filter = False

            for item in sorted(itemset):
                name = names[item]

                if filterSearch and not itemset_matches_filter:
                    itemset_matches_filter = isRegexMatch(name)

                child = parent.get(name)
                if child is None:
                    try:
                        wi = self.TreeWidgetItem(
                            parent.item,
                            [name, str(support),
                             "{:.4g}".format(100 * support / len(data))])
                    except RuntimeError:
                        # FIXME: When autoFind was in effect and the support
                        # slider was moved, this line excepted with:
                        #   RuntimeError: wrapped C/C++ object of type
                        #   TreeWidgetItem has been deleted
                        return
                    wi.setData(0, self.ITEM_DATA_ROLE, item)
                    child = parent[name] = ItemDict(wi)
                    if first_new_item is None:
                        first_new_item = (parent, name)
                parent = child

            if filterSearch and not itemset_matches_filter:
                parent, name = first_new_item
                parent.item.removeChild(parent[name].item)
                del parent[name].item
                del parent[name]
            else:
                nItemsets += 1
                progress.advance()
            if nItemsets >= self.maxItemsets:
                break

    if not filterSearch:
        self.filter_change()
    self.nItemsets = nItemsets
    self.nSelectedItemsets = 0
    self.nSelectedExamples = 0
    self.tree.expandAll()
    for i in range(self.tree.columnCount()):
        self.tree.resizeColumnToContents(i)
    self.tree.setUpdatesEnabled(True)
    self.tree.blockSignals(False)
    self._is_running = False

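
# A GUI-free sketch of the mining step the find_itemsets() variants above
# perform before populating the tree. The import path and the 'zoo' dataset are
# assumptions made for illustration; the calls mirror the ones used above.
import Orange
from orangecontrib.associate.fpgrowth import OneHot, frequent_itemsets

data = Orange.data.Table('zoo')
X, mapping = OneHot.encode(data)                 # boolean one-hot item matrix
names = {item: '{}={}'.format(var.name, val)
         for item, var, val in OneHot.decode(mapping.keys(), data, mapping)}

# Itemsets with at least 40 % support, largest supports first
for itemset, support in sorted(frequent_itemsets(X, .4), key=lambda x: -x[1])[:5]:
    print(support, ', '.join(names[i] for i in sorted(itemset)))
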
def find_rules(self):
    if self.data is None or not len(self.data):
        return
    if self._is_running:
        self._is_running = False
        return
    self.button.button.setText('Cancel')
    self._is_running = True

    data = self.data
    self.table.model().clear()

    n_examples = len(data)
    NumericItem = self.NumericItem
    StandardItem = self.StandardItem
    filterSearch = self.filterSearch
    itemsetMin = self.filterAntecedentMin + self.filterConsequentMin
    itemsetMax = self.filterAntecedentMax + self.filterConsequentMax
    isSizeMatch = self.isSizeMatch
    isRegexMatch = self.isRegexMatch

    X, mapping = OneHot.encode(data, self.classify)
    self.Error.need_discrete_data.clear()
    if X is None:
        self.Error.need_discrete_data()

    self.onehot_mapping = mapping
    ITEM_FMT = '{}' if issparse(data.X) else '{}={}'
    names = {item: ('{}={}' if var is data.domain.class_var else ITEM_FMT).format(var.name, val)
             for item, var, val in OneHot.decode(mapping, data, mapping)}
    # Items that consequent must include if classifying
    class_items = {item
                   for item, var, val in OneHot.decode(mapping, data, mapping)
                   if var is data.domain.class_var} if self.classify else set()
    assert bool(class_items) == bool(self.classify)

    model = QStandardItemModel(self.table)
    for col, (label, _, tooltip) in enumerate(self.header):
        item = QStandardItem(label)
        item.setToolTip(tooltip)
        model.setHorizontalHeaderItem(col, item)

    # Find itemsets
    nRules = 0
    itemsets = {}
    ARROW_ITEM = StandardItem('→')
    ARROW_ITEM.setTextAlignment(Qt.AlignCenter)
    with self.progressBar(self.maxRules + 1) as progress:
        for itemset, support in frequent_itemsets(X, self.minSupport / 100):
            itemsets[itemset] = support

            if class_items and not class_items & itemset:
                continue

            # Filter itemset by joined filters before descending into it
            itemset_str = ' '.join(names[i] for i in itemset)
            if (filterSearch and
                    (len(itemset) < itemsetMin or
                     itemsetMax < len(itemset) or
                     not isRegexMatch(itemset_str, itemset_str))):
                continue

            for rule in association_rules(itemsets, self.minConfidence / 100, itemset):
                left, right, support, confidence = rule

                if class_items and right - class_items:
                    continue
                if filterSearch and not isSizeMatch(len(left), len(right)):
                    continue
                left_str = ', '.join(names[i] for i in sorted(left))
                right_str = ', '.join(names[i] for i in sorted(right))
                if filterSearch and not isRegexMatch(left_str, right_str):
                    continue

                # All filters matched, calculate stats and add table row
                _, _, _, _, coverage, strength, lift, leverage = next(
                    rules_stats((rule,), itemsets, n_examples))

                support_item = NumericItem(support / n_examples)
                # Set row data on first column
                support_item.setData((itemset - class_items,
                                      class_items and (class_items & itemset).pop()),
                                     self.ROW_DATA_ROLE)
                left_item = StandardItem(left_str, len(left))
                left_item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                model.appendRow([support_item,
                                 NumericItem(confidence),
                                 NumericItem(coverage),
                                 NumericItem(strength),
                                 NumericItem(lift),
                                 NumericItem(leverage),
                                 left_item,
                                 ARROW_ITEM.clone(),
                                 StandardItem(right_str, len(right))])
                nRules += 1
                progress.advance()
                if not self._is_running or nRules >= self.maxRules:
                    break

            qApp.processEvents()

            if not self._is_running or nRules >= self.maxRules:
                break

    # Populate the TableView
    table = self.table
    table.setHidden(True)
    table.setSortingEnabled(False)
    proxy_model = self.proxy_model
    proxy_model.setSourceModel(model)
    table.setModel(proxy_model)
    for i in range(model.columnCount()):
        table.resizeColumnToContents(i)
    table.setSortingEnabled(True)
    table.setHidden(False)

    self.table_rules = proxy_model.get_data()
    if self.table_rules is not None:
        self.Outputs.rules.send(self.table_rules)

    self.button.button.setText('Find Rules')

    self.nRules = nRules
    self.nFilteredRules = proxy_model.rowCount()  # TODO: continue; also add in owitemsets
    self.nSelectedRules = 0
    self.nSelectedExamples = 0
    self._is_running = False

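
# A standalone sketch of the pipeline the find_rules() variants above drive
# from the GUI: one-hot encode the data, mine frequent itemsets, derive
# association rules, then attach the statistics shown in the table columns.
# The import path, the 'zoo' dataset and the thresholds are assumptions made
# for illustration; the widget additionally passes self.classify to
# OneHot.encode() so the class variable can appear in consequents.
import Orange
from orangecontrib.associate.fpgrowth import (
    OneHot, frequent_itemsets, association_rules, rules_stats)

data = Orange.data.Table('zoo')
n_examples = len(data)
X, mapping = OneHot.encode(data)
names = {item: '{}={}'.format(var.name, val)
         for item, var, val in OneHot.decode(mapping, data, mapping)}

itemsets = dict(frequent_itemsets(X, .4))        # min. support 40 %
rules = association_rules(itemsets, .8)          # min. confidence 80 %
for stats in rules_stats(rules, itemsets, n_examples):
    left, right, support, confidence, coverage, strength, lift, leverage = stats
    print('{} -> {}  supp={:.2f}  conf={:.2f}  lift={:.2f}'.format(
        ', '.join(names[i] for i in sorted(left)),
        ', '.join(names[i] for i in sorted(right)),
        support / n_examples, confidence, lift))
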