import pandas as pd
import orangecontrib.associate.fpgrowth as oaf


def ResultDFToSave(rules):
    # Build and return the corresponding DataFrame from the rules produced by
    # Orange3's association analysis.
    returnRules = []
    for i in rules:
        temList = []
        temStr = ''
        for j in i[0]:  # process the first frozenset (the antecedent)
            temStr = temStr + str(j) + '&'
        temStr = temStr[:-1]
        temStr = temStr + ' ==> '
        for j in i[1]:
            temStr = temStr + str(j) + '&'
        temStr = temStr[:-1]
        temList.append(temStr)
        temList.append(i[2])
        temList.append(i[3])
        temList.append(i[4])
        temList.append(i[5])
        temList.append(i[6])
        temList.append(i[7])
        returnRules.append(temList)
    return pd.DataFrame(returnRules,
                        columns=('Rule', 'Support count', 'Confidence',
                                 'Coverage', 'Strength', 'Lift', 'Leverage'))


# listToAnalysis, dealRules and dealResult are defined earlier in the full script.
supportRate = 0.02
confidenceRate = 0.5
itemsets = dict(oaf.frequent_itemsets(listToAnalysis, supportRate))
rules = oaf.association_rules(itemsets, confidenceRate)
rules = list(rules)
regularNum = len(rules)
printRules = dealRules(rules)
# Note: the call below consumes `rules` -- it uses `rules` up!
result = list(oaf.rules_stats(rules, itemsets, len(listToAnalysis)))
printResult = dealResult(result)

# ################ Save the results as an Excel file ################
dfToSave = ResultDFToSave(result)
saveRegularName = (str(supportRate) + '_support_' + str(confidenceRate) +
                   '_confidence_' + str(regularNum) + '_rules' + '.xlsx')
dfToSave.to_excel(saveRegularName)

# ##### Count the rules obtained at different support and confidence levels #####
listTable = []
supportRate = 0.01
confidenceRate = 0.1
for i in range(9):
    support = supportRate * (i + 1)
    listS = []
    for j in range(9):
        confidence = confidenceRate * (j + 1)
        itemsets = dict(oaf.frequent_itemsets(listToAnalysis, support))
        rules = list(oaf.association_rules(itemsets, confidence))
        listS.append(len(rules))
    listTable.append(listS)
dfList = pd.DataFrame(listTable,
                      index=[supportRate * (i + 1) for i in range(9)],
                      columns=[confidenceRate * (i + 1) for i in range(9)])
dfList.to_excel('regularNum.xlsx')
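# A minimal, self-contained sketch (toy transactions invented for
# illustration, not data from the original script) showing the input format
# frequent_itemsets() accepts and the 8-tuples rules_stats() yields, which
# ResultDFToSave() indexes as i[0]..i[7]:
import orangecontrib.associate.fpgrowth as oaf

toy_transactions = [[0, 1, 2], [0, 2], [0, 1], [1, 2]]  # int-encoded items
toy_itemsets = dict(oaf.frequent_itemsets(toy_transactions, 0.5))
toy_rules = list(oaf.association_rules(toy_itemsets, 0.5))
for stat in oaf.rules_stats(toy_rules, toy_itemsets, len(toy_transactions)):
    # (antecedent, consequent, support, confidence,
    #  coverage, strength, lift, leverage)
    print(stat)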
# Tail of the same ResultDFToSave() helper, from a second script:
        temList.append(i[7])
        returnRules.append(temList)
    return pd.DataFrame(returnRules,
                        columns=('Rule', 'Support count', 'Confidence',
                                 'Coverage', 'Strength', 'Lift', 'Leverage'))


if __name__ == '__main__':
    supportRate = 0.004
    confidenceRate = 0.6
    itemsets = dict(oaf.frequent_itemsets(ryzd, supportRate))
    rules = oaf.association_rules(itemsets, confidenceRate)
    rules = list(rules)
    regularNum = len(rules)
    printRules = dealRules(rules)
    # Note: the call below consumes `rules` -- it uses `rules` up!
    result = list(oaf.rules_stats(rules, itemsets, len(ryzd)))
    printResult = dealResult(result)

    # ######## Save the results as an Excel file ########
    dfToSave = ResultDFToSave(result)
    dfToSave.to_excel(r'C:\Users\Administrator\Desktop\2.xlsx')

    # #### Count the rules obtained at different confidence and support levels ####
    listTable = []
    supportRate = 0.001
    confidenceRate = 0.1
    for i in range(9):
        support = supportRate * (i + 1)
def find_rules(self):
    if self.data is None:
        return
    data = self.data
    self.table.model().clear()

    n_examples = len(data)
    NumericItem = self.NumericItem
    StandardItem = self.StandardItem
    filterSearch = self.filterSearch
    itemsetMin = self.filterAntecedentMin + self.filterConsequentMin
    itemsetMax = self.filterAntecedentMax + self.filterConsequentMax
    isSizeMatch = self.isSizeMatch
    isRegexMatch = self.isRegexMatch

    X, mapping = OneHot.encode(data, self.classify)
    self.onehot_mapping = mapping
    names = {item: '{}={}'.format(var.name, val)
             for item, var, val in OneHot.decode(mapping, data, mapping)}
    # Items that the consequent must include if classifying
    class_items = {item
                   for item, var, val in OneHot.decode(mapping, data, mapping)
                   if var is data.domain.class_var} if self.classify else set()
    assert bool(class_items) == bool(self.classify)

    model = QStandardItemModel(self.table)
    for col, (label, tooltip) in enumerate([
            ("Supp", "Support"),
            ("Conf", "Confidence (support / antecedent support)"),
            ("Covr", "Coverage (antecedent support / number of examples)"),
            ("Strg", "Strength (consequent support / antecedent support)"),
            ("Lift", "Lift (number of examples * confidence / consequent support)"),
            ("Levr", "Leverage ((support * number of examples - antecedent support * consequent support) / (number of examples)²)"),
            ("Antecedent", None),
            ("", None),
            ("Consequent", None)]):
        item = QStandardItem(label)
        item.setToolTip(tooltip)
        model.setHorizontalHeaderItem(col, item)

    #~ # Aggregate rules by common (support,confidence) for scatterplot
    #~ scatter_agg = defaultdict(list)

    # Find itemsets
    nRules = 0
    itemsets = {}
    progress = gui.ProgressBar(self, self.maxRules + 1)
    for itemset, support in frequent_itemsets(X, self.minSupport / 100):
        itemsets[itemset] = support

        if class_items and not class_items & itemset:
            continue

        # Filter itemset by joined filters before descending into it
        itemset_str = ' '.join(names[i] for i in itemset)
        if (filterSearch and
                (len(itemset) < itemsetMin or
                 itemsetMax < len(itemset) or
                 not isRegexMatch(itemset_str, itemset_str))):
            continue

        for rule in gen_assoc_rules(itemsets, self.minConfidence / 100, itemset):
            (left, right), support, confidence = rule

            if class_items and right - class_items:
                continue
            if filterSearch and not isSizeMatch(len(left), len(right)):
                continue

            left_str = ' '.join(names[i] for i in sorted(left))
            right_str = ' '.join(names[i] for i in sorted(right))
            if filterSearch and not isRegexMatch(left_str, right_str):
                continue

            # All filters matched, calculate stats and add table row
            _, _, _, coverage, strength, lift, leverage = next(
                rules_stats((rule,), itemsets, n_examples))

            support_item = NumericItem(support / n_examples)
            # Set row data on first column
            support_item.setData((itemset - class_items,
                                  class_items and (class_items & itemset).pop()),
                                 self.ROW_DATA_ROLE)
            left_item = StandardItem(left_str, len(left))
            left_item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
            model.appendRow([support_item,
                             NumericItem(confidence),
                             NumericItem(coverage),
                             NumericItem(strength),
                             NumericItem(lift),
                             NumericItem(leverage),
                             left_item,
                             StandardItem('→'),
                             StandardItem(right_str, len(right))])
            #~ scatter_agg[(round(support / n_examples, 2), round(confidence, 2))].append((left, right))
            nRules += 1
            progress.advance()
            if nRules >= self.maxRules:
                break
        if nRules >= self.maxRules:
            break

    # Populate the TableView
    table = self.table
    table.setHidden(True)
    table.setSortingEnabled(False)
    proxy_model = self.proxy_model
    proxy_model.setSourceModel(model)
    table.setModel(proxy_model)
    for i in range(model.columnCount()):
        table.resizeColumnToContents(i)
    table.setSortingEnabled(True)
    table.setHidden(False)
    progress.finish()

    self.nRules = nRules
    self.nFilteredRules = proxy_model.rowCount()  # TODO: continue; also add in owitemsets
    self.nSelectedRules = 0
    self.nSelectedExamples = 0
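# The widget above relies on self.NumericItem for numerically sortable cells.
# A hypothetical minimal sketch of what such a class could look like (the
# real class is defined elsewhere in the widget; this is an assumption, not
# the widget's actual code). For sorting to use the raw value, the proxy
# model would have to sort on Qt.UserRole.
from AnyQt.QtCore import Qt
from AnyQt.QtGui import QStandardItem


class NumericItem(QStandardItem):
    """An item that displays a formatted float and keeps the raw value."""

    def __init__(self, value):
        super().__init__('{:.3f}'.format(value))
        self.setData(value, Qt.UserRole)  # raw number, e.g. for sort keys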
def find_rules(self):
    if self.data is None or not len(self.data):
        return
    if self._is_running:
        return
    self._is_running = True

    data = self.data
    self.table.model().clear()

    n_examples = len(data)
    NumericItem = self.NumericItem
    StandardItem = self.StandardItem
    filterSearch = self.filterSearch
    itemsetMin = self.filterAntecedentMin + self.filterConsequentMin
    itemsetMax = self.filterAntecedentMax + self.filterConsequentMax
    isSizeMatch = self.isSizeMatch
    isRegexMatch = self.isRegexMatch

    X, mapping = OneHot.encode(data, self.classify)
    self.error(911)
    if X is None:
        self.error(911, 'Need some discrete data to work with.')

    self.onehot_mapping = mapping
    ITEM_FMT = '{}' if issparse(data.X) else '{}={}'
    names = {item: ('{}={}' if var is data.domain.class_var else ITEM_FMT)
                   .format(var.name, val)
             for item, var, val in OneHot.decode(mapping, data, mapping)}
    # Items that the consequent must include if classifying
    class_items = {item
                   for item, var, val in OneHot.decode(mapping, data, mapping)
                   if var is data.domain.class_var} if self.classify else set()
    assert bool(class_items) == bool(self.classify)

    model = QStandardItemModel(self.table)
    for col, (label, tooltip) in enumerate([
            ("Supp", "Support"),
            ("Conf", "Confidence (support / antecedent support)"),
            ("Covr", "Coverage (antecedent support / number of examples)"),
            ("Strg", "Strength (consequent support / antecedent support)"),
            ("Lift", "Lift (number of examples * confidence / consequent support)"),
            ("Levr", "Leverage ((support * number of examples - antecedent support * consequent support) / (number of examples)²)"),
            ("Antecedent", None),
            ("", None),
            ("Consequent", None)]):
        item = QStandardItem(label)
        item.setToolTip(tooltip)
        model.setHorizontalHeaderItem(col, item)

    #~ # Aggregate rules by common (support,confidence) for scatterplot
    #~ scatter_agg = defaultdict(list)

    # Find itemsets
    nRules = 0
    itemsets = {}
    with self.progressBar(self.maxRules + 1) as progress:
        for itemset, support in frequent_itemsets(X, self.minSupport / 100):
            itemsets[itemset] = support

            if class_items and not class_items & itemset:
                continue

            # Filter itemset by joined filters before descending into it
            itemset_str = ' '.join(names[i] for i in itemset)
            if (filterSearch and
                    (len(itemset) < itemsetMin or
                     itemsetMax < len(itemset) or
                     not isRegexMatch(itemset_str, itemset_str))):
                continue

            for rule in association_rules(itemsets, self.minConfidence / 100, itemset):
                left, right, support, confidence = rule

                if class_items and right - class_items:
                    continue
                if filterSearch and not isSizeMatch(len(left), len(right)):
                    continue

                left_str = ', '.join(names[i] for i in sorted(left))
                right_str = ', '.join(names[i] for i in sorted(right))
                if filterSearch and not isRegexMatch(left_str, right_str):
                    continue

                # All filters matched, calculate stats and add table row
                _, _, _, _, coverage, strength, lift, leverage = next(
                    rules_stats((rule,), itemsets, n_examples))

                support_item = NumericItem(support / n_examples)
                # Set row data on first column
                support_item.setData((itemset - class_items,
                                      class_items and (class_items & itemset).pop()),
                                     self.ROW_DATA_ROLE)
                left_item = StandardItem(left_str, len(left))
                left_item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                model.appendRow([support_item,
                                 NumericItem(confidence),
                                 NumericItem(coverage),
                                 NumericItem(strength),
                                 NumericItem(lift),
                                 NumericItem(leverage),
                                 left_item,
                                 StandardItem('→'),
                                 StandardItem(right_str, len(right))])
                #~ scatter_agg[(round(support / n_examples, 2), round(confidence, 2))].append((left, right))
                nRules += 1
                progress.advance()
                if nRules >= self.maxRules:
                    break
            if nRules >= self.maxRules:
                break

    # Populate the TableView
    table = self.table
    table.setHidden(True)
    table.setSortingEnabled(False)
    proxy_model = self.proxy_model
    proxy_model.setSourceModel(model)
    table.setModel(proxy_model)
    for i in range(model.columnCount()):
        table.resizeColumnToContents(i)
    table.setSortingEnabled(True)
    table.setHidden(False)

    self.nRules = nRules
    self.nFilteredRules = proxy_model.rowCount()  # TODO: continue; also add in owitemsets
    self.nSelectedRules = 0
    self.nSelectedExamples = 0
    self._is_running = False
# The head of this snippet is truncated; reconstructed call. The confidence
# is set here; an itemset such as frozenset({'肺炎'}) may be passed as a
# third argument to restrict rule generation to that itemset.
rules = oaf.association_rules(often, .01)
rules = list(rules)


def dealResult(rules):
    returnRules = []
    for i in rules:
        temStr = ''
        for j in i[0]:  # process the first frozenset (the antecedent)
            temStr = temStr + j + '&'
        temStr = temStr[:-1]
        temStr = temStr + ' ==> '
        for j in i[1]:
            temStr = temStr + j + '&'
        temStr = temStr[:-1]
        temStr = (temStr + ';' + '\t' + str(i[2]) + ';' + '\t' + str(i[3]) +
                  ';' + '\t' + str(i[4]) + ';' + '\t' + str(i[5]) + ';' +
                  '\t' + str(i[6]) + ';' + '\t' + str(i[7]))
        # print(temStr)
        returnRules.append(temStr)
    return returnRules


printRules = dealResult(rules)
print(printRules)
result = list(oaf.rules_stats(rules, often, len(ryzd)))
printResult = dealResult(result)
print(printResult)
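# Why these scripts call list(rules) before rules_stats():
# association_rules() returns a generator, which can be consumed only once.
# A minimal sketch with toy transactions (invented for illustration, not the
# ryzd dataset):
import orangecontrib.associate.fpgrowth as oaf

toy = [[0, 1], [0, 1], [0, 2]]
often_toy = dict(oaf.frequent_itemsets(toy, 1 / 3))
gen = oaf.association_rules(often_toy, 0.5)
rules_list = list(gen)   # materialize once...
assert list(gen) == []   # ...the generator is now exhausted
stats = list(oaf.rules_stats(rules_list, often_toy, len(toy)))  # safe to reuse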
import functools
import os

import jieba
import pandas as pd
import orangecontrib.associate.fpgrowth as oaf


def doAnalysize(self, pd_data, category, supportRate=0.02, confidenceRate=0.5,
                savepath=r'C:\Users\Administrator\Desktop'):
    # Initialize the path of the user word dictionary
    savepath = savepath + "\\" + category
    if not os.path.exists(savepath):
        os.makedirs(savepath)
    initpath = "tmall\\spiders\\DataAnalysize\\jiebaInit\\" + category + ".txt"
    jieba.load_userdict(initpath)
    pd_data['ratecontent_list'] = pd_data.apply(
        lambda r: list(jieba.cut(r['rateContent'])), axis=1)
    aim_list = []
    with open(initpath, 'r', encoding="utf-8") as f:
        for line in f.readlines():
            aim_list.append(line.strip('\n'))
    pd_data['aim_list'] = pd_data.apply(
        lambda r: list(set(r['ratecontent_list']).intersection(set(aim_list))),
        axis=1)
    simple_aimdata = []
    pd_data.apply(lambda r: simple_aimdata.append(r['aim_list'])
                  if not r['aim_list'] == [] else 1, axis=1)
    wordcloudlist = []
    for item in simple_aimdata:
        for i in item:
            wordcloudlist.append(i)
    # Generate a word-cloud image for each analysis
    self.everyWordCloud(wordcloudlist, savepath)
    # The steps above produce the target list: simple_aimdata
    strSet = set(functools.reduce(lambda a, b: a + b, simple_aimdata))
    strEncode = dict(zip(strSet, range(len(strSet))))  # encoding dict, e.g. {'甜腻': 6, '鱼腥味': 53, ...}
    strDecode = dict(zip(strEncode.values(), strEncode.keys()))  # decoding dict, e.g. {6: '甜腻', 53: '鱼腥味', ...}
    listToAnalysis_int = [list(map(lambda item: strEncode[item], row))
                          for row in simple_aimdata]

    # Start the association analysis
    itemsets = dict(oaf.frequent_itemsets(listToAnalysis_int, supportRate))
    # print("itemsets : ")
    # print(itemsets)
    rules = oaf.association_rules(itemsets, confidenceRate)
    rules = list(rules)
    regularNum = len(rules)
    printRules = self.dealRules(rules, strDecode)  # print this to inspect the generated rules
    # print(printRules)
    # Note: the call below consumes `rules` -- it uses `rules` up!
    result = list(oaf.rules_stats(rules, itemsets, len(listToAnalysis_int)))
    # print(result)
    printResult = self.dealResult(result, strDecode)  # print this to inspect the results
    # print(printResult)

    # ######## Save the results as an Excel file ########
    dfToSave = self.ResultDFToSave(result, strDecode)
    saveRegularName = (savepath + "\\" + str(supportRate) + '_support_' +
                       str(confidenceRate) + '_confidence_' +
                       str(regularNum) + '_rules' + '.xlsx')
    dfToSave.to_excel(saveRegularName)
    # Save the itemsets to Excel
    self.saveItemSets(itemsets, strDecode, savepath)

    # #### Count the rules obtained at different confidence and support levels ####
    listTable = []
    supportRate = 0.01
    confidenceRate = 0.1
    for i in range(9):
        support = supportRate * (i + 1)
        listS = []
        for j in range(9):
            confidence = confidenceRate * (j + 1)
            itemsets = dict(oaf.frequent_itemsets(listToAnalysis_int, support))
            rules = list(oaf.association_rules(itemsets, confidence))
            listS.append(len(rules))
        listTable.append(listS)
    dfList = pd.DataFrame(listTable,
                          index=[supportRate * (i + 1) for i in range(9)],
                          columns=[confidenceRate * (i + 1) for i in range(9)])
    dfList.to_excel(savepath + "\\" + 'regularNum.xlsx')
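# The strEncode/strDecode round trip above, in isolation: a sketch with toy
# tokens (invented, not the Tmall review vocabulary) showing how integer
# rules are decoded back into readable strings -- the job of dealRules() and
# dealResult():
import functools
import orangecontrib.associate.fpgrowth as oaf

aim_toy = [['sweet', 'fishy'], ['sweet'], ['salty', 'fishy'], ['sweet', 'fishy']]
vocab = set(functools.reduce(lambda a, b: a + b, aim_toy))
encode = dict(zip(vocab, range(len(vocab))))        # token -> int
decode = dict(zip(encode.values(), encode.keys()))  # int -> token
encoded = [[encode[t] for t in row] for row in aim_toy]
toy_itemsets = dict(oaf.frequent_itemsets(encoded, 0.5))
for left, right, support, confidence in oaf.association_rules(toy_itemsets, 0.6):
    print('&'.join(decode[i] for i in left), '==>',
          '&'.join(decode[i] for i in right), support, confidence)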
def ResultDFToSave(rules):  # same helper as above; header restored
    returnRules = []
    for i in rules:
        temList = []
        temStr = ''
        for j in i[0]:  # process the first frozenset (the antecedent)
            temStr = temStr + str(j) + '&'
        temStr = temStr[:-1]
        temStr = temStr + ' ==> '
        for j in i[1]:
            temStr = temStr + str(j) + '&'
        temStr = temStr[:-1]
        temList.append(temStr)
        temList.append(i[2])
        temList.append(i[3])
        temList.append(i[4])
        temList.append(i[5])
        temList.append(i[6])
        temList.append(i[7])
        returnRules.append(temList)
    return pd.DataFrame(returnRules,
                        columns=('Rule', 'Support count', 'Confidence',
                                 'Coverage', 'Strength', 'Lift', 'Leverage'))


often = dict(oaf.frequent_itemsets(ryzd, .02))
rules = oaf.association_rules(often, .5)  # the confidence is set here
rules = list(rules)
printRules = dealRules(rules)
# Note: the call below consumes `rules` -- it uses `rules` up!
result = list(oaf.rules_stats(rules, often, len(ryzd)))
printResult = dealResult(result)
print(printResult)
import functools
import json

import pandas as pd
import orangecontrib.associate.fpgrowth as oaf


def associateRules(support=0.02, confidence=0.5):
    support = 0.15
    confidence = 0.15
    try:
        with open('filelocation.json') as f_obj:
            fileInput = json.load(f_obj)
    except Exception:
        with open('errorFlag.json', 'w') as e_obj:
            json.dump("File open process failed", e_obj)
        return

    filename = fileInput
    dfar = pd.read_csv(filename)
    tag = list(dfar.columns.values)

    listToAnalysis = []  # the final result
    for item in range(1, len(tag) - 1):  # iterate over the columns
        imax = max(list(dfar[tag[item]]))  # upper bound
        imin = min(list(dfar[tag[item]]))  # lower bound
        ijc = imax - imin  # range
        l = ijc / 4
        i1 = imin + l
        i2 = i1 + l
        i3 = i2 + l
        listToStore = []
        for i in range(dfar.shape[0]):
            s = dfar.iloc[i][tag[item]]
            if s >= i3 and s <= imax:
                ss = tag[item] + str(i3) + '-' + str(imax)
            elif s >= i2:
                ss = tag[item] + str(i2) + '-' + str(i3)
            elif s >= i1:
                ss = tag[item] + str(i1) + '-' + str(i2)
            elif s >= imin:
                ss = tag[item] + str(imin) + '-' + str(i1)
            listToStore.append(ss)
        listToAnalysis.append(listToStore.copy())

    # Transpose: rows were per-column; make them per-example transactions
    listToAnalysis2 = []
    ll = len(listToAnalysis[0])
    for ii in range(ll):
        ltmp = []
        for it in listToAnalysis:
            ltmp.append(it[ii])
        listToAnalysis2.append(ltmp.copy())

    # Build the encoding and decoding dictionaries
    what = functools.reduce(lambda a, b: a + b, listToAnalysis2)
    strSet = set(what)
    zz = zip(strSet, range(len(strSet)))
    strEncode = dict(zz)  # encoding dict
    strDecode = dict(zip(strEncode.values(), strEncode.keys()))  # decoding dict
    listToAnalysis_int = [list(map(lambda item: strEncode[item], row))
                          for row in listToAnalysis2]

    with open('Information.json') as obj:
        infostring = json.load(obj)
    inforlist = infostring.split(' ')
    confidence = float(inforlist[0]) / float(100)
    support = float(inforlist[1]) / float(100)

    itemsets = dict(oaf.frequent_itemsets(listToAnalysis_int, support))  # frequent itemsets
    rules = oaf.association_rules(itemsets, confidence)
    rules = list(rules)  # association rules
    regularNum = len(rules)
    # printRules = dealResult(result, strDecode)
    # print("You will get ")
    # print(regularNum)
    # print("association rules when\n" + "SupportRate = ", end='')
    # print(support, end='')
    # print("ConfidenceRate = " + str(confidence))
    informationBack = ("You will get " + str(regularNum) +
                       " association rules when\n" +
                       "SupportRate = " + str(support) +
                       " ConfidenceRate = " + str(confidence))
    with open('InformationBack.json', 'w') as inf:
        json.dump(informationBack, inf)
    result = list(oaf.rules_stats(rules, itemsets, len(listToAnalysis_int)))
    dfToSave = ResultDFToSave(result, strDecode)
    with open('arInteractiveText.json', 'w') as ij:
        json.dump(str(dfToSave), ij)
    saveRegularName = "Processed.xlsx"
    dfToSave.to_excel(saveRegularName)
    return regularNum
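# The manual equal-width binning in associateRules() can be sketched more
# compactly with pandas.cut. This is an alternative formulation, not the
# original author's code; the labels approximate the tag+lower-upper strings
# built above:
import pandas as pd


def discretize_column(dfar, col, bins=4):
    _, edges = pd.cut(dfar[col], bins=bins, retbins=True)
    labels = ['{}{}-{}'.format(col, edges[k], edges[k + 1])
              for k in range(bins)]
    return pd.cut(dfar[col], bins=edges, labels=labels, include_lowest=True)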
    listToAnalysis.append(listToStore.copy())
    listToStore.clear()

# Encode: convert the strings in listToAnalysis into integers
strSet = set(functools.reduce(lambda a, b: a + b, listToAnalysis))
strEncode = dict(zip(strSet, range(len(strSet))))  # encoding dict, e.g. {'ArticleTag_BS': 6, 'Country_Argentina': 53, ...}
strDecode = dict(zip(strEncode.values(), strEncode.keys()))  # decoding dict, e.g. {6: 'ArticleTag_BS', 53: 'Country_Argentina', ...}
listToAnalysis_int = [list(map(lambda item: strEncode[item], row))
                      for row in listToAnalysis]

# Start the association analysis
supportRate = 0.02
confidenceRate = 0.5
itemsets = dict(oaf.frequent_itemsets(listToAnalysis_int, supportRate))
rules = oaf.association_rules(itemsets, confidenceRate)
rules = list(rules)
regularNum = len(rules)
printRules = dealRules(rules, strDecode)  # print this to inspect the generated rules
# Note: the call below consumes `rules` -- it uses `rules` up!
result = list(oaf.rules_stats(rules, itemsets, len(listToAnalysis_int)))
printResult = dealResult(result, strDecode)  # print this to inspect the results

# ################ Save the results as an Excel file ################
dfToSave = ResultDFToSave(result, strDecode)
saveRegularName = (str(supportRate) + '_support_' + str(confidenceRate) +
                   '_confidence_' + str(regularNum) + '_rules' + '.xlsx')
dfToSave.to_excel(saveRegularName)

# #### Count the rules obtained at different confidence and support levels ####
listTable = []
supportRate = 0.01
confidenceRate = 0.1
for i in range(9):
    support = supportRate * (i + 1)
    listS = []
    for j in range(9):
def ResultDFToSave(rules):
    # Build and return the corresponding DataFrame from the rules produced by
    # Orange3's association analysis.
    returnRules = []
    for i in rules:
        temList = []
        temStr = ''
        for j in i[0]:  # process the first frozenset (the antecedent)
            temStr = temStr + str(j) + '&'
        temStr = temStr[:-1]
        temStr = temStr + ' ==> '
        for j in i[1]:
            temStr = temStr + str(j) + '&'
        temStr = temStr[:-1]
        temList.append(temStr)
        temList.append(i[2])
        temList.append(i[3])
        temList.append(i[4])
        temList.append(i[5])
        temList.append(i[6])
        temList.append(i[7])
        returnRules.append(temList)
    return pd.DataFrame(returnRules,
                        columns=('Rule', 'Support count', 'Confidence',
                                 'Coverage', 'Strength', 'Lift', 'Leverage'))


printRules = dealRules(rules)
# Note: the call below consumes `rules` -- it uses `rules` up!
result = list(oaf.rules_stats(rules, often, len(listToAnalysis)))
printResult = dealResult(result)
def find_rules(self):
    if self.data is None or not len(self.data):
        return
    if self._is_running:
        self._is_running = False
        return
    self.button.button.setText('Cancel')
    self._is_running = True

    data = self.data
    self.table.model().clear()

    n_examples = len(data)
    NumericItem = self.NumericItem
    StandardItem = self.StandardItem
    filterSearch = self.filterSearch
    itemsetMin = self.filterAntecedentMin + self.filterConsequentMin
    itemsetMax = self.filterAntecedentMax + self.filterConsequentMax
    isSizeMatch = self.isSizeMatch
    isRegexMatch = self.isRegexMatch

    X, mapping = OneHot.encode(data, self.classify)
    self.Error.need_discrete_data.clear()
    if X is None:
        self.Error.need_discrete_data()

    self.onehot_mapping = mapping
    ITEM_FMT = '{}' if issparse(data.X) else '{}={}'
    names = {item: ('{}={}' if var is data.domain.class_var else ITEM_FMT)
                   .format(var.name, val)
             for item, var, val in OneHot.decode(mapping, data, mapping)}
    # Items that the consequent must include if classifying
    class_items = {item
                   for item, var, val in OneHot.decode(mapping, data, mapping)
                   if var is data.domain.class_var} if self.classify else set()
    assert bool(class_items) == bool(self.classify)

    model = QStandardItemModel(self.table)
    for col, (label, _, tooltip) in enumerate(self.header):
        item = QStandardItem(label)
        item.setToolTip(tooltip)
        model.setHorizontalHeaderItem(col, item)

    # Find itemsets
    nRules = 0
    itemsets = {}
    ARROW_ITEM = StandardItem('→')
    ARROW_ITEM.setTextAlignment(Qt.AlignCenter)
    with self.progressBar(self.maxRules + 1) as progress:
        for itemset, support in frequent_itemsets(X, self.minSupport / 100):
            itemsets[itemset] = support

            if class_items and not class_items & itemset:
                continue

            # Filter itemset by joined filters before descending into it
            itemset_str = ' '.join(names[i] for i in itemset)
            if (filterSearch and
                    (len(itemset) < itemsetMin or
                     itemsetMax < len(itemset) or
                     not isRegexMatch(itemset_str, itemset_str))):
                continue

            for rule in association_rules(itemsets, self.minConfidence / 100, itemset):
                left, right, support, confidence = rule

                if class_items and right - class_items:
                    continue
                if filterSearch and not isSizeMatch(len(left), len(right)):
                    continue

                left_str = ', '.join(names[i] for i in sorted(left))
                right_str = ', '.join(names[i] for i in sorted(right))
                if filterSearch and not isRegexMatch(left_str, right_str):
                    continue

                # All filters matched, calculate stats and add table row
                _, _, _, _, coverage, strength, lift, leverage = next(
                    rules_stats((rule,), itemsets, n_examples))

                support_item = NumericItem(support / n_examples)
                # Set row data on first column
                support_item.setData((itemset - class_items,
                                      class_items and (class_items & itemset).pop()),
                                     self.ROW_DATA_ROLE)
                left_item = StandardItem(left_str, len(left))
                left_item.setTextAlignment(Qt.AlignRight | Qt.AlignVCenter)
                model.appendRow([support_item,
                                 NumericItem(confidence),
                                 NumericItem(coverage),
                                 NumericItem(strength),
                                 NumericItem(lift),
                                 NumericItem(leverage),
                                 left_item,
                                 ARROW_ITEM.clone(),
                                 StandardItem(right_str, len(right))])
                nRules += 1
                progress.advance()
                if not self._is_running or nRules >= self.maxRules:
                    break
            qApp.processEvents()
            if not self._is_running or nRules >= self.maxRules:
                break

    # Populate the TableView
    table = self.table
    table.setHidden(True)
    table.setSortingEnabled(False)
    proxy_model = self.proxy_model
    proxy_model.setSourceModel(model)
    table.setModel(proxy_model)
    for i in range(model.columnCount()):
        table.resizeColumnToContents(i)
    table.setSortingEnabled(True)
    table.setHidden(False)

    self.table_rules = proxy_model.get_data()
    if self.table_rules is not None:
        self.Outputs.rules.send(self.table_rules)

    self.button.button.setText('Find Rules')
    self.nRules = nRules
    self.nFilteredRules = proxy_model.rowCount()  # TODO: continue; also add in owitemsets
    self.nSelectedRules = 0
    self.nSelectedExamples = 0
    self._is_running = False
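# The Outputs.rules.send(...) call above presumes an Output declaration on
# the widget class. A typical Orange-widget pattern, sketched under the
# assumption that rules are emitted as an Orange Table (the widget's real
# declaration may differ):
from Orange.data import Table
from Orange.widgets.widget import Output, OWWidget


class OWAssociate(OWWidget):
    class Outputs:
        rules = Output("Rules", Table)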
import numpy as np
import orangecontrib.associate.fpgrowth as ofpg

# So the items '4' and '25' (the fifth and twenty-sixth columns of X) are the
# only items (and itemsets) that appear 10 or more times. Let's check this
# (X here is the random matrix from the preceding example):
print((X.sum(axis=0) >= 10).nonzero()[0])

# Conclusion: given databases of uniformly distributed random data, there's
# not much to work with.

# Examples with rules
np.random.seed(0)
N = 100
X = np.random.random((N, 100)) > .9

# Find all itemsets with at least 5% support:
itemsets = dict(ofpg.frequent_itemsets(X, .05))

# Generate all association rules from these itemsets with minimum 50% confidence:
rules = ofpg.association_rules(itemsets, .5)
rules = list(rules)

# Or only the rules for a particular itemset:
print(list(ofpg.association_rules(itemsets, .3, frozenset({75, 98}))))

# Examples of additional stats for rules generated by association_rules()
N = 30
X = np.random.random((N, 50)) > .9
itemsets = dict(ofpg.frequent_itemsets(X, .1))
rules = ofpg.association_rules(itemsets, .6)
print(list(ofpg.rules_stats(rules, itemsets, N)))
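# frequent_itemsets() is not limited to dense boolean arrays: the widget code
# above passes it the (possibly sparse) output of OneHot.encode(), and the
# scripts pass plain lists of transactions. A brief sketch of both input
# kinds (toy data; the csr_matrix usage is inferred from the issparse() check
# in the widget code, so treat it as an assumption):
import numpy as np
from scipy.sparse import csr_matrix
import orangecontrib.associate.fpgrowth as ofpg

np.random.seed(0)
X_sparse = csr_matrix(np.random.random((30, 50)) > .9)
print(len(dict(ofpg.frequent_itemsets(X_sparse, .1))))

transactions = [[1, 2, 3], [1, 2], [2, 4]]
print(dict(ofpg.frequent_itemsets(transactions, .5)))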