def generate_frequent_items(minimum_support):
    """Mine frequent itemsets from the module-level ``data_list``.

    Calls ``fpg.find_frequent_itemsets`` on ``data_list``, divides every
    returned support value by the number of transactions, prints the
    patterns as a pandas DataFrame and returns them as a dictionary.

    Args:
        minimum_support: Minimum support expressed as a fraction of
            ``len(data_list)`` (e.g. ``0.07``). It is multiplied by the
            number of transactions before being handed to
            ``fpg.find_frequent_itemsets``. Previously this argument was
            ignored and ``0.07`` was hard-coded.

    Returns:
        dict: maps ``frozenset(itemset)`` to that itemset's support
        divided by ``len(data_list)``.
    """
    n_transactions = len(data_list)
    frequent_itemsets = fpg.find_frequent_itemsets(
        data_list,
        minimum_support=minimum_support * n_transactions,
        include_support=True,
    )
    print(type(frequent_itemsets))  # print type

    # Store the returned results in a list, scaling each support value by
    # the number of transactions (presumably turning an absolute count into
    # a relative frequency -- confirm against the fpg implementation).
    result = [
        (itemset, support / n_transactions)
        for itemset, support in frequent_itemsets
    ]

    patterns_df = pd.DataFrame({
        "fluent_patterns": [itemset for itemset, _ in result],
        "support": [support for _, support in result],
    })

    # frozenset keys are hashable and order-independent, so no sorting of
    # the itemset is needed before building the key.
    patterns = {frozenset(itemset): support for itemset, support in result}

    print("-------------挖掘频繁项集---------------")
    print(patterns_df)
    print("--------------频繁项集------------------")
    return patterns
# python3
# -*- coding: utf-8 -*-
# @Author  : lina
# @Time    : 2018/5/13 11:40
import fp_growth_py3 as fpg

# Data set: each inner list is one transaction.
dataset = [
    ['啤酒', '牛奶', '可乐'],
    ['尿不湿', '啤酒', '牛奶', '橙汁'],
    ['啤酒', '尿不湿'],
    ['啤酒', '可乐', '尿不湿'],
    ['啤酒', '牛奶', '可乐'],
]

if __name__ == '__main__':
    # Call find_frequent_itemsets() to generate the frequent itemsets.
    #   minimum_support: the minimum support threshold; an itemset whose
    #       support is greater than or equal to minimum_support is kept,
    #       otherwise it is discarded.
    #   include_support: whether the result includes the support; when
    #       True each result carries both itemset and support, otherwise
    #       only the itemset is returned.
    frequent_itemsets = fpg.find_frequent_itemsets(
        dataset, minimum_support=1, include_support=True)
    print(type(frequent_itemsets))  # print type

    # Collect the generator's (itemset, support) pairs and order them by
    # itemset before printing.
    result = sorted(
        ((itemset, support) for itemset, support in frequent_itemsets),
        key=lambda pair: pair[0],
    )
    for itemset, support in result:
        print(f"{itemset!s} {support!s}")
# NOTE(review): the statement below is the tail of a definition that starts
# before this view; it is kept unchanged. It writes `k`, a space, str(v)
# and a newline to `f`.
f.write(k + ' ' + str(v) + '\n')

if __name__ == '__main__':
    # Script entry point: runs the steps below in order, printing a
    # progress message before each stage.
    print('Reading input file...')
    # NOTE(review): `input` is called with a file name, so it is presumably
    # a helper defined elsewhere in this file that shadows the builtin
    # `input` -- confirm.
    articles = input('台南 美食(100頁).txt')
    print('Splitting...')
    s = split_into_word(articles, 'str')
    print('Converting to transactions...')
    trans = text2trans(s)
    sw_path = 'stop_words.txt'
    print('Cleaning symbols...')
    trans = clean(trans, sw_path)  # remove stop words
    trans = rm_dup(trans)
    print('Finding frequent patterns...')
    # NOTE: this rebinds the name `fp` to the call's result, so after this
    # line `fp` no longer refers to the object that provides
    # find_frequent_itemsets.
    fp = fp.find_frequent_itemsets(trans, 2, True)
    fp = sort(fp)  # sort by support
    food = food_dict('food_list.txt')
    result = find_food(fp, food)  # returns a dictionary (per original note)
    print('Writing output file...')
    output(fp, 'fp台南(100頁).txt')
    output_result(result, 'fp台南(100頁)(only food).txt')
    print('Done!')