def test_max_len(self):
    res_df1 = fpmax(self.df)
    max_len = np.vectorize(len)(res_df1['itemsets']).max()
    assert max_len == 3

    res_df2 = fpmax(self.df, max_len=2)
    max_len = np.vectorize(len)(res_df2['itemsets']).max()
    assert max_len == 2
def test_max_len(self):
    res_df1 = fpmax(self.df)
    max_len = np.max(res_df1['itemsets'].apply(len))
    assert max_len == 3

    res_df2 = fpmax(self.df, max_len=2)
    max_len = np.max(res_df2['itemsets'].apply(len))
    assert max_len == 2
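# The two tests above reference a self.df fixture that is not shown. A
# minimal hypothetical setUp that satisfies both assertions, with invented
# transactions whose only maximal itemset at the default min_support of 0.5
# is {1, 2, 3}:
import unittest
import numpy as np
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import fpmax

class TestFPMax(unittest.TestCase):
    def setUp(self):
        transactions = [[1, 2, 3], [1, 2, 3], [1, 2], [4]]
        te = TransactionEncoder()
        self.df = pd.DataFrame(te.fit(transactions).transform(transactions),
                               columns=te.columns_)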
def getMaxFrequentPatterns(df, columns, class_to_explain, lprefix='ltable_',
                           rprefix='rtable_', min_support=0.2, k=15):
    # Build one transaction per record pair: for class 0 keep the tokens
    # shared by both sides, otherwise keep the tokens unique to each side.
    # NOTE: the k parameter is currently unused.
    transactions = []
    for i in range(len(df)):
        leftValues, rightValues = [], []
        for attr in columns:
            if attr.startswith(lprefix):
                leftValues += str(df.iloc[i][attr]).split()
            elif attr.startswith(rprefix):
                rightValues += str(df.iloc[i][attr]).split()
        if class_to_explain == 0:
            selectedRightValues = set(leftValues).intersection(set(rightValues))
            selectedLeftValues = selectedRightValues.copy()
        else:
            selectedLeftValues = set(leftValues).difference(set(rightValues))
            selectedRightValues = set(rightValues).difference(set(leftValues))
        leftValuesPrefixed = ['L_' + val for val in selectedLeftValues]
        rightValuesPrefixed = ['R_' + val for val in selectedRightValues]
        transactions.append(leftValuesPrefixed + rightValuesPrefixed)

    te = TransactionEncoder()
    te_ary = te.fit(transactions).transform(transactions)
    df = pd.DataFrame(te_ary, columns=te.columns_)
    frequent_itemsets = fpmax(df, min_support=min_support, use_colnames=True)
    return frequent_itemsets
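# A hypothetical invocation sketch for getMaxFrequentPatterns; the record
# pairs below are invented, and the column names follow the ltable_/rtable_
# prefix convention the function assumes.
import pandas as pd

pairs = pd.DataFrame({
    'ltable_name': ['apple iphone 7', 'dell xps 13'],
    'rtable_name': ['apple iphone 8', 'dell xps 15'],
})
# class_to_explain=0 selects the tokens shared within each pair, so the
# result contains the maximal patterns {L_apple, L_iphone, R_apple,
# R_iphone} and {L_dell, L_xps, R_dell, R_xps}, each with support 0.5.
print(getMaxFrequentPatterns(pairs, pairs.columns, class_to_explain=0,
                             min_support=0.5))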
def mineAssociationRules(df, columns, class_to_explain, lprefix='ltable_',
                         rprefix='rtable_', min_confidence=0.5,
                         min_support=0.2):
    transactions = _createTransactions(df, columns, class_to_explain,
                                       left_prefix=lprefix,
                                       right_prefix=rprefix)
    te = TransactionEncoder()
    te_ary = te.fit(transactions).transform(transactions)
    df = pd.DataFrame(te_ary, columns=te.columns_)
    # Confidence needs the support of every antecedent, which fpmax's
    # maximal-only output does not contain (mlxtend raises a KeyError
    # suggesting support_only=True). Mine all frequent itemsets with
    # mlxtend's fpgrowth instead.
    frequent_itemsets = fpgrowth(df, min_support=min_support,
                                 use_colnames=True)
    ar = association_rules(frequent_itemsets, metric="confidence",
                           min_threshold=min_confidence)
    # Keep only rules whose antecedents are all left-side tokens and whose
    # consequents are all right-side tokens.
    ar['antecedents_isleft'] = ar['antecedents'].apply(
        lambda s: all(token.startswith('L_') for token in s))
    ar['consequents_isright'] = ar['consequents'].apply(
        lambda s: all(token.startswith('R_') for token in s))
    important_rules = ar[ar.antecedents_isleft & ar.consequents_isright]
    return important_rules
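# A hypothetical invocation sketch for mineAssociationRules; it assumes
# _createTransactions builds the L_/R_-prefixed token transactions shown in
# the previous snippet, and the record pairs below are invented.
import pandas as pd

pairs = pd.DataFrame({
    'ltable_name': ['apple iphone 7', 'apple iphone 7', 'dell xps 13'],
    'rtable_name': ['apple iphone 8', 'apple iphone 8', 'dell xps 15'],
})
# class_to_explain=1 keeps the tokens unique to each side, yielding rules
# such as {L_7} -> {R_8} with confidence 1.0.
rules = mineAssociationRules(pairs, pairs.columns, class_to_explain=1)
print(rules[['antecedents', 'consequents', 'support', 'confidence']])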
def test_output(self):
    res_df = fpmax(self.df, min_support=0.001, use_colnames=True)
    expect = pd.DataFrame(
        [[0.25, frozenset(['a'])],
         [0.25, frozenset(['b'])],
         [0.25, frozenset(['c', 'd'])],
         [0.25, frozenset(['e'])]],
        columns=['support', 'itemsets'])
    compare_dataframes(res_df, expect)
def test_default(self):
    res_df = fpmax(self.df)
    expect = pd.DataFrame(
        [[0.6, frozenset([5, 6])],
         [0.6, frozenset([5, 10])],
         [0.6, frozenset([3, 5, 8])]],
        columns=['support', 'itemsets'])
    compare_dataframes(res_df, expect)
def getMaxFrequentPatterns(df, columns, class_to_explain, lprefix='ltable_',
                           rprefix='rtable_', min_support=0.2, k=15):
    transactions = _createTransactions(df, columns, class_to_explain,
                                       left_prefix=lprefix,
                                       right_prefix=rprefix)
    te = TransactionEncoder()
    te_ary = te.fit(transactions).transform(transactions)
    df = pd.DataFrame(te_ary, columns=te.columns_)
    frequent_itemsets = fpmax(df, min_support=min_support, use_colnames=True)
    return frequent_itemsets
def find_common(itemsets, occurrences, **kwargs):
    """Find maximal frequent itemsets with the FP-Max algorithm.

    `occurrences` is the minimum number of transactions an itemset must
    appear in; it is converted to the relative support fpmax expects.
    """
    min_support = occurrences / len(itemsets)
    return fpmax(itemsets, min_support=min_support, **kwargs)
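# A minimal usage sketch for find_common, assuming a one-hot transaction
# DataFrame as produced by mlxtend's TransactionEncoder; the toy
# transactions below are invented for illustration.
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import fpmax

transactions = [['milk', 'bread'], ['milk', 'bread', 'eggs'], ['bread']]
te = TransactionEncoder()
onehot = pd.DataFrame(te.fit(transactions).transform(transactions),
                      columns=te.columns_)

# Maximal itemsets occurring in at least 2 of the 3 transactions:
# here only {milk, bread}, with support 2/3.
print(find_common(onehot, occurrences=2, use_colnames=True))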
te_ary = te.fit(np_data).transform(np_data)
data = pd.DataFrame(te_ary, columns=te.columns_)
print(data)

# 2
result_fpgrowth = fpgrowth(data, min_support=0.03, use_colnames=True)
result_fpgrowth['length'] = np.fromiter(map(len, result_fpgrowth['itemsets']),
                                        dtype=int)
print(result_fpgrowth.sort_values('support', ascending=False))

# 3
print(result_fpgrowth.groupby('length').support.min())
print(result_fpgrowth.groupby('length').support.max())

# 4
result_fpmax = fpmax(data, min_support=0.03, use_colnames=True)
result_fpmax['length'] = np.fromiter(map(len, result_fpmax['itemsets']),
                                     dtype=int)
print(result_fpmax.groupby('length').support.min())
print(result_fpmax.groupby('length').support.max())
print(result_fpmax.sort_values('support', ascending=False))

# 6
plt.figure(figsize=(8, 6))
count_of_items = data.sum()
count_of_items.nlargest(10).plot.bar()

plt.figure(figsize=(8, 6))
data_ = result_fpgrowth[result_fpgrowth.length == 1].sort_values(
    'support', ascending=False).set_index('itemsets').support
data_.nlargest(10).plot.bar()
def printMinMaxSupport(result):
    # Report the min/max support per itemset length until no itemsets of
    # the current length remain.
    curr_len = 1
    while True:
        sups = result[result['itemsets'].apply(
            lambda r: len(r) == curr_len)]['support']
        if len(sups) == 0:
            break
        print('Itemset length {len}: support [{min}, {max}]'.format(
            len=curr_len, min=round(np.min(sups), 5),
            max=round(np.max(sups), 5)))
        curr_len += 1

printMinMaxSupport(fpg_result)

# %%
fpm_result = fpmax(data, min_support=0.03,
                   use_colnames=True).sort_values('support', ascending=False)
fpm_result

# %%
printMinMaxSupport(fpm_result)

# %%
plt.xlabel('Number of transactions containing the item')
data.sum().nlargest(10).sort_values().plot.barh()

# %%
plt.xlabel('Support level')
fpg_result.set_index('itemsets')['support'].nlargest(
    10).sort_values().plot.barh()

# %%
import pandas as pd
import numpy as np
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import fpmax
from mlxtend.frequent_patterns import association_rules
from fim import eclat
from tabulate import tabulate

df = pd.read_csv("plants_preprocessed.csv")

print("================ FP-Max ==================")
dataset = df.values.tolist()
te = TransactionEncoder()
te_ary = te.fit(dataset).transform(dataset)
df = pd.DataFrame(te_ary, columns=te.columns_)
itemsets = fpmax(df, min_support=0.001, use_colnames=True, max_len=10)
print(itemsets)

print("\n\n RULES based on FP-Max itemsets: \n\n")
# fpmax returns only maximal itemsets, so subset supports are unavailable
# and association_rules can only compute rule support (support_only=True).
rules = association_rules(itemsets, min_threshold=0.0001, support_only=True)
print(rules[['antecedents', 'consequents', 'support']])

print("================ ECLAT-Max ================")
itemsets = eclat(dataset, target='m', supp=2, report='s')
print(tabulate(itemsets, headers=['Itemset', 'Support'], tablefmt='pretty'))
# hashtags.drop(dropcol, inplace=True)
nodes.reset_index(inplace=True, drop=True)
communities.reset_index(inplace=True, drop=True)
user_hashtag_matrix.reset_index(inplace=True, drop=True)

t = len(hashtags)
n = len(nodes)
k = max(communities[communities_tagname])
T = range(t)
K = range(k)

if together_constraint:
    hashtags.reset_index(inplace=True, drop=True)
    user_hashtag_matrix.columns = np.arange(0, t)
    # Relabel the matrix columns with the hashtag strings.
    tags = list(hashtags[0])
    user_hashtag_matrix.columns = tags
    # data = fpmax(user_hashtag_matrix, min_support=0.2941, use_colnames=True)
    data = fpmax(user_hashtag_matrix, min_support=0.489, use_colnames=True)
    name = "maximal " + str(cluster)
    with open(name, "w") as file:
        print(data, file=file)
    # print(fpmax(user_hashtag_matrix, min_support=0.3, use_colnames=True))
    # print(fpmax(user_hashtag_matrix, min_support=0.25, use_colnames=True))
    # print(fpmax(user_hashtag_matrix, min_support=0.2, use_colnames=True))
te = TransactionEncoder()
te_ary = te.fit(np_data).transform(np_data)
data = pd.DataFrame(te_ary, columns=te.columns_)
print(data)

result = fpgrowth(data, min_support=0.03, use_colnames=True)
print(result)
result['length'] = result['itemsets'].apply(len)
result_1 = result[result['length'] == 1]
print("len 1 min:", min(result_1['support']))
print("len 1 max:", max(result_1['support']))
result_2 = result[result['length'] == 2]
print("len 2 min:", min(result_2['support']))
print("len 2 max:", max(result_2['support']))

result = fpmax(data, min_support=0.03, use_colnames=True)
print(result)
result['length'] = result['itemsets'].apply(len)
result_1 = result[result['length'] == 1]
print("len 1 min:", min(result_1['support']))
print("len 1 max:", max(result_1['support']))
result_2 = result[result['length'] == 2]
print("len 2 min:", min(result_2['support']))
print("len 2 max:", max(result_2['support']))

count_of_items = data.sum()
count_of_items.nlargest(10).plot.bar()
plt.tight_layout()
plt.show()

items = ['whole milk', 'yogurt', 'soda', 'tropical fruit', 'shopping bags',
         'sausage',
def check_dict(d, key):
    """Return True if key is already present in the dictionary."""
    if key in d:
        return True
    else:
        return False

start = time.time()

dataset = []  # 2-D list for storing the sequences
with open('out.txt', 'r') as fobj:  # Import values from the txt file containing the dataset
    for line in fobj:
        numbers = [int(num) for num in line.split()]  # Single row of the 2-D list
        dataset.append(numbers)

t = TransactionEncoder()
t_ary = t.fit(dataset).transform(dataset)  # Converting to a table of True/False
df = pd.DataFrame(t_ary, columns=t.columns_)  # Converting t_ary into the one-hot form fpmax expects

frequent_set = fpmax(df, min_support=0.015, use_colnames=True)  # Applying the FP-Max algorithm
frequent_set['length'] = frequent_set['itemsets'].apply(len)
# print(frequent_set)

end = time.time()
# print(end - start)

# For generating the length vs. count plot
# d = {}
# # Initializing a dictionary
# for i in range(frequent_set.shape[0]):
#     if check_dict(d, frequent_set['length'][i]):
#         d[frequent_set['length'][i]] = d[frequent_set['length'][i]] + 1
#     else:
#         d[frequent_set['length'][i]] = 1