def get_rules(encoded_transaction, min_support=0.07, min_threshold=1):
    """Mine frequent itemsets with Apriori and derive association rules.

    Parameters
    ----------
    encoded_transaction : one-hot encoded transaction DataFrame
        (as produced by a TransactionEncoder — assumed, confirm with caller).
    min_support : float
        Minimum support passed to ``apriori``.
    min_threshold : numeric
        Minimum lift passed to the rule generator.

    Returns
    -------
    (itemsets, rules) : tuple of DataFrames
        Itemsets sorted by support (desc) and rules sorted by lift (desc).
        Both are also printed as a side effect.
    """
    # Mine frequent itemsets, then turn them into rules filtered by lift.
    itemsets = apriori(encoded_transaction,
                       min_support=min_support,
                       use_colnames=True)
    rules = ar(itemsets, metric='lift', min_threshold=min_threshold)

    itemsets = itemsets.sort_values(by='support', ascending=False)
    print('频繁项集:', itemsets)

    # Widen the pandas display so the rule columns are not elided.
    pd.options.display.max_columns = 100
    rules = rules.sort_values(by='lift', ascending=False)
    print('关联规则:', rules)
    return itemsets, rules
def data_load(file):
    """Load a ';'-separated CSV into a DataFrame.

    Returns the DataFrame on success, or None when the file cannot be read.
    (The original used a bare ``except:`` and then executed ``return df``
    on the failure path, which raised UnboundLocalError because ``df`` was
    never assigned — fixed here.)
    """
    try:
        # NOTE(review): ``error_bad_lines`` was removed in pandas 2.0;
        # ``on_bad_lines='skip'`` is the pandas>=1.3 equivalent behavior.
        return pd.read_csv(file, sep=';', encoding='utf-8',
                           index_col=False, on_bad_lines='skip')
    except OSError:
        # Narrowed from a bare ``except:`` so unrelated bugs still surface.
        print('incorrect file path')
        return None


df = data_load(file)
df.head()
df.dtypes
# Drop the identifier column before mining — it carries no basket info.
df = df.loc[:, df.columns != 'id']
freq_items = ap(df, min_support=0.11, use_colnames=True)
freq_items.head()
rules = ar(freq_items, metric='lift', min_threshold=1)
rules.head()
rules['confidence'].sort_values(ascending=False).head(100)
rules['lift'].sort_values(ascending=False).head(100)
pd.set_option('display.max_columns', None)
# Keep only the strong rules: lift >= 1 and confidence >= 0.8.
rules[(rules['lift'] >= 1) & (rules['confidence'] >= 0.8)]
# Per-row feature extraction. cgm/bol look like DataFrames indexed 0..n-1
# (CGM readings and bolus doses, presumably — TODO confirm with caller);
# column 5 appears to be the baseline reading.
for i in range(len(cgm)):
    CGmax.append(maximum(cgm.loc[i]))
    BOLmax.append(maximum(bol.loc[i]))
    CG0.append(cgm.loc[i][5])
    # Each transaction: binned row max, binned baseline, raw bolus max.
    apriDF.append([
        CalculateBins(max(cgm.loc[i])),
        CalculateBins(cgm.loc[i][5]),
        max(bol.loc[i])
    ])
#Apriori Algorithm
#For Most Frequent Itemsets
# One-hot encode the transactions for mlxtend's apriori.
transEnc = TransactionEncoder()
transactions = pd.DataFrame(transEnc.fit(apriDF).transform(apriDF), columns=transEnc.columns_)
# Tiny min_support / zero threshold: effectively generate ALL rules.
rules = ar(ap(transactions, min_support=0.00000000001, use_colnames=True), min_threshold=0.0)
rules["antecedent_len"] = rules["antecedents"].apply(lambda x: len(x))
# Strip the "frozenset({...})" wrapper from the stringified item sets.
# NOTE(review): Series.str.replace treats the pattern as a regex here
# (hence re.escape); newer pandas defaults to regex=False — verify the
# pandas version this runs against.
for column in ['antecedents', 'consequents']:
    rules[column] = rules[column].astype(str)
    rules[column] = rules[column].str.replace(re.escape('frozenset({'), '')
    rules[column] = rules[column].str.replace(re.escape('})'), '')
# Merge antecedents+consequents into one comma-separated item list.
rules["SET"] = rules["antecedents"] + ',' + rules['consequents']
rules['SET'] = rules['SET'].str.replace("'", "")
rules['SET'] = rules.SET.apply(lambda x: x.split(','))
#rules.to_csv("Rules.csv")
# Convert every item set to floats, sorted descending.
li = rules['SET'].tolist()
y = [[(float(j)) for j in i] for i in li]
for i in y:
    i.sort(reverse=True)
b = list()
# (continues past this view)
for sublist in y:
from mlxtend.frequent_patterns import association_rules as ar


def deal(data):
    """Drop NaNs from one row and return the remaining items as a list."""
    return data.dropna().tolist()


# Turn each DataFrame row into a transaction (list of non-null items).
df_arr = df.apply(deal, axis=1).tolist()
te = TE()  # transaction one-hot encoder
df_tf = te.fit_transform(df_arr)
df = pd.DataFrame(df_tf, columns=te.columns_)
df
freq_itemsets = apriori(df, min_support=0.005, use_colnames=True)
freq_itemsets.sort_values(by='support', ascending=False, inplace=True)
freq_itemsets
a_r = ar(freq_itemsets, metric='lift')
a_r = a_r.sort_values(by='lift', ascending=False).reset_index(drop=True)
a_r
# Interest measures, vectorized over the whole frame (the original built
# each column with a per-row Python loop and math.sqrt — same arithmetic):
#   cosine = support / sqrt(antecedent support * consequent support)
a_r['cosine'] = a_r['support'] / (
    a_r['antecedent support'] * a_r['consequent support']) ** 0.5
#   Kulczynski = (conf(A->B) + conf(B->A)) / 2
a_r['Kulc'] = 0.5 * (a_r['support'] / a_r['antecedent support']
                     + a_r['support'] / a_r['consequent support'])
## ## Defining the apriori algorithm. ## freq_items = ap(encode_df, min_support=0.0085, use_colnames=True, verbose=1, low_memory=False) print(freq_items.head()) ## ## Defining the association rules algorithms to match and find similar items together based on confidence. ## assocn_rules_conf = ar(freq_items, metric="confidence", min_threshold=0.25) print(assocn_rules_conf) ## ## Defining the association rules algorithms to match and find similar items together based on support. ## assocn_rules_supp = ar(freq_items, metric="support", min_threshold=0.005) print(assocn_rules_supp) ## ## ## Plotting the scatter plot of Confidence Vs Support ## plt.scatter(assocn_rules_conf['support'], assocn_rules_conf['confidence'],