예제 #1
0
def get_rules(encoded_transaction, min_support=0.07, min_threshold=1):
    """Mine frequent itemsets and lift-based association rules, printing both.

    Args:
        encoded_transaction: Transaction table handed straight to ``apriori``.
        min_support: Minimum support forwarded to ``apriori``.
        min_threshold: Minimum lift forwarded to ``ar``.

    Returns:
        A ``(frequent_itemsets, rules)`` tuple; the itemsets are ordered by
        descending ``support`` and the rules by descending ``lift``.

    Side effects:
        Prints both tables and sets the pandas ``display.max_columns``
        option to 100 (a process-wide setting).
    """
    # Mine the frequent itemsets, then derive the rules from the unsorted
    # result.
    itemsets = apriori(encoded_transaction,
                       min_support=min_support,
                       use_colnames=True)
    lift_rules = ar(itemsets, metric='lift', min_threshold=min_threshold)

    itemsets = itemsets.sort_values(by='support', ascending=False)
    print('频繁项集:', itemsets)

    # Widen the display only now, so it affects the rules table below but
    # not the itemsets printed above.
    pd.set_option('display.max_columns', 100)
    lift_rules = lift_rules.sort_values(by='lift', ascending=False)
    print('关联规则:', lift_rules)

    return itemsets, lift_rules
예제 #2
0
def data_load(file):
    """Read a semicolon-separated UTF-8 CSV file into a DataFrame.

    Malformed rows are skipped rather than aborting the load, and the first
    column is not used as the index.

    Args:
        file: Path (or file-like object) accepted by ``pandas.read_csv``.

    Returns:
        The parsed ``pandas.DataFrame``.

    Raises:
        OSError: If the file cannot be opened (e.g. ``FileNotFoundError``).
            The message ``'incorrect file path'`` is printed before the
            error is re-raised.
    """
    try:
        # ``on_bad_lines='skip'`` (pandas >= 1.3) replaces
        # ``error_bad_lines=False``, which was removed in pandas 2.0.
        return pd.read_csv(file, sep=';', encoding='utf-8', index_col=False,
                           on_bad_lines='skip')
    except OSError:
        # Report the problem, then propagate the real error; the previous
        # bare ``except`` fell through to ``return df`` with ``df`` unbound.
        print('incorrect file path')
        raise
# Load the dataset; `file` must already hold the CSV path.
df = data_load(file)
df.head()
df.dtypes
# Keep every column except the one labelled 'id'.
df = df.loc[:, df.columns != 'id']

# Frequent itemsets with support of at least 0.11, labelled by column name.
freq_items = ap(df, min_support=0.11, use_colnames=True)
freq_items.head()

# Association rules whose lift is at least 1.
rules = ar(freq_items, metric='lift', min_threshold=1)
rules.head()

# Inspect the 100 largest confidence and lift values.
rules['confidence'].sort_values(ascending=False).head(100)
rules['lift'].sort_values(ascending=False).head(100)

# Show all columns when a frame is displayed.
pd.set_option('display.max_columns', None)

# Rules with lift >= 1 and confidence >= 0.8.
rules.loc[(rules['lift'] >= 1) & (rules['confidence'] >= 0.8)]
예제 #3
0
# Collect per-row summaries and one Apriori transaction per row of `cgm`/`bol`.
# NOTE(review): range(len(cgm)) combined with .loc[i] assumes both frames carry
# a default 0..n-1 index — confirm against where they are loaded.
for i in range(len(cgm)):
    CGmax.append(maximum(cgm.loc[i]))
    BOLmax.append(maximum(bol.loc[i]))
    CG0.append(cgm.loc[i][5])
    # Transaction: [bin of the row maximum, bin of the value at 5, max of bol row].
    apriDF.append([
        CalculateBins(max(cgm.loc[i])),
        CalculateBins(cgm.loc[i][5]),
        max(bol.loc[i])
    ])

# Apriori algorithm: one-hot encode the transactions, then mine the rules.
transEnc = TransactionEncoder()
transactions = pd.DataFrame(transEnc.fit(apriDF).transform(apriDF),
                            columns=transEnc.columns_)
# The near-zero min_support and the 0.0 threshold are chosen so that
# essentially nothing is filtered out.
rules = ar(ap(transactions, min_support=0.00000000001, use_colnames=True),
           min_threshold=0.0)
rules["antecedent_len"] = rules["antecedents"].apply(len)
for column in ['antecedents', 'consequents']:
    rules[column] = rules[column].astype(str)
    # Strip the "frozenset({ ... })" wrapper left by astype(str). Literal
    # matching (regex=False) behaves the same on every pandas version; the
    # former re.escape() pattern only matched while str.replace defaulted to
    # regex=True and silently stopped matching once that default changed.
    rules[column] = rules[column].str.replace('frozenset({', '', regex=False)
    rules[column] = rules[column].str.replace('})', '', regex=False)
# Merge both sides of each rule into one comma-separated string, drop the
# quote characters, and split it into a list of item strings.
rules["SET"] = rules["antecedents"] + ',' + rules['consequents']
rules['SET'] = rules['SET'].str.replace("'", "", regex=False)
rules['SET'] = rules.SET.apply(lambda x: x.split(','))
#rules.to_csv("Rules.csv")
li = rules['SET'].tolist()
# Convert every item to float and order each rule's items from high to low.
y = [[float(j) for j in i] for i in li]
for i in y:
    i.sort(reverse=True)
b = list()
for sublist in y:
예제 #4
0
from mlxtend.frequent_patterns import association_rules as ar


def deal(data):
    """Return the non-missing values of ``data`` as a plain Python list.

    ``data`` must provide ``dropna()`` whose result provides ``tolist()``
    (for example a pandas Series).
    """
    non_missing = data.dropna()
    return non_missing.tolist()


# Turn each row of `df` into a list of its non-missing values.
df_arr = df.apply(deal, axis=1).tolist()
te = TE()  # encoder instance
df_tf = te.fit_transform(df_arr)
df = pd.DataFrame(df_tf, columns=te.columns_)
df
freq_itemsets = apriori(df, min_support=0.005, use_colnames=True)
freq_itemsets.sort_values(by='support', ascending=False, inplace=True)
freq_itemsets
a_r = ar(freq_itemsets, metric='lift')
a_r = a_r.sort_values(by='lift', ascending=False).reset_index(drop=True)
a_r
# Extra interestingness measures, computed column-wise instead of looping
# over rows with iloc.
# cosine = support / sqrt(antecedent support * consequent support)
a_r['cosine'] = a_r['support'] / (
    a_r['antecedent support'] * a_r['consequent support']) ** 0.5
# Kulc = mean of support/antecedent support and support/consequent support
a_r['Kulc'] = 0.5 * (a_r['support'] / a_r['antecedent support'] +
                     a_r['support'] / a_r['consequent support'])
# Run Apriori on `encode_df` (minimum support 0.0085) and preview the
# resulting frequent itemsets.
freq_items = ap(
    encode_df,
    min_support=0.0085,
    use_colnames=True,
    verbose=1,
    low_memory=False,
)
print(freq_items.head())

# Association rules selected by confidence (threshold 0.25).
assocn_rules_conf = ar(freq_items, metric="confidence", min_threshold=0.25)
print(assocn_rules_conf)

# Association rules selected by support (threshold 0.005).
assocn_rules_supp = ar(freq_items, metric="support", min_threshold=0.005)
print(assocn_rules_supp)

##
##
## Plotting the scatter plot of Confidence Vs Support
##

plt.scatter(assocn_rules_conf['support'],
            assocn_rules_conf['confidence'],