Example #1
import pandas as pd
import time


def rule2():
    from mlxtend.frequent_patterns import apriori as ap
    from mlxtend.frequent_patterns import association_rules
    pd.options.display.max_columns = 1000
    start = time.time()
    # One-hot encode the transactions: rows are order IDs ('订单数量'), columns
    # are product names ('产品名称'), values are per-order item counts.
    # `data` is assumed to be a DataFrame loaded earlier in the original script.
    hot_encoded_df = data.groupby([
        '订单数量', '产品名称'
    ])['产品名称'].count().unstack().reset_index().fillna(0).set_index('订单数量')

    hot_encoded_df = hot_encoded_df.applymap(encode_units)
    print(hot_encoded_df)
    frequent_itemsets = ap(hot_encoded_df, min_support=0.01, use_colnames=True)

    rules = association_rules(frequent_itemsets,
                              metric="lift",
                              min_threshold=0.2)

    print("频繁项集:", frequent_itemsets)

    print("关联规则:", rules[(rules['lift'] >= 1) & (rules['confidence'] >= 0.3)])

    rules.to_excel(
        'E:\\python_project\\GIT\\DATA_ANALYSIS\\test\\test\\关联规则2.xlsx')  # "association rules 2"
    frequent_itemsets.to_excel(
        'E:\\python_project\\GIT\\DATA_ANALYSIS\\test\\test\\频繁项集2.xlsx')  # "frequent itemsets 2"
    end = time.time()
    print("Total time:", end - start)
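
# `encode_units` is not defined in these snippets. A minimal sketch consistent
# with how it is used (binarizing the item-count matrix before apriori) could be:
def encode_units(x):
    # Any positive count means the item appeared in that order.
    return 1 if x >= 1 else 0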
Example #2

import pandas as pd
import time


def rule2():
    from mlxtend.frequent_patterns import apriori as ap
    from mlxtend.frequent_patterns import association_rules
    pd.options.display.max_columns = 1000
    start = time.time()
    # `data` is assumed to be a DataFrame of fault records loaded earlier.
    hot_encoded_df = data.groupby(
        ['FaultNo.',
         'Alarm Msg.'])['Alarm Msg.'].count().unstack().reset_index().fillna(
             0).set_index('FaultNo.')

    hot_encoded_df = hot_encoded_df.applymap(encode_units)
    frequent_itemsets = ap(hot_encoded_df, min_support=0.01, use_colnames=True)

    rules = association_rules(frequent_itemsets,
                              metric="lift",
                              min_threshold=0.2)

    print("频繁项集:", frequent_itemsets)

    print("关联规则:", rules[(rules['lift'] >= 1) & (rules['confidence'] >= 0.2)])
    print(rules['confidence'])
    # `F` is a filename prefix assumed to be defined elsewhere in the original script.
    rules.to_excel('E:\\DATA_ENGIN\\apiority\\' + F + '关联规则2.xlsx')  # "association rules 2"
    frequent_itemsets.to_excel('E:\\DATA_ENGIN\\apiority\\' + F + '频繁项集2.xlsx')  # "frequent itemsets 2"
    end = time.time()
    print("Total time:", end - start)
Example #3

import pandas as pd
from mlxtend.frequent_patterns import apriori as ap
from mlxtend.frequent_patterns import association_rules as ar


def data_load(file):
    try:
        # `on_bad_lines` replaces the removed `error_bad_lines` argument.
        df = pd.read_csv(file, sep=';', encoding='utf-8', index_col=False,
                         on_bad_lines='skip')
    except FileNotFoundError:
        print('incorrect file path')
        return None

    return df
df = data_load(file)  # `file` holds the CSV path, defined elsewhere in the original script
df.head()
df.dtypes
df = df.loc[:,df.columns != 'id']


freq_items = ap(df,
                min_support = 0.11,
                use_colnames=True)
freq_items.head()

rules = ar(freq_items,
           metric = 'lift',
           min_threshold = 1)
rules.head()

rules['confidence'].sort_values(ascending = False).head(100)
rules['lift'].sort_values(ascending = False).head(100)

pd.set_option('display.max_columns', None)

rules[ (rules['lift'] >= 1) & (rules['confidence'] >= 0.8)]
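
# A compact, equivalent inspection of the strongest rules, sorting on both
# metrics at once (a hedged alternative to the two separate sorts above):
rules.sort_values(['lift', 'confidence'], ascending=False).head(10)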
Example #4
#5. Convert the data into the form the Apriori algorithm accepts (a DataFrame whose
#   index is the transaction number and whose column values are True or False,
#   depending on whether the given product appeared in the transaction); see the
#   sketch after these comments.
#6. Generate a list of rules for a min_support value of your choice. Review the
#   rules, then pick the 5 you consider best.
#7. Try increasing the min_support value. What happens to the number of rules?
#8. Print all rules whose lift is greater than 5 and whose confidence is greater
#   than 0.8.
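
# Task 5 is solved below with groupby/unstack. An equivalent route uses mlxtend's
# TransactionEncoder, sketched here on a small hypothetical `transactions` list
# (one inner list of product descriptions per invoice):
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder

transactions = [['MILK', 'BREAD'], ['BREAD', 'BUTTER'], ['MILK', 'BREAD', 'BUTTER']]
te = TransactionEncoder()
onehot = pd.DataFrame(te.fit(transactions).transform(transactions),
                      columns=te.columns_)  # True/False matrix, ready for apriori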


# ap / ar are the mlxtend apriori and association_rules aliases used below.
from mlxtend.frequent_patterns import apriori as ap, association_rules as ar

df_EIRE = df[df['Country'] == 'EIRE']
df_EIRE = df_EIRE[['InvoiceNo', 'Description', 'Quantity']]
df_EIRE = (df_EIRE.groupby(['InvoiceNo', 'Description'])['Quantity'].
               sum().unstack().fillna(0))
df_EIRE = df_EIRE != 0  # True where the product appeared in the invoice

freq_items = ap(df_EIRE,
                min_support = 0.05,
                use_colnames=True)
freq_items.head(10)

rules = ar(freq_items,
           metric = 'lift',
           min_threshold = 1)
rules.head(10)

rules['confidence'].sort_values(ascending = False).head(10)
rules['lift'].sort_values(ascending = False).head(10)
rules[(rules['lift'] > 5) & (rules['confidence'] > 0.8)]  # task 8 thresholds
Example #5
         edgecolor='red')
plt.xticks(np.arange(0, 32, step=2.5))
plt.xlabel("The Items number of every customer")
plt.ylabel("Frequency")
plt.show()
print()

## b)
te = tec()  # tec: TransactionEncoder from mlxtend.preprocessing, aliased at import
# `DataFrame` is the raw dataset loaded earlier in the original script;
# gather each customer's items into one transaction list.
cusItemList = DataFrame.groupby(
    ['Customer'])['Item'].apply(list).values.tolist()
te_ary = te.fit(cusItemList).transform(cusItemList)
ItemIndicator = pds.DataFrame(te_ary, columns=te.columns_)  # pds: pandas alias

frequent_itemsets = ap(ItemIndicator,
                       min_support=(75 / customerItem.count()),  # floor of 75 occurrences; customerItem comes from the original script
                       max_len=32,
                       use_colnames=True)
print(frequent_itemsets)
print()

## c)
# Discover the association rules (as_r: association_rules, aliased at import
# in the truncated preamble).
assoc_rules = as_r(frequent_itemsets, metric="confidence", min_threshold=0.01)
print('We can find', len(assoc_rules), 'association rules')
print(assoc_rules)
print()


## d)
def showGraph():
    plt.figure(facecolor='white', edgecolor='white')
Example #6
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori as ap, association_rules as ar

# Build one transaction per row: the binned CGM maximum, the binned CGM reading
# at index 5, and the bolus maximum (cgm, bol, maximum and CalculateBins are
# defined earlier in the original script).
for i in range(len(cgm)):
    CGmax.append(maximum(cgm.loc[i]))
    BOLmax.append(maximum(bol.loc[i]))
    CG0.append(cgm.loc[i][5])
    apriDF.append([
        CalculateBins(max(cgm.loc[i])),
        CalculateBins(cgm.loc[i][5]),
        max(bol.loc[i])
    ])

# Apriori algorithm: mine the most frequent itemsets and derive rules.
transEnc = TransactionEncoder()
transactions = pd.DataFrame(transEnc.fit(apriDF).transform(apriDF),
                            columns=transEnc.columns_)
# Effectively unfiltered: the tiny min_support and zero confidence threshold
# keep every itemset and every rule.
rules = ar(ap(transactions, min_support=1e-11, use_colnames=True),
           min_threshold=0.0)
rules["antecedent_len"] = rules["antecedents"].apply(lambda x: len(x))
for column in ['antecedents', 'consequents']:
    rules[column] = rules[column].astype(str)
    rules[column] = rules[column].str.replace(re.escape('frozenset({'), '')
    rules[column] = rules[column].str.replace(re.escape('})'), '')
rules["SET"] = rules["antecedents"] + ',' + rules['consequents']
rules['SET'] = rules['SET'].str.replace("'", "")
rules['SET'] = rules.SET.apply(lambda x: x.split(','))
#rules.to_csv("Rules.csv")
li = rules['SET'].tolist()
y = [[(float(j)) for j in i] for i in li]
for i in y:
    i.sort(reverse=True)
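
# A hedged alternative to the string surgery above: flatten the frozensets
# directly, before any astype(str) conversion (kept commented out here since
# the columns were already converted):
#
#     rules['SET'] = rules.apply(
#         lambda r: sorted((float(v) for v in r['antecedents'] | r['consequents']),
#                          reverse=True),
#         axis=1)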
b = list()

Example #7

import pandas as pd
from mlxtend.frequent_patterns import apriori as ap, association_rules as ar

# The start of this snippet is truncated. A minimal reconstruction of the
# one-hot encoding loop the surviving lines belong to (hypothetical `itemList`
# holds one item list per transaction, `allItems` is the item vocabulary):
encoded_vals = []
for transaction in itemList:
    labels = {item: 0 for item in allItems}
    commons = set(allItems) & set(transaction)
    for com in commons:
        labels[com] = 1
    encoded_vals.append(labels)
encoded_vals[0]

encode_df = pd.DataFrame(encoded_vals)

print(encode_df.head())

##
## Run the Apriori algorithm to mine the frequent itemsets.
##

freq_items = ap(encode_df,
                min_support=0.0085,
                use_colnames=True,
                verbose=1,
                low_memory=False)
print(freq_items.head())

##
## Generate association rules that pair related items, thresholded on confidence.
##

assocn_rules_conf = ar(freq_items, metric="confidence", min_threshold=0.25)
print(assocn_rules_conf)

##
## Generate association rules again, this time thresholded on support.
##
assocn_rules_supp = ar(freq_items, metric="support", min_threshold=0.005)
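# Presumably mirrored from the confidence variant above: print the
# support-thresholded rules as well.
print(assocn_rules_supp)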