def create_rules(dataframe, country=False, head=5, *,
                 min_support=0.01, metric="support", min_threshold=0.01):
    """Mine association rules from transaction rows and print the top ones.

    Args:
        dataframe: Transaction-level frame. It is passed to
            ``create_invoice_product_df`` and must also contain a
            ``"Country"`` column when ``country`` is given.
        country: When truthy, only rows whose ``"Country"`` equals this value
            are used. The default ``False`` keeps every row.
        head: Number of rules shown in the printed preview.
        min_support: Forwarded to ``apriori`` as ``min_support``.
        metric: Forwarded to ``association_rules`` as ``metric``.
        min_threshold: Forwarded to ``association_rules`` as
            ``min_threshold``.

    Returns:
        The complete result of ``association_rules``. Only the printed
        preview is sorted by ``"lift"``; the returned object is not.
    """
    # The country filter is the only conditional step; everything after it
    # is the same pipeline regardless of whether a country was requested.
    if country:
        dataframe = dataframe[dataframe["Country"] == country]

    basket = create_invoice_product_df(dataframe)

    # NOTE(review): apriori / association_rules are presumably the
    # mlxtend.frequent_patterns functions - the imports are not visible here.
    frequent_itemsets = apriori(
        basket, min_support=min_support, use_colnames=True
    )
    rules = association_rules(
        frequent_itemsets, metric=metric, min_threshold=min_threshold
    )

    # Preview only: strongest rules by lift first.
    print(rules.sort_values("lift", ascending=False).head(head))
    return rules
# Control: spot-check a single product within a single invoice.
df[(df["StockCode"] == 16016) & (df["Invoice"] == 536983)]

# Summed quantity per (Invoice, StockCode), pivoted so stock codes become
# columns; only the top-left corner is inspected.
(
    df_ger.groupby(["Invoice", "StockCode"])
    .agg({"Quantity": "sum"})
    .unstack()
    .fillna(0)
    .iloc[0:6, 0:12]
)

# apply works along rows or columns (selected through axis), whereas
# applymap is evaluated on every single element.
(
    df_ger.groupby(["Invoice", "StockCode"])
    .agg({"Quantity": "sum"})
    .unstack()
    .fillna(0)
    .applymap(lambda x: 1 if x > 0 else 0)
    .iloc[0:6, 0:12]
)

# fillna(0) fills NA values with 0.

# NOTE(review): create_invoice_product_df is not defined above this point in
# the visible part of the file - confirm it is in scope when this runs.
ger_inv_pro_df = create_invoice_product_df(df_ger)
ger_inv_pro_df.head()

# How many unique products are in each invoice?
new_df_ger = (
    df_ger.groupby(["Invoice", "Description"])
    .agg({"Quantity": "sum"})
    .fillna(0)
    .applymap(lambda x: 1 if x > 0 else 0)
)
new_df_ger.head(30)

# Turn the (Invoice, Description) index levels back into ordinary columns.
new_df_ger = new_df_ger.reset_index()
new_df_ger.head(30)

new_df_ger.groupby("Invoice").agg({"Quantity": "sum"})

# How many unique baskets is each product in?
df_ger.groupby("Description").agg({"Invoice": "nunique"})

############################################
df[(df["StockCode"] == 16235) & (df["Invoice"] == 538174)] df[(df["StockCode"] == 17003) & (df["Invoice"] == 537894)] #fiilna() -> boşluklara sıfır koyduk df_fr.groupby(['Invoice', 'StockCode']).\ agg({"Quantity": "sum"}).\ unstack().fillna(0).iloc[0:5, 0:5] #nan değerlerine 0 koyduk yukarıdaki çıktıyı elde etmek için! def create_invoice_product_df(dataframe): #matrisi dataframe olarak aldık #StockCode return dataframe.groupby(['Invoice', 'Description'])['Quantity'].sum().unstack().fillna(0). \ applymap(lambda x: 1 if x > 0 else 0) germany_inv_pro_df = create_invoice_product_df(df_fr) #!!!! 1 ve 0 lar bir sepette ne kadar ürün olup olmadığını gösteriyor germany_inv_pro_df.head(220) ############################################ # Birliktelik Kurallarının Çıkarılması ############################################ #apriori fonskiynu bize itemlerin frekanslarını verecek #supportları hesapladık frequent_itemsets = apriori(germany_inv_pro_df, min_support=0.01, use_colnames=True) frequent_itemsets.sort_values("support", ascending=False) #supportlara göre, min_thresholda göre kuralları çıkardık rules = association_rules(frequent_itemsets, metric="support",