# Read the "Year 2010-2011" sheet of the Online Retail II workbook.
df_ = pd.read_excel(
    r"C:\Users\LENOVO\PycharmProjects\DSMLBC4\datasets\online_retail_II.xlsx",
    sheet_name="Year 2010-2011",
)

# Keep the freshly loaded frame in df_ and continue on a copy.
df = df_.copy()
df.info()
df.head()

# Inspect the raw frame with the project's check_df helper.
from helpers.helpers import check_df

check_df(df)

# Run the project's crm_data_prep helper and inspect its result.
from helpers.helpers import crm_data_prep

df = crm_data_prep(df)
check_df(df)

# Country selection -> [Germany]
germany_mask = df["Country"] == "Germany"
df_ger = df.loc[germany_mask]
check_df(df_ger)

# Summed quantity for every (Invoice, StockCode) pair of the German rows.
basket_keys = ["Invoice", "StockCode"]
qty_per_item = df_ger.groupby(basket_keys).agg({"Quantity": "sum"})
qty_per_item.head(100)

# Same totals with StockCode moved to the columns; preview the top-left
# 6 x 12 corner only.
qty_per_item.unstack().iloc[:6, :12]

# Control: look up one StockCode / Invoice combination in the full frame.
is_control_item = df["StockCode"] == 16016
is_control_invoice = df["Invoice"] == 536983
df[is_control_item & is_control_invoice]
# Load the online_retail_2010_2011 table through pd.read_sql_query using the
# existing `conn`, preview/inspect both df and df_mysql, convert
# df_mysql["InvoiceDate"] with pd.to_datetime, and rename its "CustomerID"
# column to "Customer ID". Then run crm_data_prep and check_df (both from
# helpers.helpers) to build and inspect df_prep.
# NOTE(review): crm_data_prep is applied to `df`, not to the freshly loaded
# `df_mysql` - confirm this is intended.
# NOTE(review): `pd`, `dt`, `conn` and `df` are not defined in this span;
# presumably they are set up earlier in the file - verify.
# NOTE(review): the create_cltv_p definition that starts at the end of this
# span continues beyond it and is intentionally left undocumented here.
query = "select * from online_retail_2010_2011" df_mysql = pd.read_sql_query(query, conn) df.head() df_mysql.head() df.info() df_mysql.info() df_mysql["InvoiceDate"] = pd.to_datetime(df_mysql["InvoiceDate"]) df_mysql.rename(columns={"CustomerID": "Customer ID"}, inplace=True) df.head() from helpers.helpers import crm_data_prep df_prep = crm_data_prep(df) df_prep.head() from helpers.helpers import check_df check_df(df_prep) def create_cltv_p(dataframe): today_date = dt.datetime(2011, 12, 11) ## recency kullanıcıya özel dinamik. rfm = dataframe.groupby('Customer ID').agg({ 'InvoiceDate': [ lambda date: (date.max() - date.min()).days, lambda date: