Esempio n. 1
0
# Load the "Year 2010-2011" sheet of the Online Retail II workbook.
# NOTE(review): `pd` is presumably pandas, imported outside this snippet.
# NOTE(review): the path below is absolute and machine-specific.

df_ = pd.read_excel(
    r"C:\Users\LENOVO\PycharmProjects\DSMLBC4\datasets\online_retail_II.xlsx",
    sheet_name="Year 2010-2011")
# Work on a copy so the freshly loaded frame `df_` is left untouched.
df = df_.copy()
df.info()
# Bare expression: the preview is only displayed in an interactive session;
# when run as a plain script the result is discarded.
df.head()

# `check_df` is a project helper whose behavior is not visible here;
# presumably it prints a summary of the frame -- verify in helpers.helpers.
from helpers.helpers import check_df
check_df(df)

# `crm_data_prep` is a project helper; presumably it cleans the raw retail
# data -- verify in helpers.helpers. Its return value replaces `df`.
from helpers.helpers import crm_data_prep

df = crm_data_prep(df)
check_df(df)

# Keep only the rows whose Country is exactly "Germany".
df_ger = df[df['Country'] == "Germany"]
check_df(df_ger)

# Total Quantity per (Invoice, StockCode) pair; preview the first 100 rows.
df_ger.groupby(['Invoice', 'StockCode']).agg({"Quantity": "sum"}).head(100)

# Same aggregation, then move the innermost index level (StockCode) into the
# columns so each row is an invoice; preview the first 6 rows and 12 columns.
df_ger.groupby(['Invoice', 'StockCode']).agg({
    "Quantity": "sum"
}).unstack().iloc[0:6, 0:12]

# Spot check: rows for StockCode 16016 on Invoice 536983.
# This filters the full `df`, not the Germany subset `df_ger`.
# NOTE(review): both values are compared as ints; if either column holds
# strings the filter returns no rows -- confirm the column dtypes.
df[(df["StockCode"] == 16016) & (df["Invoice"] == 536983)]
Esempio n. 2
0
# Read every row of the online_retail_2010_2011 table through `conn`.
# NOTE(review): `conn` and `pd` are defined outside this snippet; the name
# `df_mysql` suggests a MySQL connection -- confirm.
query = "select * from online_retail_2010_2011"
df_mysql = pd.read_sql_query(query, conn)

# Preview both frames side by side (`df` is defined outside this snippet).
# Bare `.head()` expressions only display in an interactive session.
df.head()
df_mysql.head()

df.info()
df_mysql.info()

# Parse InvoiceDate into datetimes and rename "CustomerID" to "Customer ID"
# in place; presumably this aligns the SQL frame with `df`'s schema -- confirm.
df_mysql["InvoiceDate"] = pd.to_datetime(df_mysql["InvoiceDate"])
df_mysql.rename(columns={"CustomerID": "Customer ID"}, inplace=True)

df.head()

# `crm_data_prep` is a project helper; its behavior is not visible here.
# NOTE(review): this is called on `df`, not on the `df_mysql` frame that was
# just adjusted above -- confirm which frame is intended.
from helpers.helpers import crm_data_prep
df_prep = crm_data_prep(df)

df_prep.head()

# `check_df` is a project helper; presumably it prints a summary of the
# frame -- verify in helpers.helpers.
from helpers.helpers import check_df

check_df(df_prep)


def create_cltv_p(dataframe):
    today_date = dt.datetime(2011, 12, 11)

    ## recency kullanıcıya özel dinamik.
    rfm = dataframe.groupby('Customer ID').agg({
        'InvoiceDate': [
            lambda date: (date.max() - date.min()).days, lambda date: