# Exploratory plots of the sales data: per-date sales of store 1, followed by
# the mean sales over all stores for each date.
import matplotlib

# The backend is selected before pyplot is imported.
matplotlib.use("Qt5Agg")

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from exploratoryUtils import basic_info_df

dataset_store = pd.read_csv("./data/store.csv", encoding="utf-8")
dataset_train = pd.read_csv("./data/train.csv", encoding="utf-8")
dataset_test = pd.read_csv("./data/test.csv", encoding="utf-8")

# Run the project helper on each frame (defined in exploratoryUtils, not
# visible here).
basic_info_df(dataset_store)
basic_info_df(dataset_train)
basic_info_df(dataset_test)

# Convert the column Date from object into datetime
dataset_train["Date"] = pd.to_datetime(dataset_train["Date"], format="%Y-%m-%d")

# Build the row selector once instead of re-evaluating it per column.
is_store_1 = dataset_train.Store == 1

# plt.plot_date is deprecated (Matplotlib 3.9); datetime data can be passed
# straight to plt.plot. "o" reproduces plot_date's default marker-only style.
plt.plot(
    dataset_train.loc[is_store_1, "Date"],
    dataset_train.loc[is_store_1, "Sales"],
    "o",
)
plt.title("Sales for Store 1 per date")
# Save before show(): the figure may be released once the window is closed.
plt.savefig("./image/sales_store_1.png")
plt.show()

temp = dataset_train.groupby("Date")["Sales"]
dates = np.unique(dataset_train.loc[:, "Date"].values)
# GroupBy.mean() is the built-in equivalent of apply(lambda x: x.mean()).
plt.plot(dates, temp.mean(), "ro", label="Mean")
# Exploratory plots of the sales data: per-date sales of store 1, followed by
# the mean sales over all stores for each date.
import matplotlib

# The backend is selected before pyplot is imported.
matplotlib.use('Qt5Agg')

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from exploratoryUtils import basic_info_df

dataset_store = pd.read_csv("./data/store.csv", encoding="utf-8")
dataset_train = pd.read_csv("./data/train.csv", encoding="utf-8")
dataset_test = pd.read_csv("./data/test.csv", encoding="utf-8")

# Run the project helper on each frame (defined in exploratoryUtils, not
# visible here).
basic_info_df(dataset_store)
basic_info_df(dataset_train)
basic_info_df(dataset_test)

# Convert the column Date from object into datetime
dataset_train['Date'] = pd.to_datetime(dataset_train['Date'], format='%Y-%m-%d')

# Build the row selector once instead of re-evaluating it per column.
store_1_rows = dataset_train.Store == 1

# plt.plot_date is deprecated (Matplotlib 3.9); datetime data can be passed
# straight to plt.plot. 'o' reproduces plot_date's default marker-only style.
plt.plot(
    dataset_train.loc[store_1_rows, 'Date'],
    dataset_train.loc[store_1_rows, 'Sales'],
    'o',
)
plt.title("Sales for Store 1 per date")
# Save before show(): the figure may be released once the window is closed.
plt.savefig("./image/sales_store_1.png")
plt.show()

temp = dataset_train.groupby('Date')['Sales']
dates = np.unique(dataset_train.loc[:, 'Date'].values)
# GroupBy.mean() is the built-in equivalent of apply(lambda x: x.mean()).
plt.plot(dates, temp.mean(), 'ro', label="Mean")
# Scatter plot of the gift coordinates read from ./data/gifts.csv; the figure
# is written to ./image/gift_location.png and then displayed.
import matplotlib

# The backend is selected before pyplot is imported.
matplotlib.use('Qt5Agg')

import matplotlib.pyplot as plt
import pandas as pd

from exploratoryUtils import basic_info_df

dataset_gift = pd.read_csv("./data/gifts.csv")

# Run the project helper on the frame (defined in exploratoryUtils, not
# visible here).
basic_info_df(dataset_gift)

# s for the size of the points / alpha for the transparency
dataset_gift.plot.scatter('Longitude', 'Latitude', s=1, color="yellow", alpha=0.5)
plt.title("Gift location (%d gifts)" % len(dataset_gift))

# Save BEFORE showing: with an interactive backend, show() blocks until the
# window is closed and the figure is released afterwards, so a savefig() placed
# after it writes an empty image.
plt.savefig("./image/gift_location.png")
plt.show()