def q03_stacked_point_plot(
        path,
        x_column_name='month',
        y_column_name='Sales',
        hue='year',
        order_of_the_axis=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                           'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']):
    """Draw a point plot of the training data, one line per ``hue`` level.

    The training frame returned by ``q02_data_splitter(path)`` is extended
    with a numeric ``year`` column and an abbreviated ``month`` column
    ('Jan', 'Feb', ...), both derived from its ``Month`` column, and then
    plotted with ``sns.pointplot``.

    Args:
        path: Forwarded unchanged to ``q02_data_splitter``.
        x_column_name: Column plotted on the x axis.
        y_column_name: Column plotted on the y axis.
        hue: Column used to split the data into separate lines.
        order_of_the_axis: Order in which the x categories are drawn.
            The list default is only read, never mutated.

    Returns:
        None. The figure is displayed with ``plt.show()``.

    Note:
        Assumes ``Month`` is datetime-typed, because the ``.dt`` accessor
        is used on it.
    """
    train, _ = q02_data_splitter(path)

    # Build the derived columns on a new frame so the splitter's output is
    # left untouched.
    months = train['Month']
    plot_data = train.assign(
        year=months.dt.year,
        month=months.dt.strftime('%b'),
    )

    plt.figure(figsize=(16, 4))
    # Keyword arguments only: the function's parameters now actually drive
    # the plot instead of being shadowed by hard-coded column names.
    sns.pointplot(
        x=x_column_name,
        y=y_column_name,
        hue=hue,
        data=plot_data,
        order=order_of_the_axis,
    )
    plt.ylabel(y_column_name)
    plt.xlabel(x_column_name)
    plt.legend(loc='upper left')
    plt.show()
def q03_stacked_point_plot(
        path,
        x_column_name='month',
        y_column_name='Sales',
        hue='year',
        order_of_the_axis=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                           'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']):
    """Plot training-set sales per month as a point plot split by ``hue``.

    ``year`` and abbreviated ``month`` columns are derived from the
    ``Month`` column of the training frame returned by
    ``q02_data_splitter(path)``.

    Args:
        path: Forwarded unchanged to ``q02_data_splitter``.
        x_column_name: Column plotted on the x axis.
        y_column_name: Column plotted on the y axis.
        hue: Column used to split the data into separate lines.
        order_of_the_axis: Order in which the x categories are drawn.

    Returns:
        None. The plot is drawn on a new 16x7 figure; ``plt.show()`` is
        left to the caller.

    Note:
        Assumes ``Month`` is datetime-typed, because the ``.dt`` accessor
        is used on it.
    """
    train, _ = q02_data_splitter(path)

    months = train['Month']
    plot_data = train.assign(
        year=months.dt.year,
        month=months.dt.strftime('%b'),
    )

    plt.figure(figsize=(16, 7))
    # `order` is the keyword seaborn documents for category ordering; the
    # previous `x_order` spelling is not part of the pointplot signature.
    sns.pointplot(
        x=x_column_name,
        y=y_column_name,
        hue=hue,
        data=plot_data,
        order=order_of_the_axis,
    )
    plt.xlabel(x_column_name)
    plt.ylabel('$ millions')
    plt.title('Sales')
    plt.legend(loc='upper right')
def q04_boxplot(
        path,
        x='month',
        y='Sales',
        kind='box',
        order_of_the_axis=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                           'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
        size=8):
    """Draw a categorical plot (box plot by default) of the training data.

    ``year`` and abbreviated ``month`` columns are derived from the
    ``Month`` column of the training frame returned by
    ``q02_data_splitter(path)`` before plotting.

    Args:
        path: Forwarded unchanged to ``q02_data_splitter``.
        x: Column plotted on the x axis.
        y: Column plotted on the y axis.
        kind: Plot kind passed to ``sns.factorplot``.
        order_of_the_axis: Order in which the x categories are drawn.
        size: ``size`` argument passed to ``sns.factorplot``.

    Returns:
        None.

    Note:
        Assumes ``Month`` is datetime-typed, because the ``.dt`` accessor
        is used on it.
    """
    train, _ = q02_data_splitter(path)

    months = train['Month']
    plot_data = train.assign(
        year=months.dt.year,
        month=months.dt.strftime('%b'),
    )

    # factorplot is figure-level and creates its own figure, so no
    # plt.figure() call precedes it (that would only leave an empty figure
    # behind).
    # NOTE(review): newer seaborn releases rename factorplot/size to
    # catplot/height - confirm the seaborn version this project pins.
    sns.factorplot(
        x=x,
        y=y,
        data=plot_data,
        kind=kind,
        order=order_of_the_axis,
        size=size,
        aspect=float(16 / 7),
    )
    plt.xlabel(x)
    plt.ylabel(y)
def q03_stacked_point_plot(
        path,
        x_column_name='month',
        y_column_name='Sales',
        hue='year',
        order_of_the_axis=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                           'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']):
    """Show a point plot of the training data, one line per ``hue`` level.

    Abbreviated ``month`` and numeric ``year`` columns are derived from the
    ``Month`` column of the training frame returned by
    ``q02_data_splitter(path)``.

    Args:
        path: Forwarded unchanged to ``q02_data_splitter``.
        x_column_name: Column plotted on the x axis.
        y_column_name: Column plotted on the y axis.
        hue: Column used to split the data into separate lines.
        order_of_the_axis: Order in which the x categories are drawn.

    Returns:
        None. The figure is displayed with ``plt.show()``.

    Note:
        Assumes ``Month`` is datetime-typed, because the ``.dt`` accessor
        is used on it.
    """
    train, _ = q02_data_splitter(path)

    months = train['Month']
    plot_data = train.assign(
        month=months.dt.strftime('%b'),
        year=months.dt.year,
    )

    # `order` (not `x_order`) is the keyword seaborn documents for fixing
    # the category order on the x axis.
    sns.pointplot(
        x=x_column_name,
        y=y_column_name,
        hue=hue,
        data=plot_data,
        order=order_of_the_axis,
    )
    plt.show()
def q05_sarima_model(path):
    """Return the train/validation sales frames indexed by month.

    Each frame produced by ``q02_data_splitter(path)`` is re-indexed on its
    ``Month`` column, reduced to the single ``Sales`` column, and has its
    index name cleared.

    Args:
        path: Forwarded unchanged to ``q02_data_splitter``.

    Returns:
        A ``(train, validation)`` tuple of single-column DataFrames.
    """

    def _sales_by_month(frame):
        # Index first, then select: the selection inherits the new index.
        frame.index = frame['Month']
        sales = frame[['Sales']]
        sales.index.name = None
        return sales

    raw_train, raw_validation = q02_data_splitter(path)
    return _sales_by_month(raw_train), _sales_by_month(raw_validation)
def q03_stacked_point_plot(
        path,
        x_column_name="month",
        y_column_name="Sales",
        hue="year",
        order_of_the_axis=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                           'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']):
    """Draw a point plot of the training data, one line per ``hue`` level.

    Previously this function only split the data and ignored every
    plotting parameter. It now derives ``year`` and abbreviated ``month``
    columns from the ``Month`` column of the training frame returned by
    ``q02_data_splitter(path)`` and plots them.

    Args:
        path: Forwarded unchanged to ``q02_data_splitter``.
        x_column_name: Column plotted on the x axis.
        y_column_name: Column plotted on the y axis.
        hue: Column used to split the data into separate lines.
        order_of_the_axis: Order in which the x categories are drawn.

    Returns:
        None, as before.

    Note:
        Assumes ``Month`` is datetime-typed, because the ``.dt`` accessor
        is used on it - TODO confirm against ``q02_data_splitter``.
    """
    train, _ = q02_data_splitter(path)

    months = train['Month']
    plot_data = train.assign(
        year=months.dt.year,
        month=months.dt.strftime('%b'),
    )

    plt.figure(figsize=(16, 7))
    sns.pointplot(
        x=x_column_name,
        y=y_column_name,
        hue=hue,
        data=plot_data,
        order=order_of_the_axis,
    )
    plt.xlabel(x_column_name)
    plt.ylabel(y_column_name)
def q03_stacked_point_plot(
        path,
        x_column_name='month',
        y_column_name='Sales',
        hue='year',
        order_of_the_axis=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                           'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']):
    """Draw a titled point plot of the training data split by ``hue``.

    ``year`` and abbreviated ``month`` columns are derived from the
    ``Month`` column of the training frame returned by
    ``q02_data_splitter(path)``.

    Args:
        path: Forwarded unchanged to ``q02_data_splitter``.
        x_column_name: Column plotted on the x axis.
        y_column_name: Column plotted on the y axis.
        hue: Column used to split the data into separate lines.
        order_of_the_axis: Order in which the x categories are drawn.

    Returns:
        None. The plot is drawn on a new 16x7 figure; ``plt.show()`` is
        left to the caller.

    Note:
        Assumes ``Month`` is datetime-typed, because the ``.dt`` accessor
        is used on it.
    """
    train, _ = q02_data_splitter(path)

    months = train['Month']
    plot_data = train.assign(
        year=months.dt.year,
        month=months.dt.strftime('%b'),
    )

    plt.figure(figsize=(16, 7))
    # `order` (not `x_order`) is the keyword seaborn documents for fixing
    # the category order on the x axis.
    sns.pointplot(
        x=x_column_name,
        y=y_column_name,
        hue=hue,
        data=plot_data,
        order=order_of_the_axis,
    )
    plt.xlabel('month')
    plt.ylabel('Sales')
    plt.title('Stacked point chart')
    plt.legend(loc='upper right')
def q04_boxplot(
        path,
        x="month",
        y="Sales",
        kind="box",
        order=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
               'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
        size=8):
    """Draw a categorical plot (box plot by default) of the training data.

    Previously this function was a placeholder that only split the data.
    It now derives ``year`` and abbreviated ``month`` columns from the
    ``Month`` column of the training frame returned by
    ``q02_data_splitter(path)`` and plots them with ``sns.factorplot``.

    Args:
        path: Forwarded unchanged to ``q02_data_splitter``.
        x: Column plotted on the x axis.
        y: Column plotted on the y axis.
        kind: Plot kind passed to ``sns.factorplot``.
        order: Order in which the x categories are drawn.
        size: ``size`` argument passed to ``sns.factorplot``.

    Returns:
        None, as before.

    Note:
        Assumes ``Month`` is datetime-typed, because the ``.dt`` accessor
        is used on it - TODO confirm against ``q02_data_splitter``.
    """
    train, _ = q02_data_splitter(path)

    months = train['Month']
    plot_data = train.assign(
        year=months.dt.year,
        month=months.dt.strftime('%b'),
    )

    # factorplot is figure-level and creates its own figure.
    # NOTE(review): newer seaborn releases rename factorplot/size to
    # catplot/height - confirm the seaborn version this project pins.
    sns.factorplot(
        x=x,
        y=y,
        data=plot_data,
        kind=kind,
        order=order,
        size=size,
    )
    plt.xlabel(x)
    plt.ylabel(y)
def q05_sarima_model(path):
    """Return the train/validation sales frames indexed by month values.

    Each frame produced by ``q02_data_splitter(path)`` is reduced to its
    ``Sales`` column and re-indexed with the raw values of its ``Month``
    column.

    The ``path`` argument is now honoured; it used to be overwritten by a
    hard-coded CSV location, so callers could not choose the input file.

    Args:
        path: Forwarded unchanged to ``q02_data_splitter``.

    Returns:
        A ``(train, validation)`` tuple of single-column DataFrames.
    """
    train, validation = q02_data_splitter(path)

    # The selection already carries the 'Sales' column name, so no renaming
    # is needed. The old `frame.column = [...]` lines were a typo for
    # `columns` and only attached a stray attribute.
    tss = train[['Sales']].copy()
    tss.index = train['Month'].values

    tss_valid = validation[['Sales']].copy()
    tss_valid.index = validation['Month'].values

    return tss, tss_valid
def q04_boxplot(
        path,
        x='month',
        y='Sales',
        kind='box',
        order=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
               'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
        size=8):
    """Show a categorical plot (box plot by default) of the training data.

    Args:
        path: Forwarded unchanged to ``q02_data_splitter``.
        x: Column plotted on the x axis.
        y: Column plotted on the y axis.
        kind: Plot kind passed to ``sns.factorplot``.
        order: Order in which the x categories are drawn.
        size: ``size`` argument passed to ``sns.factorplot``.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    train, _ = q02_data_splitter(path)

    # The default x column is 'month', which this function never created.
    # Derive it from 'Month' when the splitter has not already provided it.
    # NOTE(review): assumes 'Month' is datetime-typed - confirm against
    # q02_data_splitter.
    if 'month' not in train.columns:
        train = train.assign(month=train['Month'].dt.strftime('%b'))

    # NOTE(review): newer seaborn releases rename factorplot/size to
    # catplot/height - confirm the seaborn version this project pins.
    sns.factorplot(data=train, x=x, y=y, kind=kind, order=order, size=size)
    plt.xlabel('month')
    # Was `plt.ylable(...)`, which raises AttributeError.
    plt.ylabel('Sales')
    plt.show()
def q04_boxplot(
        path,
        x='month',
        y='Sales',
        kind='box',
        order_of_the_axis=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                           'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
        temp=8):
    """Show a categorical plot (box plot by default) of the training data.

    Abbreviated ``month`` and numeric ``year`` columns are derived from the
    ``Month`` column of the training frame returned by
    ``q02_data_splitter(path)``.

    Args:
        path: Forwarded unchanged to ``q02_data_splitter``.
        x: Column plotted on the x axis.
        y: Column plotted on the y axis.
        kind: Plot kind passed to ``sns.factorplot``.
        order_of_the_axis: Order in which the x categories are drawn.
        temp: Accepted but not used by this function.

    Returns:
        None. The figure is displayed with ``plt.show()``.

    Note:
        Assumes ``Month`` is datetime-typed, because the ``.dt`` accessor
        is used on it.
    """
    train, _ = q02_data_splitter(path)

    months = train['Month']
    plot_data = train.assign(
        month=months.dt.strftime('%b'),
        year=months.dt.year,
    )

    # Pass the category order through; it used to be accepted and then
    # dropped, so the requested month ordering never reached the plot.
    # NOTE(review): newer seaborn releases rename factorplot to catplot -
    # confirm the seaborn version this project pins.
    sns.factorplot(
        x=x,
        y=y,
        data=plot_data,
        kind=kind,
        order=order_of_the_axis,
    )
    plt.show()
def q05_sarima_model(data):
    """Return the train/validation sales frames indexed by month values.

    Each frame produced by ``q02_data_splitter(data)`` is reduced to its
    ``Sales`` column and re-indexed with the raw values of its ``Month``
    column.

    Args:
        data: Forwarded unchanged to ``q02_data_splitter``.

    Returns:
        A ``(train, validation)`` tuple of single-column DataFrames.
    """
    train, validation = q02_data_splitter(data)

    # Selecting [['Sales']] already yields a DataFrame whose only column is
    # named 'Sales'. The old `frame.column = [...]` lines were a typo for
    # `columns` and only attached a stray attribute, so they are dropped.
    tss = train[['Sales']].copy()
    tss.index = train['Month'].values

    tss_valid = validation[['Sales']].copy()
    tss_valid.index = validation['Month'].values

    return tss, tss_valid
def q04_boxplot(
        path,
        x='month',
        y='Sales',
        kind='box',
        order=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
               'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
        size=8):
    """Draw a titled categorical plot (box plot by default) of the training data.

    ``year`` and abbreviated ``month`` columns are derived from the
    ``Month`` column of the training frame returned by
    ``q02_data_splitter(path)``.

    Args:
        path: Forwarded unchanged to ``q02_data_splitter``.
        x: Column plotted on the x axis.
        y: Column plotted on the y axis.
        kind: Plot kind passed to ``sns.factorplot``.
        order: Order in which the x categories are drawn.
        size: ``size`` argument passed to ``sns.factorplot``.

    Returns:
        None.

    Note:
        Assumes ``Month`` is datetime-typed, because the ``.dt`` accessor
        is used on it.
    """
    train, _ = q02_data_splitter(path)

    months = train['Month']
    plot_data = train.assign(
        year=months.dt.year,
        month=months.dt.strftime('%b'),
    )

    # factorplot is figure-level and creates its own figure. The earlier
    # plt.figure(figsize=(16, 7)) call only left an empty figure behind.
    # NOTE(review): newer seaborn releases rename factorplot/size to
    # catplot/height - confirm the seaborn version this project pins.
    sns.factorplot(
        x=x,
        y=y,
        data=plot_data,
        kind=kind,
        order=order,
        size=size,
    )
    plt.xlabel('Month')
    plt.ylabel('Sale volume in millions')  # label previously misspelled "volumn"
    plt.title('Monthly sales of champagne aggregated monthly')
def q04_boxplot(
        path,
        x='month',
        y='Sales',
        kind='box',
        order=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
               'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
        size=8):
    """Draw a titled categorical plot (box plot by default) of the training data.

    Abbreviated ``month`` and numeric ``year`` columns are derived from the
    ``Month`` column of the training frame returned by
    ``q02_data_splitter(path)``.

    Args:
        path: Forwarded unchanged to ``q02_data_splitter``.
        x: Column plotted on the x axis.
        y: Column plotted on the y axis.
        kind: Plot kind passed to ``sns.factorplot``.
        order: Order in which the x categories are drawn.
        size: ``size`` argument passed to ``sns.factorplot``.

    Returns:
        None.

    Note:
        Assumes ``Month`` is datetime-typed, because the ``.dt`` accessor
        is used on it.
    """
    train, _ = q02_data_splitter(path)

    months = train['Month']
    plot_data = train.assign(
        month=months.dt.strftime('%b'),
        year=months.dt.year,
    )

    # factorplot is figure-level and creates its own figure. The earlier
    # plt.figure(figsize=(16, 7)) call only left an empty figure behind.
    # NOTE(review): newer seaborn releases rename factorplot/size to
    # catplot/height - confirm the seaborn version this project pins.
    sns.factorplot(
        x=x,
        y=y,
        data=plot_data,
        kind=kind,
        order=order,
        size=size,
    )
    plt.xlabel('Month')
    plt.ylabel('Sales')
    plt.title('Sales in Millions')
def q03_stacked_point_plot(
        path,
        x_column_name='month',
        y_column_name='Sales',
        hue='year',
        order_of_the_axis=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                           'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']):
    """Build a point plot of the training data and return its Axes.

    ``year`` and abbreviated ``month`` columns are derived from the
    ``Month`` column of the training frame returned by
    ``q02_data_splitter(path)``.

    Args:
        path: Forwarded unchanged to ``q02_data_splitter``.
        x_column_name: Column plotted on the x axis.
        y_column_name: Column plotted on the y axis.
        hue: Column used to split the data into separate lines.
        order_of_the_axis: Order in which the x categories are drawn.

    Returns:
        Whatever ``sns.pointplot`` returns (the Axes it drew on).

    Note:
        Assumes ``Month`` is datetime-typed, because the ``.dt`` accessor
        is used on it.
    """
    train, _ = q02_data_splitter(path)

    months = train['Month']
    plot_data = train.assign(
        year=months.dt.year,
        month=months.dt.strftime('%b'),
    )

    # `order` (not `x_order`) is the keyword seaborn documents for fixing
    # the category order on the x axis.
    return sns.pointplot(
        x=x_column_name,
        y=y_column_name,
        hue=hue,
        data=plot_data,
        order=order_of_the_axis,
    )
def q03_stacked_point_plot(
        path,
        x_column_name='month',
        y_column_name='Sales',
        hue='year',
        order_of_the_axis=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                           'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']):
    """Show a point plot of the training data, one line per ``hue`` level.

    ``year`` and abbreviated ``month`` columns are derived from the
    ``Month`` column of the training frame returned by
    ``q02_data_splitter(path)``.

    Args:
        path: Forwarded unchanged to ``q02_data_splitter``.
        x_column_name: Column plotted on the x axis.
        y_column_name: Column plotted on the y axis.
        hue: Column used to split the data into separate lines.
        order_of_the_axis: Order in which the x categories are drawn.

    Returns:
        None. The figure is displayed with ``plt.show()``.

    Note:
        Assumes ``Month`` is datetime-typed, because the ``.dt`` accessor
        is used on it.
    """
    train, _ = q02_data_splitter(path)

    months = train['Month']
    plot_data = train.assign(
        year=months.dt.year,
        month=months.dt.strftime('%b'),
    )

    # Use the `hue` parameter rather than a hard-coded Series so callers can
    # actually pick the grouping column.
    sns.pointplot(
        data=plot_data,
        x=x_column_name,
        y=y_column_name,
        hue=hue,
        order=order_of_the_axis,
    )
    plt.xlabel('month')
    plt.ylabel('Sales')
    # Was `plt.legend(loc)` with `loc` undefined (NameError). 'best' is
    # matplotlib's own default placement.
    plt.legend(loc='best')
    plt.show()
def q05_sarima_model(path):
    """Return the train/validation sales frames indexed by month.

    Previously this function was a placeholder that split the data and
    returned None. It now does what the completed ``q05_sarima_model``
    implementations in this file do: each frame from
    ``q02_data_splitter(path)`` is indexed by its ``Month`` column, reduced
    to the ``Sales`` column, and has its index name cleared.

    Args:
        path: Forwarded unchanged to ``q02_data_splitter``.

    Returns:
        A ``(train, validation)`` tuple of single-column DataFrames.
    """
    train, validation = q02_data_splitter(path)

    # set_index returns a new frame, so the splitter's output is not
    # modified.
    train = train.set_index('Month')[['Sales']]
    train.index.name = None

    validation = validation.set_index('Month')[['Sales']]
    validation.index.name = None

    return train, validation
# %load q05_sarima_model/build.py
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import seaborn as sns
import matplotlib.pyplot as plt
# import sys
# sys.path.append('./')
from greyatomlib.time_series_101_project.q02_data_splitter.build import q02_data_splitter

# Module-level setup: default data location, non-interactive matplotlib
# backend, and an eager split of the default data set at import time.
path = 'data/perrin-freres-monthly-champagne.csv'
plt.switch_backend('agg')
train, validation = q02_data_splitter(path)
'write your solution here'


def q05_sarima_model(path):
    """Return the train/validation sales frames indexed by month.

    Each frame produced by ``q02_data_splitter(path)`` is re-indexed on its
    ``Month`` column, reduced to the single ``Sales`` column, and has its
    index name cleared.

    Args:
        path: Forwarded unchanged to ``q02_data_splitter``.

    Returns:
        A ``(train, validation)`` tuple of single-column DataFrames.
    """
    train_frame, validation_frame = q02_data_splitter(path)

    prepared = []
    for frame in (train_frame, validation_frame):
        # Index first, then select: the selection inherits the new index.
        frame.index = frame['Month']
        sales_only = frame[['Sales']]
        sales_only.index.name = None
        prepared.append(sales_only)

    return tuple(prepared)