def prep_store_data():
    """Load the sales data and add the derived columns used for exploration.

    Steps, in order:
      1. Fetch the frame from ``acquire.get_all_data()``.
      2. Add ``month`` and ``weekday`` name columns taken from the index.
         NOTE(review): this requires a datetime-like index; presumably
         ``acquire`` already sets one -- confirm.
      3. Add ``sales_total`` (``sale_amount * item_price``).
      4. Add ``sales_diff(1)``, the change in ``sales_total`` from the
         previous row (day-over-day only if the frame holds one row per day).
      5. Cast identifier columns to ``object`` and the calendar labels to
         ``category``.
      6. Pass the result to ``numeric_hists``.

    Returns:
        The re-typed DataFrame.
    """
    sales = acquire.get_all_data()

    # Calendar labels derived from the index.
    idx = sales.index
    sales["month"] = idx.month_name()
    sales["weekday"] = idx.day_name()

    # Revenue per row and its change relative to the preceding row.
    sales["sales_total"] = sales["sale_amount"] * sales["item_price"]
    sales["sales_diff(1)"] = sales["sales_total"].diff()

    # Identifiers are labels, not quantities: store them as objects so they
    # are not treated as numeric columns.
    id_columns = (
        'sale_id',
        'store_id',
        'store_zipcode',
        'item_id',
        'item_upc12',
        'item_upc14',
    )
    dtypes = dict.fromkeys(id_columns, object)
    dtypes.update(month='category', weekday='category')
    sales = sales.astype(dtypes)

    numeric_hists(sales)
    return sales
def prepare_sale():
    """Load the sales data, index it by sale date and add derived columns.

    The frame is fetched with ``acquire.get_all_data(use_cache=True)``,
    ``sale_date`` is parsed to datetimes, and the rows are sorted by it
    before it becomes the index. Three columns are then added:

    * ``month``       -- ``'%m-%b'`` label, e.g. ``'01-Jan'``
    * ``day_of_week`` -- ``'%w-%A'`` label, e.g. ``'2-Tuesday'``
    * ``total_sales`` -- ``sale_amount * item_price``

    The numeric prefixes on the labels keep them in calendar order when
    sorted as strings.

    Returns:
        The row-level DataFrame indexed by ``sale_date``. Daily aggregates
        are not part of the result; callers that need them can resample
        the returned frame.
    """
    df = acquire.get_all_data(use_cache=True)

    df['sale_date'] = pd.to_datetime(df['sale_date'])
    df = df.sort_values(by='sale_date').set_index('sale_date')

    df['month'] = df.index.strftime('%m-%b')
    df['day_of_week'] = df.index.strftime('%w-%A')
    df['total_sales'] = df['sale_amount'] * df['item_price']

    return df
def prep_sales_data():
    """Load the sales data, plot two columns over time and add derived columns.

    The frame is fetched with ``acquire.get_all_data(use_cache=True)``,
    ``sale_date`` is parsed to datetimes, and the rows are sorted by it
    before it becomes the index.

    Side effects:
        Draws one figure with two subplots, ``sale_amount`` and
        ``item_price`` against ``sale_date``, each with its own title.

    Returns:
        The DataFrame indexed by ``sale_date`` with the added columns
        ``month`` (month name), ``day_of_week`` (day name) and
        ``sales_total`` (``sale_amount * item_price``).
    """
    df = acquire.get_all_data(use_cache=True)

    df['sale_date'] = pd.to_datetime(df['sale_date'])
    df = df.sort_values('sale_date').set_index('sale_date')

    # Plot once sale_date is the index so the x-axis really is time, and use
    # separate subplots so each series keeps its own axes and title rather
    # than sharing one set of axes where the later title replaces the earlier.
    df[['sale_amount', 'item_price']].plot(
        subplots=True,
        title=[
            'The distribution of sale amount over time',
            'The distribution of item price over time',
        ],
    )

    df["month"] = df.index.month_name()
    df["day_of_week"] = df.index.day_name()
    df['sales_total'] = df.sale_amount * df.item_price

    return df
def prepare_store_data():
    """Load the sales data, index it by sale date and add calendar columns.

    The frame is fetched with ``acquire.get_all_data()``, ``sale_date`` is
    parsed to datetimes, and the frame is sorted by it in place before a
    date-indexed frame is built from it.

    Returns:
        A DataFrame indexed by ``sale_date`` with the added columns:

        * ``month``           -- month number from the index
        * ``nameofdayofweek`` -- day name from the index
        * ``dayofweek``       -- day-of-week number from the index
        * ``sales_total``     -- ``sale_amount * item_price``
    """
    df = acquire.get_all_data()

    # Convert the date column to datetimes and order the rows by it.
    df['sale_date'] = pd.to_datetime(df['sale_date'])
    df.sort_values('sale_date', inplace=True)

    # Use the datetime column as the index.
    by_date = df.set_index('sale_date')

    by_date['month'] = by_date.index.month
    # `DatetimeIndex.weekday_name` no longer exists in current pandas;
    # `day_name()` is its replacement.
    by_date['nameofdayofweek'] = by_date.index.day_name()
    by_date['dayofweek'] = by_date.index.dayofweek

    by_date['sales_total'] = by_date['sale_amount'] * by_date['item_price']

    return by_date
from datetime import timedelta, datetime

import numpy as np
import pandas as pd

import acquire

# Exploratory preparation of the sales data: parse dates, index by date,
# add calendar labels and a per-row sales total. Bare expressions such as
# `df.head()` only display output in an interactive session.
df = acquire.get_all_data()

# Explicit format for the sale_date strings; passing it to to_datetime
# avoids per-value format inference.
fmt = '%a, %d %b %Y %H:%M:%S %Z'
df.sale_date = pd.to_datetime(df.sale_date, format=fmt)
df = df.sort_values(by='sale_date').set_index('sale_date')

df.sale_amount.plot()
df.item_price.plot()

# Numeric prefixes keep the labels in calendar order when sorted as strings.
df['month'] = df.index.strftime('%m-%b')
df['weekday'] = df.index.strftime('%w-%a')
df.head()
df[['month', 'weekday']].head()

# Only cast sale_amount to int when every value is already integral, so the
# cast can never silently truncate fractional amounts.
amounts_are_integral = (df.sale_amount.astype('int') == df.sale_amount).all()
if amounts_are_integral:
    df.sale_amount = df.sale_amount.astype('int')

df.item_price.plot.hist()

df['sales_total'] = df.sale_amount * df.item_price
# Remove a stale, misspelled 'sale_total' column if one is present. This
# script never creates it, so its absence must not raise.
df = df.drop(columns='sale_total', errors='ignore')
df.head()