def prep_store_data():
    """Load the store data, add derived columns, and recast identifier columns.

    Steps, in order:
      1. Fetch the frame from ``acquire.get_all_data()``.
      2. Add ``month`` and ``weekday`` name columns taken from the index.
      3. Add ``sales_total`` (``sale_amount * item_price``) and
         ``sales_diff(1)`` (difference between each row's ``sales_total`` and
         the row before it).
      4. Cast the identifier columns to ``object`` and the two name columns
         to ``category``.
      5. Pass the frame to ``numeric_hists`` before returning it.

    Returns:
        The prepared DataFrame.
    """
    store = acquire.get_all_data()

    # NOTE(review): month_name()/day_name() require a datetime-like index;
    # presumably acquire already sets one -- confirm against acquire.
    date_index = store.index
    store["month"] = date_index.month_name()
    store["weekday"] = date_index.day_name()

    store["sales_total"] = store["sale_amount"] * store["item_price"]

    # NOTE(review): diff(1) compares consecutive rows. That only equals a
    # day-over-day change if the frame holds one row per day -- confirm.
    store["sales_diff(1)"] = store["sales_total"].diff(periods=1)

    # Identifier-like columns are numeric in storage but are labels, not
    # quantities, so they are recast to object.
    id_columns = (
        'sale_id',
        'store_id',
        'store_zipcode',
        'item_id',
        'item_upc12',
        'item_upc14',
    )
    dtype_map = dict.fromkeys(id_columns, object)
    dtype_map['month'] = 'category'
    dtype_map['weekday'] = 'category'
    store = store.astype(dtype_map)

    numeric_hists(store)

    return store
예제 #2
0
def prepare_sale():
    """Load the sales data and return it indexed by sale date.

    The frame is fetched through ``acquire.get_all_data(use_cache=True)``,
    ``sale_date`` is parsed to datetimes, and the rows are sorted by that
    column, which then becomes the index. Three columns are added:

    * ``month``       -- ``'%m-%b'`` formatted index (e.g. ``'01-Jan'``), so
      the labels sort chronologically as strings.
    * ``day_of_week`` -- ``'%w-%A'`` formatted index (e.g. ``'0-Sunday'``).
    * ``total_sales`` -- ``sale_amount * item_price``.

    Returns:
        The prepared DataFrame indexed by ``sale_date``.
    """
    df = acquire.get_all_data(use_cache=True)
    df['sale_date'] = pd.to_datetime(df['sale_date'])
    df = df.sort_values(by='sale_date').set_index('sale_date')

    df['month'] = df.index.strftime('%m-%b')
    df['day_of_week'] = df.index.strftime('%w-%A')
    df['total_sales'] = df['sale_amount'] * df['item_price']

    # NOTE(review): a daily resample of total_sales and its day-over-day diff
    # used to be computed here into a local that was never returned or stored.
    # That dead work is removed; if the daily differences are wanted, expose
    # them from a separate function rather than changing this return value.
    return df
def prep_sales_data():
    """Load the sales data, plot two columns over time, and add derived columns.

    The frame is fetched through ``acquire.get_all_data(use_cache=True)``,
    ``sale_date`` is parsed to datetimes, and the rows are sorted by that
    column, which then becomes the index. ``sale_amount`` and ``item_price``
    are plotted against that index, and three columns are added: ``month``
    and ``day_of_week`` (names taken from the index) and ``sales_total``
    (``sale_amount * item_price``).

    Returns:
        The prepared DataFrame indexed by ``sale_date``.
    """
    df = acquire.get_all_data(use_cache=True)
    df['sale_date'] = pd.to_datetime(df['sale_date'])
    df = df.sort_values('sale_date').set_index('sale_date')

    # Plot only after the frame is sorted and indexed by date, so the x-axis
    # really is time (plotting earlier would use the pre-index row order).
    # One subplot per column keeps each title with its own series instead of
    # the second title overwriting the first on shared axes.
    df[['sale_amount', 'item_price']].plot(
        subplots=True,
        title=[
            'The distribution of sale amount over time',
            'The distribution of item price over time',
        ],
    )

    df["month"] = df.index.month_name()
    df["day_of_week"] = df.index.day_name()
    df['sales_total'] = df.sale_amount * df.item_price
    return df
예제 #4
0
def prepare_store_data():
    """Load the store data and return it indexed by sale date.

    The frame is fetched through ``acquire.get_all_data()``, ``sale_date`` is
    parsed to datetimes, and the rows are sorted by that column, which then
    becomes the index. Four columns are added:

    * ``month``           -- month number taken from the index.
    * ``nameofdayofweek`` -- day name taken from the index.
    * ``dayofweek``       -- day-of-week number taken from the index.
    * ``sales_total``     -- ``sale_amount * item_price``.

    Returns:
        The prepared DataFrame indexed by ``sale_date``.
    """
    df = acquire.get_all_data()

    # Convert the date column to datetimes, then sort and index by it.
    df['sale_date'] = pd.to_datetime(df['sale_date'])
    by_date = df.sort_values('sale_date').set_index('sale_date')

    by_date['month'] = by_date.index.month

    # DatetimeIndex.weekday_name no longer exists in current pandas;
    # day_name() is the supported accessor for the same values.
    by_date['nameofdayofweek'] = by_date.index.day_name()

    by_date['dayofweek'] = by_date.index.dayofweek

    by_date['sales_total'] = by_date['sale_amount'] * by_date['item_price']

    return by_date
예제 #5
0
import acquire
import pandas as pd

from datetime import timedelta, datetime
import numpy as np

# Exploratory preparation script: load the data, index it by sale date, and
# add month / weekday / sales_total columns. Bare expressions such as
# ``df.head()`` are inspection steps whose results are not used.
df = acquire.get_all_data()

# Explicit strptime format for sale_date: abbreviated weekday, day,
# abbreviated month, year, time, and timezone name.
fmt = '%a, %d %b %Y %H:%M:%S %Z'

df.sale_date = pd.to_datetime(df.sale_date, format=fmt)

df = df.sort_values(by='sale_date').set_index('sale_date')
df.sale_amount.plot()
df.item_price.plot()

# Number-prefixed labels ('01-Jan', '0-Sun') sort chronologically as strings.
df['month'] = df.index.strftime('%m-%b')
df['weekday'] = df.index.strftime('%w-%a')
df.head()
df[['month', 'weekday']].head()

# Inspection: are all sale_amount values whole numbers? The result is not
# acted on; the cast below runs regardless.
(df.sale_amount.astype('int') == df.sale_amount).all()

df.sale_amount = df.sale_amount.astype('int')

df.item_price.plot.hist()

df['sales_total'] = df.sale_amount * df.item_price

# Remove a stray 'sale_total' column if one exists. Nothing above creates it
# (the computed column is 'sales_total'), so errors='ignore' avoids a
# KeyError, and the result is assigned because drop() returns a new frame.
df = df.drop(columns='sale_total', errors='ignore')
df.head()