def q05_feature_engineering_part3(path):
    """Draw a box plot of ``Demand`` for each calendar month.

    Prints the frame summary via ``info()``, adds ``hour`` and ``month``
    columns to the loaded frame, then plots with seaborn. Nothing is
    returned and the figure is not explicitly shown here.

    Args:
        path: Forwarded unchanged to ``q01_load_data``.
    """
    _, frame = q01_load_data(path)
    frame.info()

    timestamps = frame['Datetime']
    frame['hour'] = timestamps.dt.hour
    frame['month'] = timestamps.dt.strftime('%b')

    # Explicit calendar order for the x axis instead of relying on the
    # order in which month labels happen to appear in the data.
    calendar_order = [
        'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
        'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec',
    ]
    sns.factorplot(
        x='month',
        y='Demand',
        data=frame,
        kind='box',
        order=calendar_order,
        size=8,
        aspect=float(16/7),
    )
def q03_time_plot(path):
    """Show a line chart of ``Demand`` against ``Datetime``.

    Args:
        path: Forwarded unchanged to ``q01_load_data``.
    """
    _, frame = q01_load_data(path)

    plt.figure(figsize=(16, 7))
    plt.plot(frame['Datetime'], frame['Demand'])

    # Axis labels and title are independent of each other.
    plt.ylabel('Demand')
    plt.xlabel('Time')
    plt.title('Electricity Demand in Australia for a year')
    plt.show()
Beispiel #3
0
def q03_time_plot(path='data/elecdemand.csv'):
    """Show a line chart of ``Demand`` against ``Datetime``.

    The body used to overwrite ``path`` with a hard-coded literal, so the
    caller's argument was silently ignored. That literal is now the default
    value and an explicitly passed ``path`` is honoured.

    Args:
        path: Forwarded unchanged to ``q01_load_data``. Defaults to
            ``'data/elecdemand.csv'``.
    """
    _, df = q01_load_data(path)
    plt.plot(df['Datetime'], df['Demand'])
    plt.title('Electricity Demand for Australia for a year')
    plt.xlabel('Year-Month')
    plt.ylabel('Demand')
    plt.show()
Beispiel #4
0
def q05_feature_engineering_part4(path):
    """Add hour/month columns and 0/1 peak indicators to the loaded frame.

    Columns written: ``hour``, ``month`` (abbreviated month name),
    ``Peakhours`` (1 when 6 <= hour < 20) and ``Peakmonths`` (1 for Feb,
    May, Jun, Jul, Aug).

    Args:
        path: Forwarded unchanged to ``q01_load_data``.

    Returns:
        The frame returned by ``q01_load_data`` with the columns added.
    """
    _, frame = q01_load_data(path)

    timestamps = frame['Datetime']
    frame['hour'] = timestamps.dt.hour
    frame['month'] = timestamps.dt.strftime('%b')

    # Boolean mask multiplied by 1 to obtain an integer 0/1 flag.
    in_peak_hours = frame['hour'].ge(6) & frame['hour'].lt(20)
    frame['Peakhours'] = in_peak_hours * 1

    peak_months = ('Feb', 'May', 'Jun', 'Jul', 'Aug')
    frame['Peakmonths'] = frame['month'].apply(
        lambda name: 1 if name in peak_months else 0)
    return frame
Beispiel #5
0
def q04_boxplot(path):
    """Draw a box plot of ``Demand`` grouped by ``WorkDay``.

    Nothing is returned and the figure is not explicitly shown here.

    Args:
        path: Forwarded unchanged to ``q01_load_data``.
    """
    _, frame = q01_load_data(path)

    # NOTE(review): factorplot appears to create its own figure, so the
    # figure opened here may stay empty -- confirm before removing it.
    plt.figure(figsize=(16, 7))
    sns.factorplot(
        x='WorkDay',
        y='Demand',
        data=frame,
        kind='box',
        size=8,
        aspect=float(16/7),
    )

    plt.title('Each workday demand in Australia')
    plt.xlabel('Workday')
    plt.ylabel('Demand')
Beispiel #6
0
def q03_time_plot(path):
    """Show a 16x6 line chart of ``Demand`` against ``Datetime``.

    Args:
        path: Forwarded unchanged to ``q01_load_data``.
    """
    _, frame = q01_load_data(path)
    timestamps, demand = frame['Datetime'], frame['Demand']

    plt.figure(figsize=(16, 6))
    plt.plot(timestamps, demand)
    plt.title('Electricity Demand in Australia for a year')
    plt.xlabel('Time')
    plt.ylabel('Demand')
    plt.show()
def q02_data_splitter(path='data/elecdemand.csv'):
    """Split the loaded frame into two time-ordered train/validation folds.

    The body used to overwrite ``path`` with a hard-coded literal, so the
    caller's argument was silently ignored. That literal is now the default
    value and an explicitly passed ``path`` is honoured.

    Args:
        path: Forwarded unchanged to ``q01_load_data``. Defaults to
            ``'data/elecdemand.csv'``.

    Returns:
        A list of ``(train_index, valid_index)`` tuples, one per fold, as
        yielded by ``TimeSeriesSplit(n_splits=2).split``.
    """
    _, df = q01_load_data(path)
    tscv = TimeSeriesSplit(n_splits=2)
    # split() is lazy; materialise it so callers can index and re-iterate.
    return list(tscv.split(df))
Beispiel #8
0
def q05_feature_engineering(path):
    """Show a scatter plot of ``Demand`` against ``Temperature``.

    Args:
        path: Forwarded unchanged to ``q01_load_data``.
    """
    _, frame = q01_load_data(path)
    temperature = frame['Temperature']
    demand = frame['Demand']

    # The correlation is computed but neither displayed nor returned.
    correlation = temperature.corr(demand)

    plt.figure(figsize=(16, 6))
    plt.scatter(temperature, demand)
    plt.xlabel('Temperature')
    plt.ylabel('Demand')
    plt.title('Temperature vs. Demand')
    plt.show()
def q05_feature_engineering_part4(path):
    """Add hour/month columns and 0/1 peak indicators to the loaded frame.

    Columns written: ``hour``, ``month`` (abbreviated month name),
    ``Peakhours`` (1 when the hour is one of 6..19) and ``Peakmonths``
    (1 for Feb, May, Jun, Jul, Aug).

    Args:
        path: Forwarded unchanged to ``q01_load_data``.

    Returns:
        The frame returned by ``q01_load_data`` with the columns added.
    """
    _, frame = q01_load_data(path)

    frame['hour'] = frame['Datetime'].dt.hour
    frame['month'] = frame['Datetime'].dt.strftime('%b')

    peak_hours = range(6, 20)
    peak_months = ['Feb', 'May', 'Jun', 'Jul', 'Aug']

    def _flag(value, members):
        # 1 when value belongs to members, otherwise 0.
        if value in members:
            return 1
        return 0

    frame['Peakhours'] = frame['hour'].apply(
        lambda hour: _flag(hour, peak_hours))
    frame['Peakmonths'] = frame['month'].apply(
        lambda name: _flag(name, peak_months))
    return frame
Beispiel #10
0
def q05_feature_engineering_part2(path):
    """Show one box of ``Demand`` per hour of day (0 through 23).

    Adds an ``hour`` column to the loaded frame as a side effect.

    Args:
        path: Forwarded unchanged to ``q01_load_data``.
    """
    _, frame = q01_load_data(path)
    frame['hour'] = frame['Datetime'].dt.hour

    # One array of demand values per hour, in hour order.
    demand_by_hour = [
        frame[frame['hour'] == hour]['Demand'].values
        for hour in range(24)
    ]
    hour_labels = [str(hour) for hour in range(24)]

    plt.boxplot(demand_by_hour, labels=hour_labels)
    plt.show()
Beispiel #11
0
def q05_feature_engineering(path='data/elecdemand.csv'):
    """Show a scatter plot of ``Demand`` against ``Temperature``.

    The body used to overwrite ``path`` with a hard-coded literal, so the
    caller's argument was silently ignored. That literal is now the default
    value and an explicitly passed ``path`` is honoured.

    Args:
        path: Forwarded unchanged to ``q01_load_data``. Defaults to
            ``'data/elecdemand.csv'``.
    """
    _, df = q01_load_data(path)

    # The Pearson statistics are computed but neither displayed nor
    # returned; the call is kept so existing behaviour is unchanged.
    corr, p_value = pearsonr(df['Temperature'], df['Demand'])

    plt.scatter(df['Temperature'], df['Demand'])
    plt.xlabel('Temperature')
    plt.ylabel('Demand')
    plt.title('Temperature vs Demand')
    plt.show()
Beispiel #12
0
def q02_data_splitter(path):
    """Collect the indices of three time-series folds into flat tuples.

    Train indices of every fold are concatenated into one tuple, and the
    validation indices into another.

    Args:
        path: Forwarded unchanged to ``q01_load_data``.

    Returns:
        ``[[train, train], [valid, valid]]`` where ``train`` and ``valid``
        are the two flat tuples, each listed twice.
    """
    _, frame = q01_load_data(path)
    folds = list(TimeSeriesSplit(n_splits=3).split(frame))

    # Flatten across folds, keeping fold order and in-fold order.
    all_train = tuple(
        index for train_part, _ in folds for index in train_part)
    all_valid = tuple(
        index for _, valid_part in folds for index in valid_part)
    return [[all_train, all_train], [all_valid, all_valid]]
Beispiel #13
0
def q05_feature_engineering_part3(path):
    """Draw one box of ``Demand`` per calendar month, January to December.

    Adds a ``month`` column (abbreviated month name) to the loaded frame.
    Nothing is returned and the figure is not explicitly shown here.

    Args:
        path: Forwarded unchanged to ``q01_load_data``.
    """
    df_shape, df = q01_load_data(path)
    df['month'] = df['Datetime'].dt.strftime('%b')

    # All twelve months: the list previously skipped 'May', so that
    # month's demand never appeared in the plot.
    months = [
        'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
        'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec',
    ]
    demands = [list(df[df['month'] == month]['Demand']) for month in months]
    plt.boxplot(demands, labels=months)
def q05_feature_engineering_part3(path):
    """Show a seaborn box plot of ``Demand`` for each month label.

    Adds a ``Month`` column (abbreviated month name) to the loaded frame.
    No explicit month order is passed to the plot.

    Args:
        path: Forwarded unchanged to ``q01_load_data``.
    """
    _, frame = q01_load_data(path)
    frame['Month'] = frame['Datetime'].dt.strftime('%b')

    plot_options = dict(kind='box', size=8, aspect=(16 / 9))
    sns.factorplot(x='Month', y='Demand', data=frame, **plot_options)

    plt.xlabel('Month')
    plt.ylabel('Demand')
    plt.title('Change in Demand of Electricity wrt Month')
    plt.show()
Beispiel #15
0
def q04_boxplot(path):
    """Show a seaborn box plot of ``Demand`` grouped by ``WorkDay``.

    Args:
        path: Forwarded unchanged to ``q01_load_data``.
    """
    _, frame = q01_load_data(path)
    frame.head()  # result is discarded; kept as in the original flow

    # NOTE(review): factorplot appears to create its own figure, so the
    # figure opened here may stay empty -- confirm before removing it.
    plt.figure(figsize=(16, 7))
    plot_options = dict(kind='box', size=8, aspect=float(16 / 7))
    sns.factorplot(x='WorkDay', y='Demand', data=frame, **plot_options)

    plt.title('Change in Electricity Demand wrt to Demand')
    plt.xlabel('Workday')
    plt.ylabel('Demand')
    plt.show()
Beispiel #16
0
def q05_feature_engineering_part2(path):
    """Show one box of ``Demand`` per hour of day (0 through 23).

    Adds an ``hour`` column to the loaded frame as a side effect.

    Args:
        path: Forwarded unchanged to ``q01_load_data``.
    """
    _, frame = q01_load_data(path)
    frame['hour'] = frame['Datetime'].dt.hour

    # Demand values grouped by hour, in hour order.
    demand_by_hour = [
        frame[frame['hour'] == hour]['Demand'].values
        for hour in range(24)
    ]
    hour_labels = [str(hour) for hour in range(24)]

    plt.figure(figsize=(16, 6))
    plt.boxplot(demand_by_hour, labels=hour_labels)
    plt.title('Change in Electricity demand wrt to Hour')
    plt.xlabel('Hour')
    plt.ylabel('Demand')
    plt.show()
def q05_feature_engineering_part2(path):
    """Draw one box of ``Demand`` per hour of day (0 through 23).

    Adds an ``hour`` column to the loaded frame as a side effect. Nothing
    is returned and the figure is not explicitly shown here.

    Args:
        path: Forwarded unchanged to ``q01_load_data``.
    """
    _, frame = q01_load_data(path)
    # Leftover template placeholder; a bare string statement has no effect.
    'write your solution here'
    frame['hour'] = frame['Datetime'].dt.hour

    plt.figure(figsize=(16, 6))

    demand_by_hour = [
        frame[frame['hour'] == hour]['Demand'].values
        for hour in range(24)
    ]
    hour_labels = [str(hour) for hour in range(24)]

    plt.boxplot(demand_by_hour, labels=hour_labels)
    plt.title('Change in Electricity demand wrt to Hour')
    plt.xlabel('Hour')
    plt.ylabel('Demand')
Beispiel #18
0
def q05_feature_engineering_part3(path='data/elecdemand.csv'):
    """Show stacked box plots of ``Demand`` by hour of day and by month.

    Two fixes over the previous version:

    * Hours are iterated as 0..23, matching what ``dt.hour`` produces. The
      old ``range(1, 25)`` dropped hour 0 and drew an always-empty "24" box.
    * ``path`` is no longer overwritten by a hard-coded literal; that
      literal is now the default value.

    Adds ``hour`` and ``month`` (1..12) columns to the loaded frame.

    Args:
        path: Forwarded unchanged to ``q01_load_data``. Defaults to
            ``'data/elecdemand.csv'``.
    """
    _, data = q01_load_data(path)
    data['hour'] = data['Datetime'].dt.hour
    data['month'] = data['Datetime'].dt.month
    plt.figure(figsize=(16, 6))

    hours = list(range(24))       # dt.hour values
    months = list(range(1, 13))   # dt.month values
    demand_hours = [data[data['hour'] == h]['Demand'].values for h in hours]
    demand_months = [
        data[data['month'] == m]['Demand'].values for m in months
    ]

    # Top panel: distribution per hour.
    plt.subplot(211)
    plt.boxplot(demand_hours, labels=[str(h) for h in hours])
    plt.xlabel('Hour')
    plt.ylabel('Demand')
    plt.title('Change in Electricity demand wrt to Hour')

    # Bottom panel: distribution per month.
    plt.subplot(212)
    plt.boxplot(demand_months, labels=[str(m) for m in months])
    plt.xlabel('Months')
    plt.ylabel('Demand')
    plt.title('Change in Electricity demand wrt to months')
    plt.show()
def q04_boxplot(path='data/elecdemand.csv'):
    """Show a pandas box plot of ``Demand`` grouped by ``WorkDay``.

    The body used to overwrite ``path`` with a hard-coded literal, so the
    caller's argument was silently ignored. That literal is now the default
    value and an explicitly passed ``path`` is honoured.

    Args:
        path: Forwarded unchanged to ``q01_load_data``. Defaults to
            ``'data/elecdemand.csv'``.
    """
    _, df = q01_load_data(path)
    df.boxplot(column=['Demand'], by=['WorkDay'])
    plt.show()
Beispiel #20
0
def q04_boxplot(path):
    """Draw a seaborn box plot of ``Demand`` grouped by ``WorkDay``.

    Nothing is returned and the figure is not explicitly shown here.

    Args:
        path: Forwarded unchanged to ``q01_load_data``.
    """
    _, frame = q01_load_data(path)
    sns.boxplot(data=frame, x='WorkDay', y='Demand')
def q03_time_plot(path):
    """Draw a line chart of ``Demand`` against ``Datetime``.

    Nothing is returned and the figure is not explicitly shown here.

    Args:
        path: Forwarded unchanged to ``q01_load_data``.
    """
    _, frame = q01_load_data(path)
    timestamps, demand = frame['Datetime'], frame['Demand']
    plt.plot(timestamps, demand)
def q02_data_splitter(path):
    """Split the loaded frame into two time-ordered train/validation folds.

    Seeds NumPy's global random generator with 9 before loading.

    Args:
        path: Forwarded unchanged to ``q01_load_data``.

    Returns:
        A list of ``(train_index, valid_index)`` tuples, one per fold.
    """
    np.random.seed(9)
    _, frame = q01_load_data(path)
    splitter = TimeSeriesSplit(n_splits=2)
    return [
        (train_index, valid_index)
        for train_index, valid_index in splitter.split(frame)
    ]
def q05_feature_engineering(path):
    """Draw a scatter plot of ``Demand`` against ``Temperature``.

    Nothing is returned and the figure is not explicitly shown here.

    Args:
        path: Forwarded unchanged to ``q01_load_data``.
    """
    _, frame = q01_load_data(path)
    temperature, demand = frame['Temperature'], frame['Demand']
    plt.scatter(temperature, demand)
def q05_feature_engineering(path):
    """Show a scatter plot of ``Demand`` against ``Temperature``.

    Args:
        path: Forwarded unchanged to ``q01_load_data``.
    """
    _, frame = q01_load_data(path)
    temperature = frame['Temperature']
    demand = frame['Demand']

    # The correlation matrix is computed but neither displayed nor returned.
    correlation_matrix = np.corrcoef(temperature, demand)

    plt.scatter(temperature, demand)
    plt.show()
Beispiel #25
0
def q02_data_splitter(path):
    """Split the loaded frame into two time-ordered train/validation folds.

    Seeds NumPy's global random generator with 9 before loading.

    Args:
        path: Forwarded unchanged to ``q01_load_data``.

    Returns:
        A list holding each item yielded by ``TimeSeriesSplit.split``.
    """
    np.random.seed(9)
    _, frame = q01_load_data(path)

    splitter = TimeSeriesSplit(n_splits=2)
    folds = []
    for fold in splitter.split(X=frame):
        folds.append(fold)
    return folds