def q05_feature_engineering_part3(path):
    """Draw a box plot of ``Demand`` for each calendar month.

    The frame returned by ``q01_load_data`` gets two derived columns
    (``hour`` and ``month``) taken from ``Datetime``; the plot groups
    ``Demand`` by the abbreviated month name, in calendar order.

    Args:
        path: Data location, forwarded unchanged to ``q01_load_data``.
    """
    calendar_order = [
        'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
        'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec',
    ]
    _, frame = q01_load_data(path)
    # Emits the DataFrame summary as a side effect.
    frame.info()

    stamps = frame['Datetime']
    frame['hour'] = stamps.dt.hour
    frame['month'] = stamps.dt.strftime('%b')

    sns.factorplot(
        x='month',
        y='Demand',
        data=frame,
        kind='box',
        order=calendar_order,
        size=8,
        aspect=float(16 / 7),
    )
def q03_time_plot(path):
    """Show a line chart of ``Demand`` over ``Datetime`` on a 16x7 figure.

    Args:
        path: Data location, forwarded unchanged to ``q01_load_data``.
    """
    _, frame = q01_load_data(path)
    timestamps = frame['Datetime']
    demand = frame['Demand']

    plt.figure(figsize=(16, 7))
    plt.plot(timestamps, demand)
    plt.xlabel('Time')
    plt.ylabel('Demand')
    plt.title('Electricity Demand in Australia for a year')
    plt.show()
def q03_time_plot(path):
    """Show a line chart of ``Demand`` over ``Datetime``.

    The ``path`` argument is honoured as given; it is no longer replaced
    by a hard-coded file name inside the function.

    Args:
        path: Data location, forwarded unchanged to ``q01_load_data``.
    """
    _, df = q01_load_data(path)
    plt.plot(df['Datetime'], df['Demand'])
    plt.title('Electricity Demand for Australia for a year')
    plt.xlabel('Year-Month')
    plt.ylabel('Demand')
    plt.show()
def q05_feature_engineering_part4(path):
    """Add hour, month and peak-indicator columns to the loaded frame.

    Columns added:
        ``hour``       -- hour component of ``Datetime``.
        ``month``      -- abbreviated month name of ``Datetime``.
        ``Peakhours``  -- 1 where 6 <= hour < 20, otherwise 0.
        ``Peakmonths`` -- 1 for Feb, May, Jun, Jul and Aug, otherwise 0.

    Args:
        path: Data location, forwarded unchanged to ``q01_load_data``.

    Returns:
        The frame from ``q01_load_data`` with the four columns added.
    """
    peak_months = ['Feb', 'May', 'Jun', 'Jul', 'Aug']

    def month_flag(name):
        if name in peak_months:
            return 1
        return 0

    _, frame = q01_load_data(path)
    stamps = frame['Datetime']
    frame['hour'] = stamps.dt.hour
    frame['month'] = stamps.dt.strftime('%b')

    hours = frame['hour']
    in_peak_window = (hours >= 6) & (hours < 20)
    # Multiplying the boolean mask by 1 turns it into 0/1 integers.
    frame['Peakhours'] = in_peak_window * 1
    frame['Peakmonths'] = frame['month'].apply(month_flag)
    return frame
def q04_boxplot(path):
    """Draw a box plot of ``Demand`` grouped by ``WorkDay``.

    ``sns.factorplot`` is a figure-level function that creates its own
    figure, so no separate ``plt.figure`` call is made beforehand; doing
    so only left an extra, empty figure behind.

    Args:
        path: Data location, forwarded unchanged to ``q01_load_data``.
    """
    _, df = q01_load_data(path)
    sns.factorplot(
        x='WorkDay',
        y='Demand',
        data=df,
        kind='box',
        size=8,
        aspect=float(16 / 7),
    )
    # These calls act on the current axes, i.e. the ones factorplot made.
    plt.xlabel('Workday')
    plt.ylabel('Demand')
    plt.title('Each workday demand in Australia')
def q03_time_plot(path):
    """Show a line chart of ``Demand`` over ``Datetime`` on a 16x6 figure.

    Args:
        path: Data location, forwarded unchanged to ``q01_load_data``.
    """
    _, frame = q01_load_data(path)
    x_values = frame['Datetime']
    y_values = frame['Demand']

    plt.figure(figsize=(16, 6))
    plt.plot(x_values, y_values)
    plt.title('Electricity Demand in Australia for a year')
    plt.ylabel('Demand')
    plt.xlabel('Time')
    plt.show()
def q02_data_splitter(path):
    """Split the loaded frame into two time-ordered train/validation folds.

    The ``path`` argument is honoured as given; it is no longer replaced
    by a hard-coded file name inside the function.

    Args:
        path: Data location, forwarded unchanged to ``q01_load_data``.

    Returns:
        A list of ``(train_index, valid_index)`` tuples, one per fold, as
        produced by ``TimeSeriesSplit(n_splits=2).split``.
    """
    _, df = q01_load_data(path)
    tscv = TimeSeriesSplit(n_splits=2)
    return list(tscv.split(df))
def q05_feature_engineering(path):
    """Show a scatter plot of ``Demand`` against ``Temperature``.

    Args:
        path: Data location, forwarded unchanged to ``q01_load_data``.
    """
    _, frame = q01_load_data(path)
    temperature = frame['Temperature']
    demand = frame['Demand']

    # The correlation is computed but neither returned nor displayed.
    correlation = temperature.corr(demand)

    plt.figure(figsize=(16, 6))
    plt.scatter(temperature, demand)
    plt.xlabel('Temperature')
    plt.ylabel('Demand')
    plt.title('Temperature vs. Demand')
    plt.show()
def q05_feature_engineering_part4(path):
    """Add hour, month and peak-indicator columns to the loaded frame.

    Columns added:
        ``hour``       -- hour component of ``Datetime``.
        ``month``      -- abbreviated month name of ``Datetime``.
        ``Peakhours``  -- 1 when the hour is in ``range(6, 20)``, else 0.
        ``Peakmonths`` -- 1 for Feb, May, Jun, Jul and Aug, else 0.

    Args:
        path: Data location, forwarded unchanged to ``q01_load_data``.

    Returns:
        The frame from ``q01_load_data`` with the four columns added.
    """
    peak_hours = range(6, 20)
    peak_months = ['Feb', 'May', 'Jun', 'Jul', 'Aug']

    def hour_flag(value):
        if value in peak_hours:
            return 1
        return 0

    def month_flag(value):
        if value in peak_months:
            return 1
        return 0

    _, frame = q01_load_data(path)
    stamps = frame['Datetime']
    frame['hour'] = stamps.dt.hour
    frame['month'] = stamps.dt.strftime('%b')
    frame['Peakhours'] = frame['hour'].apply(hour_flag)
    frame['Peakmonths'] = frame['month'].apply(month_flag)
    return frame
def q05_feature_engineering_part2(path):
    """Show one box plot of ``Demand`` per hour value 0 through 23.

    Args:
        path: Data location, forwarded unchanged to ``q01_load_data``.
    """
    _, frame = q01_load_data(path)
    frame['hour'] = frame['Datetime'].dt.hour

    hour_values = range(24)
    # One array of demand readings per hour value.
    demand_by_hour = [
        frame[frame['hour'] == hour]['Demand'].values
        for hour in hour_values
    ]
    tick_labels = [str(hour) for hour in hour_values]

    plt.boxplot(demand_by_hour, labels=tick_labels)
    plt.show()
def q05_feature_engineering(path):
    """Show a scatter plot of ``Demand`` against ``Temperature``.

    The ``path`` argument is honoured as given; it is no longer replaced
    by a hard-coded file name inside the function.

    Args:
        path: Data location, forwarded unchanged to ``q01_load_data``.
    """
    _, df = q01_load_data(path)
    temperature = df['Temperature']
    demand = df['Demand']

    # The Pearson statistic and p-value are computed but not returned.
    corr, p_value = pearsonr(temperature, demand)

    plt.scatter(temperature, demand)
    plt.xlabel('Temperature')
    plt.ylabel('Demand')
    plt.title('Temperature vs Demand')
    plt.show()
def q02_data_splitter(path):
    """Collect the indices of three time-series folds into two tuples.

    The train indices of all folds are concatenated into one tuple, and
    the validation indices into another.

    NOTE(review): the return value is not a list of per-fold
    ``(train, valid)`` pairs; each concatenated tuple is repeated twice.
    Confirm that callers really expect this shape.

    Args:
        path: Data location, forwarded unchanged to ``q01_load_data``.

    Returns:
        ``[[train, train], [valid, valid]]`` where ``train`` and ``valid``
        are the concatenated index tuples described above.
    """
    _, frame = q01_load_data(path)
    folds = list(TimeSeriesSplit(n_splits=3).split(frame))

    all_train = tuple(
        index for train_part, _ in folds for index in train_part
    )
    all_valid = tuple(
        index for _, valid_part in folds for index in valid_part
    )
    return [[all_train, all_train], [all_valid, all_valid]]
def q05_feature_engineering_part3(path):
    """Draw one box plot of ``Demand`` per calendar month.

    All twelve months are plotted; the month list previously skipped
    'May', so that month's demand never appeared in the chart.

    Args:
        path: Data location, forwarded unchanged to ``q01_load_data``.
    """
    _, df = q01_load_data(path)
    df['month'] = df['Datetime'].dt.strftime('%b')

    months = [
        'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
        'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec',
    ]
    # One list of demand readings per month, in calendar order.
    demands = [
        list(df.loc[df['month'] == month, 'Demand']) for month in months
    ]
    plt.boxplot(demands, labels=months)
def q05_feature_engineering_part3(path):
    """Show a box plot of ``Demand`` grouped by abbreviated month name.

    A ``Month`` column derived from ``Datetime`` is added to the frame
    before plotting.

    Args:
        path: Data location, forwarded unchanged to ``q01_load_data``.
    """
    _, frame = q01_load_data(path)
    frame['Month'] = frame['Datetime'].dt.strftime('%b')

    plot_options = {
        'x': 'Month',
        'y': 'Demand',
        'data': frame,
        'kind': 'box',
        'size': 8,
        'aspect': (16 / 9),
    }
    sns.factorplot(**plot_options)

    plt.xlabel('Month')
    plt.ylabel('Demand')
    plt.title('Change in Demand of Electricity wrt Month')
    plt.show()
def q04_boxplot(path):
    """Show a box plot of ``Demand`` grouped by ``WorkDay``.

    ``sns.factorplot`` is a figure-level function that creates its own
    figure, so no separate ``plt.figure`` call is made beforehand; doing
    so only produced an extra, empty figure when ``plt.show()`` ran.
    The title names the grouping variable (Workday), not Demand.

    Args:
        path: Data location, forwarded unchanged to ``q01_load_data``.
    """
    _, data = q01_load_data(path)
    sns.factorplot(
        x='WorkDay',
        y='Demand',
        data=data,
        kind='box',
        size=8,
        aspect=float(16 / 7),
    )
    # These calls act on the current axes, i.e. the ones factorplot made.
    plt.xlabel('Workday')
    plt.ylabel('Demand')
    plt.title('Change in Electricity Demand wrt to Workday')
    plt.show()
def q05_feature_engineering_part2(path):
    """Show one box plot of ``Demand`` per hour value 0 through 23.

    Args:
        path: Data location, forwarded unchanged to ``q01_load_data``.
    """
    _, frame = q01_load_data(path)
    frame['hour'] = frame['Datetime'].dt.hour

    hour_values = list(range(24))
    # One array of demand readings per hour value.
    demand_by_hour = [
        frame[frame['hour'] == hour]['Demand'].values
        for hour in hour_values
    ]

    plt.figure(figsize=(16, 6))
    plt.boxplot(demand_by_hour, labels=[str(hour) for hour in hour_values])
    plt.title('Change in Electricity demand wrt to Hour')
    plt.xlabel('Hour')
    plt.ylabel('Demand')
    plt.show()
def q05_feature_engineering_part2(path):
    """Draw one box plot of ``Demand`` per hour value 0 through 23.

    The figure is built on a 16x6 canvas; ``plt.show()`` is not called.

    Args:
        path: Data location, forwarded unchanged to ``q01_load_data``.
    """
    _, frame = q01_load_data(path)
    frame['hour'] = frame['Datetime'].dt.hour

    plt.figure(figsize=(16, 6))

    tick_labels = []
    demand_by_hour = []
    for hour in range(24):
        tick_labels.append(str(hour))
        matching_rows = frame[frame['hour'] == hour]
        demand_by_hour.append(matching_rows['Demand'].values)

    plt.boxplot(demand_by_hour, labels=tick_labels)
    plt.title('Change in Electricity demand wrt to Hour')
    plt.xlabel('Hour')
    plt.ylabel('Demand')
def q05_feature_engineering_part3(path):
    """Show stacked box plots of ``Demand`` by hour and by month.

    The upper subplot groups demand by hour of day, the lower one by
    month number. Two defects of the earlier version are corrected:

    * ``path`` is honoured instead of being replaced by a hard-coded
      file name.
    * Hours are iterated as 0-23, the range ``Series.dt.hour`` yields;
      iterating 1-24 dropped hour 0 and added an always-empty hour 24.

    Args:
        path: Data location, forwarded unchanged to ``q01_load_data``.
    """
    _, data = q01_load_data(path)
    data['hour'] = data['Datetime'].dt.hour
    data['month'] = data['Datetime'].dt.month

    hours = range(24)
    months = range(1, 13)
    demand_hours = [
        data[data['hour'] == hour]['Demand'].values for hour in hours
    ]
    demand_months = [
        data[data['month'] == month]['Demand'].values for month in months
    ]

    plt.figure(figsize=(16, 6))

    plt.subplot(211)
    plt.boxplot(demand_hours, labels=[str(hour) for hour in hours])
    plt.xlabel('Hour')
    plt.ylabel('Demand')
    plt.title('Change in Electricity demand wrt to Hour')

    plt.subplot(212)
    plt.boxplot(demand_months, labels=[str(month) for month in months])
    plt.xlabel('Months')
    plt.ylabel('Demand')
    plt.title('Change in Electricity demand wrt to months')

    plt.show()
def q04_boxplot(path):
    """Show a pandas box plot of ``Demand`` grouped by ``WorkDay``.

    The ``path`` argument is honoured as given; it is no longer replaced
    by a hard-coded file name inside the function.

    Args:
        path: Data location, forwarded unchanged to ``q01_load_data``.
    """
    _, df = q01_load_data(path)
    df.boxplot(column=['Demand'], by=['WorkDay'])
    plt.show()
def q04_boxplot(path):
    """Draw a seaborn box plot of ``Demand`` grouped by ``WorkDay``.

    ``plt.show()`` is not called.

    Args:
        path: Data location, forwarded unchanged to ``q01_load_data``.
    """
    _, frame = q01_load_data(path)
    sns.boxplot(data=frame, x='WorkDay', y='Demand')
def q03_time_plot(path):
    """Draw a line chart of ``Demand`` over ``Datetime``.

    No labels are set and ``plt.show()`` is not called.

    Args:
        path: Data location, forwarded unchanged to ``q01_load_data``.
    """
    _, frame = q01_load_data(path)
    timestamps = frame['Datetime']
    demand = frame['Demand']
    plt.plot(timestamps, demand)
def q02_data_splitter(path):
    """Split the loaded frame into two time-ordered train/validation folds.

    NumPy's global random generator is seeded with 9 before loading.

    Args:
        path: Data location, forwarded unchanged to ``q01_load_data``.

    Returns:
        A list with one ``(train_index, valid_index)`` tuple per fold, as
        produced by ``TimeSeriesSplit(n_splits=2).split``.
    """
    np.random.seed(9)
    _, frame = q01_load_data(path)
    splitter = TimeSeriesSplit(n_splits=2)
    return [fold for fold in splitter.split(frame)]
def q05_feature_engineering(path):
    """Draw a scatter plot of ``Demand`` against ``Temperature``.

    No labels are set and ``plt.show()`` is not called.

    Args:
        path: Data location, forwarded unchanged to ``q01_load_data``.
    """
    _, frame = q01_load_data(path)
    temperature = frame['Temperature']
    demand = frame['Demand']
    plt.scatter(temperature, demand)
def q05_feature_engineering(path):
    """Show a scatter plot of ``Demand`` against ``Temperature``.

    Args:
        path: Data location, forwarded unchanged to ``q01_load_data``.
    """
    _, frame = q01_load_data(path)
    temperature = frame['Temperature']
    demand = frame['Demand']

    # The correlation matrix is computed but neither returned nor shown.
    correlation_matrix = np.corrcoef(temperature, demand)

    plt.scatter(temperature, demand)
    plt.show()
def q02_data_splitter(path):
    """Split the loaded frame into two time-ordered train/validation folds.

    NumPy's global random generator is seeded with 9 before loading.

    Args:
        path: Data location, forwarded unchanged to ``q01_load_data``.

    Returns:
        A list with one ``(train_index, valid_index)`` tuple per fold, as
        produced by ``TimeSeriesSplit(n_splits=2).split``.
    """
    np.random.seed(9)
    _, frame = q01_load_data(path)
    fold_iterator = TimeSeriesSplit(n_splits=2).split(X=frame)
    folds = []
    folds.extend(fold_iterator)
    return folds