def show_households_details(df, household_id, datetime_col):
    '''Print the number of days of data available for one household.

    `df` is first passed through ``ExtractTimeSeries`` (constructed with
    `datetime_col` and the 'KWH/hh (per half hour) ' label).  The days
    reported by ``extract_days`` are then counted for the whole frame, for
    the rows whose weekday is <= 4 and for the rows whose weekday is > 4.

    `household_id` is only used in the printed message.  Returns None.
    '''
    extractor = ExtractTimeSeries(datetime_col, 'KWH/hh (per half hour) ')
    df = extractor.transform(df)
    all_days = extract_days(df)

    # pandas numbers weekdays Monday=0 ... Sunday=6, so "<= 4" is Mon-Fri.
    df['weekday'] = pd.DatetimeIndex(df.index).weekday
    working_days = extract_days(df[df['weekday'] <= 4])
    weekend_days = extract_days(df[df['weekday'] > 4])

    message = 'The total number of days for the household {} is: {}, where weekdays: {} and weekends: {}'
    # print('') emits the same single newline as a bare print statement.
    print('')
    print(message.format(household_id, len(all_days), len(working_days), len(weekend_days)))
    print('')
def train_test_split(self, df):
    '''Split `df` by day into a training and a test frame and save both.

    The household id, part of week and number of training days are read
    from the first row of ``self.environment_params``.  The first
    `train_days` days (as listed by ``extract_days``) form the training
    set, everything from the following day onwards forms the test set.
    Both frames are printed (head and tail) and written as CSV files under
    '../clean_data/'.

    Returns the tuple ``(df_train, df_test)``.  When fewer than 10 training
    days are configured a message is printed and None is returned instead.
    '''
    params = self.environment_params
    household_id = params['household_id'].values[0]
    part_of_week = params['part_of_week'].values[0]
    train_days = params['train_days'].values[0]

    if train_days < 10:
        print('There must be at least 10 days in training set!')
        return

    days = extract_days(df)
    print('{}th day: {}'.format(train_days, days[train_days-1]))

    # The query strings below refer to this local by name via '@'.
    test_set_first_date = days[train_days]
    df_train = df.query('index < @test_set_first_date')
    df_test = df.query('index >= @test_set_first_date')

    for title, frame in (('Training data set', df_train), ('Test data set', df_test)):
        print('')
        print(title)
        print(frame.head())
        print(frame.tail())

    print_process('Saving Train and Test Data')
    day_count = str(train_days)
    path_to_train_data = ''.join(
        ['../clean_data/', household_id, '_train_', part_of_week, '_', day_count, '.csv'])
    path_to_test_data = ''.join(
        ['../clean_data/', household_id, '_test_', part_of_week, '_', day_count, '.csv'])
    df_train.to_csv(path_to_train_data)
    df_test.to_csv(path_to_test_data)
    print('Train data saved into: {}'.format(path_to_train_data))
    print('Test data saved into: {}'.format(path_to_test_data))
    return df_train, df_test
def main():
    '''Load demand, price and solar data and run the battery optimization.

    Demand is summed to hourly values and the price series is forward
    filled to hourly values.  A solar generation frame and a flat-tariff
    solar price frame are also built, but only `demand` and `price` are
    handed to ``run_optimization``.
    '''
    csv_options = dict(parse_dates=True, index_col='Unnamed: 0')

    # Hourly demand and the list of days it covers.
    demand = pd.read_csv('../data/MAC000002_train_weekdays_3.csv', **csv_options)
    demand = demand.resample('1H').sum()
    days = extract_days(demand)

    # Hourly price, forward filled.
    price = pd.read_csv('../data/price_data_London.csv', **csv_options)
    price = price.resample('1H').pad()

    # Solar generation: keep 24 rows per demand day and put them on the
    # demand index.
    solar = pd.read_csv('../data/Generic Run Results.csv')
    solar = solar.drop('Time stamp', axis=1)
    solar = solar.head(24 * len(days))
    solar = pd.DataFrame(solar.values, columns=solar.columns, index=demand.index)

    # Constant solar tariff (UK Pounds) on the price index.
    flat_solar_tariff = 0.04
    solar_price = pd.DataFrame([flat_solar_tariff] * len(price),
                               columns=price.columns,
                               index=price.index)

    # NOTE(review): `solar` and `solar_price` are not passed on here --
    # confirm whether the solar-aware call is intended.
    run_optimization(demand, price)
def transform(self, df):
    '''Return the rows of `df` for the configured part of the week.

    The selection is read from the first row of
    ``self.environment_params['part_of_week']``:

    * 'weekdays' -- rows whose weekday is <= 4,
    * 'weekends' -- rows whose weekday is > 4,
    * anything else -- all rows.

    The number of weekdays and weekend days found by ``extract_days`` and
    the selection made are printed to stdout.

    `df` must be indexed by datetimes.  It is NOT modified: the helper
    'weekday' column is added to a private copy, so the caller's frame no
    longer gains that column as a side effect.  The returned frame is
    always a new object without the helper column.
    '''
    # Work on a copy so the temporary 'weekday' column never leaks into
    # the caller's DataFrame.
    tagged = df.copy()
    # pandas numbers weekdays Monday=0 ... Sunday=6.
    tagged['weekday'] = pd.DatetimeIndex(tagged.index).weekday

    df_weekdays = tagged[tagged['weekday'] <= 4].drop('weekday', axis=1)
    weekdays = extract_days(df_weekdays)
    df_weekends = tagged[tagged['weekday'] > 4].drop('weekday', axis=1)
    weekends = extract_days(df_weekends)
    print('weekdays: {}, weekends: {}'.format(len(weekdays), len(weekends)))
    print('')

    part_of_week = self.environment_params['part_of_week'].values[0]

    # Every branch starts with a blank line before its message.
    print('')
    if part_of_week == 'weekdays':
        print('Selected weekdays only')
        return df_weekdays
    if part_of_week == 'weekends':
        print('Selected weekends only')
        return df_weekends
    print('Selected all days of week')
    return tagged.drop('weekday', axis=1)
def transform(self, df):
    '''Drop every row that belongs to a day with too few records.

    A day (as listed by ``extract_days(df, self.datetime_col)``) is
    considered incomplete when fewer than ``self.num_records_aday`` rows of
    `df` have a timestamp in ``[day, day + 1 day)``.  Timestamps are taken
    from the ``self.datetime_col`` column.

    Returns a new frame without the rows of incomplete days.
    '''
    days = extract_days(df, self.datetime_col)
    stamps = pd.to_datetime(df[self.datetime_col])
    one_day = timedelta(days=1)

    # Days whose half-open [day, day + 1) window holds too few rows.
    incomplete_days = [
        day for day in days
        if len(df[(stamps >= day) & (stamps < day + one_day)]) < self.num_records_aday
    ]

    # Index labels of all rows whose calendar date is an incomplete day.
    labels_to_drop = [
        df.index[position]
        for position, stamp in enumerate(stamps)
        if stamp.date() in incomplete_days
    ]
    return df.drop(labels_to_drop)
def transform(self, df):
    '''Transform input time series into a set of time series, one for each
    time-of-day slice, where each series has the days as its argument.

    The time slots are the distinct times of day found in the first day of
    `df` (the rows from the first day up to, but excluding, the second day
    listed by ``extract_days``).  When the data covers a single day, all
    rows from that day onwards are used.

    `df` must be indexed by datetimes.  It is NOT modified: the helper
    'time' column is added to a private copy.

    Returns a dict mapping each ``datetime.time`` slot to a DataFrame with
    the rows of `df` recorded at that time of day.  An empty dict is
    returned when ``extract_days`` finds no days.
    '''
    days = extract_days(df)
    if len(days) == 0:
        return {}

    # The query strings refer to these locals by name via '@'.
    day = days[0]
    if len(days) > 1:
        next_day = days[1]
        first_day_index = df.query('index >= @day and index < @next_day').index
    else:
        # Only one day available: there is no "next day" to bound the window.
        first_day_index = df.query('index >= @day').index

    # Distinct times of day, in order of first appearance.
    time_intervs_in_day = []
    for stamp in first_day_index:
        slot = stamp.time()
        if slot not in time_intervs_in_day:
            time_intervs_in_day.append(slot)

    # Work on a copy so the temporary 'time' column never leaks into the
    # caller's DataFrame.
    tagged = df.copy()
    tagged['time'] = [stamp.time() for stamp in tagged.index]

    # key = time slot, value = DataFrame with the daily series for that slot
    return {
        slot: tagged[tagged['time'] == slot].drop('time', axis=1)
        for slot in time_intervs_in_day
    }
def transform_inverse(self, df):
    '''Expand `df` onto every time slot of the price file, for each day.

    The price CSV at ``self.path_to_price`` supplies the list of time-of-day
    slots.  For every day listed by ``extract_days(df)`` and every price
    slot, one output row is produced at ``datetime.combine(day, slot)``.
    Its value is taken from the first column of the current row of `df`;
    the current row advances by one whenever the slot is later than that
    row's time of day.

    Returns a single-column DataFrame named after the first column of `df`.
    '''
    price = pd.read_csv(self.path_to_price, parse_dates=True, index_col='Unnamed: 0')
    slots = [stamp.time() for stamp in price.index]

    out_index = []
    out_values = []
    for day in extract_days(df):
        # NOTE(review): the row pointer restarts at 0 for every day, so each
        # day reads from the first rows of `df` -- confirm this is intended
        # when `df` spans more than one day.
        row = 0
        for slot in slots:
            if slot > df.index[row].time():
                row += 1
            out_values.append(df.values[row][0])
            out_index.append(datetime.combine(day, slot))

    return pd.DataFrame(out_values, columns=[df.columns[0]], index=out_index)
def transform(self, df, test_data=False):
    '''Average the readings in `df` over runs of equal consecutive prices.

    The price CSV at ``self.path_to_price`` is scanned in order; consecutive
    rows with the same price (first column) form one run.  Each run is
    identified by the time of day of its last row.  For every run, the
    first column of `df` is averaged, day by day, over the time slots that
    belong to the run.

    Parameters:
        df: DataFrame indexed by datetimes.  It is NOT modified: the helper
            'time' column is added to a private copy.
        test_data: selects the shape of the result (see below).

    Returns:
        * ``test_data`` false -- dict mapping each run's closing time to a
          single-column DataFrame indexed by ``datetime.combine(day, time)``
          for every day listed by ``extract_days``.
        * ``test_data`` true -- the same frames concatenated into one
          DataFrame sorted by index (empty DataFrame if there are no runs).
    '''
    price = pd.read_csv(self.path_to_price, parse_dates=True, index_col='Unnamed: 0')
    # Positional access on the raw values avoids integer look-ups on a
    # datetime-indexed Series.
    price_values = price[price.columns[0]].values
    last_pos = len(price) - 1

    # key = closing time of a price run, value = list of time slots in the run
    times_corr_by_price = {}
    current_run = []
    for pos, stamp in enumerate(price.index):
        slot = stamp.time()
        current_run.append(slot)
        # A run ends at the last row or where the next price differs.
        if pos == last_pos or price_values[pos] != price_values[pos + 1]:
            times_corr_by_price[slot] = current_run
            current_run = []

    # Work on a copy so the temporary 'time' column never leaks into the
    # caller's DataFrame.
    tagged = df.copy()
    tagged['time'] = [stamp.time() for stamp in tagged.index]
    value_col = tagged.columns[0]

    # key = closing time of a run, value = dict {time slot: array of the
    # readings at that slot over the days}
    price_corr_data = {}
    for run_end, slots in times_corr_by_price.items():
        price_corr_data[run_end] = {
            slot: tagged[tagged['time'] == slot].drop('time', axis=1)[value_col].values
            for slot in slots
        }

    days = extract_days(tagged)

    def means_frame(run_end, data):
        # Row-wise mean across the slots of one run: one value per day.
        return pd.DataFrame(pd.DataFrame(data).mean(axis=1).values,
                            columns=[value_col],
                            index=[datetime.combine(day, run_end) for day in days])

    if test_data:
        frames = [means_frame(run_end, data)
                  for run_end, data in price_corr_data.items()]
        if not frames:
            return pd.DataFrame()
        # One concat instead of growing the frame with repeated append.
        data_means_grouped_by_price = pd.concat(frames)
        data_means_grouped_by_price.sort_index(inplace=True, kind='mergesort')
        return data_means_grouped_by_price

    return {run_end: means_frame(run_end, data)
            for run_end, data in price_corr_data.items()}
def run_optimization(environment_params):
    '''Sweep battery capacities and report the bill savings for each.

    The household id, model name, part of week, number of training days and
    price file name are read from the first row of `environment_params`.
    They locate the predicted demand and the test data under
    '../predictions/' and the price data under '../clean_data/'.

    For every capacity in the range defined by '../params/battery_params.txt'
    a ``MinimizeDailyBill`` model is fitted on the predicted demand and the
    price.  The resulting bill is compared with the bill without a battery
    (test data dotted with the price), and the saving is printed.  The
    savings are then plotted, the user is asked for one capacity, and the
    battery schedule for that capacity is plotted.

    Raises:
        KeyError: if the capacity entered by the user (rounded to two
            decimals) is not one of the capacities that were optimized.
    '''
    household_id = environment_params['household_id'].values[0]
    model_name = environment_params['model_name'].values[0]
    part_of_week = environment_params['part_of_week'].values[0]
    train_days = str(environment_params['train_days'].values[0])
    price_file_name = environment_params['price_file_name'].values[0]

    path_to_pred = '../predictions/'+household_id+'_'+model_name+'_'+part_of_week+'_'+train_days+'.csv'
    path_to_test = '../predictions/'+household_id+'_test_'+part_of_week+'_'+train_days+'.csv'
    path_to_price = '../clean_data/'+price_file_name+'.csv'

    print_process('Loading Predicted Demand, Test, and Pricing Data')
    demand = pd.read_csv(path_to_pred, parse_dates=True, index_col='Unnamed: 0')
    test = pd.read_csv(path_to_test, parse_dates=True, index_col='Unnamed: 0')
    # Put the test values on the demand index.
    test = pd.DataFrame(test.values, columns=[test.columns[0]], index=demand.index)
    price = pd.read_csv(path_to_price, parse_dates=True, index_col='Unnamed: 0')

    # Extrapolation of the last day of pricing data to the number of days in
    # the demand, in case there are fewer days of pricing data than demand.
    demand_days = extract_days(demand)
    num_demand_days = len(demand_days)
    price_days = extract_days(price)
    num_price_days = len(price_days)
    if num_demand_days > num_price_days:
        # The query string refers to this local by name via '@'.
        price_last_day = price_days[num_price_days-1]
        price_dummy = price.query('index >= @price_last_day')
        missing_days = num_demand_days - num_price_days
        # One concat instead of appending the last day once per missing day.
        price = pd.concat([price] + [price_dummy] * missing_days)
    # NOTE(review): the price is re-indexed onto the demand index whether or
    # not it was extended above -- confirm this matches the intended nesting.
    price = pd.DataFrame(price.values, columns=[price.columns[0]], index=demand.index)

    battery_params = pd.read_csv('../params/battery_params.txt', delim_whitespace=True)
    battery_capacity_interval = battery_params['battery_capacity_interval'].values[0]
    battery_capacity_max =\
        battery_capacity_interval*(1+battery_params['battery_capacity_multiple'].values[0])
    efficiency = battery_params['efficiency'].values[0]
    charging_rate = battery_params['charging_rate'].values[0]

    savings = []
    battery_capacities =\
        np.arange(battery_capacity_interval, battery_capacity_max, battery_capacity_interval)
    batteries = {}  # key = capacity rounded to 2 decimals, value = fit result

    for battery_capacity in battery_capacities:
        mdb = MinimizeDailyBill(battery_capacity, efficiency, charging_rate)
        battery = mdb.fit(demand, price)
        batteries[round(battery_capacity, 2)] = battery
        daily_bill_w_battery = mdb.daily_bill

        # Computation of daily bill if there were no battery.
        test_vec = np.array(test.values)
        price_vec = np.array(price.values)
        daily_bill_no_battery = np.dot(test_vec.T, price_vec)[0][0]
        saving = round(100.0*(daily_bill_no_battery-daily_bill_w_battery)/daily_bill_no_battery, 2)
        savings.append(saving)
        print('|==============================================================|')
        print('Daily Bill with no Battery: {}'.format(daily_bill_no_battery))
        print('')
        print('Daily Bill with Battery: {}'.format(daily_bill_w_battery))
        print('')
        print('Daily Savings: {}% for the battery capacity: {}'.format(saving, round(battery_capacity, 2)))

    day_to_pred = demand.index[0].date()
    plot_battery_savings(battery_capacities, savings, day_to_pred, model_name, part_of_week, household_id, train_days)
    print('')
    requested = float(raw_input('Enter battery capacity for wich to plot battery phases of operation: '))
    # Results are stored under capacities rounded to 2 decimals, so round the
    # request the same way before looking it up.
    battery_capacity = round(requested, 2)
    if battery_capacity not in batteries:
        raise KeyError('No optimization result for battery capacity {}; available capacities: {}'
                       .format(battery_capacity, sorted(batteries)))
    plot_results(demand, price, batteries[battery_capacity], day_to_pred, model_name, part_of_week, household_id, train_days, battery_capacity)
model_name = environment_params['model_name'].values[0] print_process('Making predictions from ' + model_name + ' Model') pdtia = PredDataTimeIntervARMA(environment_params) df_pred = pdtia.predict() # # Loading test data and extracting only days specified to compare predictions with train_days = str(environment_params['train_days'].values[0]) part_of_week = environment_params['part_of_week'].values[0] num_days_pred = environment_params['num_days_pred'].values[0] # path_to_test_data = '../clean_data/' + household_id + '_test_' + part_of_week + '_' + train_days + '.csv' df_test = pd.read_csv(path_to_test_data, parse_dates=True, index_col='Unnamed: 0') # days = extract_days(df_test) first_day_to_pred = days[0] last_day_to_pred = days[num_days_pred] df_test_days = df_test.query( 'index >= @first_day_to_pred and index < @last_day_to_pred') # if model_name == 'PriceCorrARMA': price_file_name = environment_params['price_file_name'].values[ 0] mcp = ModelsCorrPrice(price_file_name) df_pred = mcp.transform_inverse(df_pred) # df_test_days = mcp.transform(df_test_days, test_data=True) df_test_days = mcp.transform_inverse(df_test_days) # df_pred = pd.DataFrame(df_pred.values,
def run_optimization(demand, price, solar=None, solar_price=None):
    '''Sweep battery capacities and report the bill savings for each.

    Parameters:
        demand: DataFrame of demand indexed by datetimes.
        price: DataFrame of prices indexed by datetimes.  If it covers fewer
            days than `demand`, its last day is repeated to fill the gap.
        solar, solar_price: optional solar generation and solar price
            frames.  They are used only when both are given and non-empty
            (None and empty sequences both mean "no solar").

    For every capacity in the range defined by '../params/battery_params.txt'
    a ``MinimizeBill`` model is fitted and its bill is compared with the bill
    without a battery (demand dotted with price); the saving is printed.
    The savings are then plotted, the user is asked for one capacity, and
    the battery schedule for that capacity is plotted.

    Raises:
        KeyError: if the capacity entered by the user (rounded to two
            decimals) is not one of the capacities that were optimized.
    '''
    # None replaces the former mutable [] defaults; empty inputs still
    # disable the solar path exactly as before.
    use_solar = (solar is not None and solar_price is not None
                 and len(solar) != 0 and len(solar_price) != 0)

    # Extrapolation of the last day of pricing data to the number of days in
    # the demand, in case there are fewer days of pricing data than demand.
    demand_days = extract_days(demand)
    num_demand_days = len(demand_days)
    price_days = extract_days(price)
    num_price_days = len(price_days)
    if num_demand_days > num_price_days:
        # The query strings refer to this local by name via '@'.
        price_last_day = price_days[num_price_days - 1]
        missing_days = num_demand_days - num_price_days
        price_dummy = price.query('index >= @price_last_day')
        if use_solar:
            solar_price_dummy = solar_price.query('index >= @price_last_day')
            solar_price = pd.concat([solar_price] + [solar_price_dummy] * missing_days)
        # One concat instead of appending the last day once per missing day.
        price = pd.concat([price] + [price_dummy] * missing_days)
    # NOTE(review): both price frames are re-indexed onto the demand index
    # whether or not they were extended above -- confirm this matches the
    # intended nesting.
    price = pd.DataFrame(price.values,
                         columns=[price.columns[0]],
                         index=demand.index)
    if use_solar:
        solar_price = pd.DataFrame(solar_price.values,
                                   columns=[solar_price.columns[0]],
                                   index=demand.index)

    battery_params = pd.read_csv('../params/battery_params.txt',
                                 delim_whitespace=True)
    battery_capacity_interval = battery_params['battery_capacity_interval'].values[0]
    battery_capacity_max =\
        battery_capacity_interval*(1+battery_params['battery_capacity_multiple'].values[0])
    efficiency = battery_params['efficiency'].values[0]
    charging_rate = battery_params['charging_rate'].values[0]

    savings = []
    battery_capacities =\
        np.arange(battery_capacity_interval, battery_capacity_max, battery_capacity_interval)
    batteries = {}  # key = capacity rounded to 2 decimals, value = fit result

    for battery_capacity in battery_capacities:
        mb = MinimizeBill(battery_capacity, efficiency, charging_rate)
        if use_solar:
            battery = mb.fit(demand, price, solar, solar_price)
        else:
            battery = mb.fit(demand, price)
        batteries[round(battery_capacity, 2)] = battery
        daily_bill_w_battery = mb.bill

        # Computation of daily bill if there were no battery.
        demand_vec = np.array(demand.values)
        price_vec = np.array(price.values)
        daily_bill_no_battery = np.dot(demand_vec.T, price_vec)[0][0]
        saving = round(
            100.0 * (daily_bill_no_battery - daily_bill_w_battery) /
            daily_bill_no_battery, 2)
        savings.append(saving)
        print('|==============================================================|')
        print('Daily Bill with no Battery: {}'.format(daily_bill_no_battery))
        print('')
        print('Daily Bill with Battery: {}'.format(daily_bill_w_battery))
        print('')
        print('Daily Savings: {}% for the battery capacity: {}'.format(
            saving, round(battery_capacity, 2)))

    day_to_pred = len(extract_days(demand))
    plot_battery_savings(battery_capacities, savings, day_to_pred)
    print('')
    requested = float(
        raw_input(
            'Enter battery capacity for wich to plot battery phases of operation: '
        ))
    # Results are stored under capacities rounded to 2 decimals, so round the
    # request the same way before looking it up.
    battery_capacity = round(requested, 2)
    if battery_capacity not in batteries:
        raise KeyError('No optimization result for battery capacity {}; available capacities: {}'
                       .format(battery_capacity, sorted(batteries)))
    plot_results(demand, price, batteries[battery_capacity], day_to_pred,
                 battery_capacity)