def show_households_details(df, household_id, datetime_col):
    """Print how many days of data a household has, split by weekdays and weekends.

    ``df`` is first run through ``ExtractTimeSeries`` (built from
    ``datetime_col`` and the 'KWH/hh (per half hour) ' column name). The
    transformed frame is tagged with the weekday number of its index and the
    days found in the whole frame, the Monday-Friday rows and the
    Saturday-Sunday rows are counted with ``extract_days``.

    ``household_id`` is only used in the printed message. Nothing is returned.
    """
    extractor = ExtractTimeSeries(datetime_col, 'KWH/hh (per half hour) ')
    df = extractor.transform(df)
    days = extract_days(df)
    # Weekday number of every index entry (pandas convention: Monday is 0).
    df['weekday'] = pd.DatetimeIndex(df.index).weekday
    weekdays = extract_days(df[df['weekday'] <= 4])
    weekends = extract_days(df[df['weekday'] > 4])
    print('')
    print('The total number of days for the household {} is: {}, where weekdays: {} and weekends: {}'.format(
        household_id, len(days), len(weekdays), len(weekends)))
    print('')
 def train_test_split(self, df):
     """Split ``df`` chronologically into train and test sets and save both as CSV.

     The household id, part of week and number of training days are read
     from ``self.environment_params``. The first ``train_days`` days returned
     by ``extract_days`` form the training set; every row from the following
     day onwards forms the test set. Both frames are printed (head and tail)
     and written under ``../clean_data/``.

     Returns ``(df_train, df_test)``, or ``None`` (after printing a message)
     when fewer than 10 training days are requested.
     """
     params = self.environment_params
     household_id = params['household_id'].values[0]
     part_of_week = params['part_of_week'].values[0]
     train_days = params['train_days'].values[0]
     if train_days < 10:
         print('There must be at least 10 days in training set!')
         return
     days = extract_days(df)
     print('{}th day: {}'.format(train_days, days[train_days-1]))
     # The day right after the last training day is the first test day.
     # The query strings below refer to this local variable by name.
     test_set_first_date = days[train_days]
     df_train = df.query('index < @test_set_first_date')
     df_test = df.query('index >= @test_set_first_date')
     for title, frame in (('Training data set', df_train),
                          ('Test data set', df_test)):
         print('')
         print(title)
         print(frame.head())
         print(frame.tail())
     print_process('Saving Train and Test Data')
     # Both file names share the same prefix and suffix.
     prefix = '../clean_data/'+household_id
     suffix = part_of_week+'_'+str(train_days)+'.csv'
     path_to_train_data = prefix+'_train_'+suffix
     path_to_test_data = prefix+'_test_'+suffix
     df_train.to_csv(path_to_train_data)
     df_test.to_csv(path_to_test_data)
     print('Train data saved into: {}'.format(path_to_train_data))
     print('Test data saved into: {}'.format(path_to_test_data))
     return df_train, df_test
def main():
    """Load demand, price and solar data from ``../data`` and run the optimization.

    Demand is summed to hourly values and price is forward-filled to hourly
    values. A solar frame and a flat-tariff solar price frame are also built.

    NOTE(review): ``solar`` and ``solar_price`` are prepared here but are not
    passed to ``run_optimization`` -- confirm whether that is intentional.
    """
    def read_indexed_csv(path):
        # The 'Unnamed: 0' column becomes the index and is parsed as dates.
        return pd.read_csv(path, parse_dates=True, index_col='Unnamed: 0')

    # Demand, aggregated to one value per hour.
    demand = read_indexed_csv('../data/MAC000002_train_weekdays_3.csv')
    demand = demand.resample('1H').sum()
    days = extract_days(demand)

    # Price, upsampled to hourly resolution by padding.
    price = read_indexed_csv('../data/price_data_London.csv')
    price = price.resample('1H').pad()

    # Solar: keep 24 rows per demand day and re-index them on the demand index.
    solar = pd.read_csv('../data/Generic Run Results.csv')
    solar = solar.drop('Time stamp', axis=1)
    solar = solar.head(24 * len(days))
    solar = pd.DataFrame(solar.values,
                         columns=solar.columns,
                         index=demand.index)

    # Constant solar tariff on the same index and columns as the price frame.
    flat_solar_tariff = 0.04  # UK Pounds
    solar_price = pd.DataFrame([flat_solar_tariff] * len(price),
                               columns=price.columns,
                               index=price.index)

    run_optimization(demand, price)
 def transform(self, df):
     """Return the rows of ``df`` that fall in the configured part of the week.

     The part of week is read from ``self.environment_params['part_of_week']``:
     'weekdays' keeps rows whose index weekday number is <= 4, 'weekends'
     keeps rows whose weekday number is > 4, and any other value keeps all
     rows. The number of weekday and weekend days (per ``extract_days``) and
     the selection made are printed.

     The caller's ``df`` is left untouched: the helper 'weekday' column is
     added to a copy and never appears in the returned frame.
     """
     # assign() returns a new frame, so the input frame gains no extra column.
     tagged = df.assign(weekday=pd.DatetimeIndex(df.index).weekday)
     df_weekdays = tagged[tagged['weekday'] <= 4].drop('weekday', axis=1)
     weekdays = extract_days(df_weekdays)
     df_weekends = tagged[tagged['weekday'] > 4].drop('weekday', axis=1)
     weekends = extract_days(df_weekends)
     print('weekdays: {}, weekends: {}'.format(len(weekdays), len(weekends)))
     print('')
     part_of_week = self.environment_params['part_of_week'].values[0]
     #
     if part_of_week == 'weekdays':
         message, selected = 'Selected weekdays only', df_weekdays
     elif part_of_week == 'weekends':
         message, selected = 'Selected weekends only', df_weekends
     else:
         message = 'Selected all days of week'
         selected = tagged.drop('weekday', axis=1)
     print('')
     print(message)
     return selected
Exemple #5
0
    def transform(self, df):
        """Drop all rows belonging to days that have too few records.

        A day (as returned by ``extract_days`` for ``self.datetime_col``) is
        considered incomplete when fewer than ``self.num_records_aday`` rows
        have a timestamp within that day. Every row whose timestamp's date is
        one of the incomplete days is removed by index label.

        Returns the frame produced by ``df.drop``.
        """
        days = extract_days(df, self.datetime_col)
        stamps = pd.to_datetime(df[self.datetime_col])

        def rows_in(day):
            # Number of rows stamped within [day, day + 1 day).
            in_day = (stamps >= day) & (stamps < day + timedelta(days=1))
            return len(df[in_day])

        days_to_drop = [day for day in days
                        if rows_in(day) < self.num_records_aday]

        # Index labels of the rows that fall on an incomplete day.
        daytimes_indexes = [df.index[pos]
                            for pos, stamp in enumerate(stamps)
                            if stamp.date() in days_to_drop]
        return df.drop(daytimes_indexes)
 def transform(self, df):
     '''Split the input time series into one series per time-of-day slot.

     The distinct times of day are collected, in order of first appearance,
     from the rows of the first day of ``df`` (days as returned by
     ``extract_days``). For each such time a frame holding every row of
     ``df`` recorded at that time is built.

     Returns a dict mapping ``datetime.time`` -> ``pd.DataFrame``.

     The caller's ``df`` is not modified: the helper 'time' column lives on
     a copy and is dropped from every returned frame.
     '''
     days = extract_days(df)
     if len(days) > 1:
         # The query string refers to these two local variables by name.
         day = days[0]
         next_day = days[1]
         first_day_index = df.query('index >= @day and index < @next_day').index
     else:
         # With fewer than two days there is no "next day" boundary, so the
         # whole index is used as the first day.
         first_day_index = df.index
     time_intervs_in_day = []
     for stamp in first_day_index:
         if stamp.time() not in time_intervs_in_day:
             time_intervs_in_day.append(stamp.time())
     #
     # assign() returns a new frame, so the input frame gains no 'time' column.
     tagged = df.assign(time=[stamp.time() for stamp in df.index])
     # key = time interval, value = pd.DataFrame with the daily time series
     return {time_intv: tagged[tagged['time'] == time_intv].drop('time', axis=1)
             for time_intv in time_intervs_in_day}
 def transform_inverse(self, df):
     """Expand ``df`` onto the time-of-day grid of the price file.

     The price CSV at ``self.path_to_price`` supplies the list of times of
     day. For every day returned by ``extract_days(df)`` and every such time,
     one output row is produced: its index is the day combined with the time
     and its value is taken from the first column of the row of ``df`` the
     cursor currently points at. The cursor moves to the next row of ``df``
     whenever the price time is later than the time of the current row.

     NOTE(review): the cursor restarts at the first row of ``df`` for every
     day, so each day reads from the beginning of ``df`` -- confirm that this
     is intended when ``df`` spans several days.

     Returns a single-column frame named after the first column of ``df``.
     """
     price = pd.read_csv(self.path_to_price, parse_dates=True, index_col='Unnamed: 0')
     index = []
     values = []
     days = extract_days(df)
     time_intervs_in_day = [stamp.time() for stamp in price.index]
     for day in days:
         pos = 0
         for time_intv in time_intervs_in_day:
             # Step to the next row once the price time passes the current row's time.
             if time_intv > df.index[pos].time():
                 pos += 1
             values.append(df.values[pos][0])
             index.append(datetime.combine(day, time_intv))
     return pd.DataFrame(values, columns=[df.columns[0]], index=index)
    def transform(self, df, test_data=False):
        '''Average the readings of ``df`` over runs of equal consecutive prices.

        The price CSV at ``self.path_to_price`` is scanned in order and its
        times of day are grouped into runs during which the price (first
        column) does not change; each run is keyed by its last time of day.
        For every run, the first column of ``df`` is collected per time of
        day and averaged across the times of the run, giving one value per
        day (days as returned by ``extract_days``), indexed by the day
        combined with the run's key time.

        Returns, when ``test_data`` is true, one frame with all runs stacked
        and sorted by index; otherwise a dict mapping the run's key time to
        its frame.

        The caller's ``df`` is not modified: the helper 'time' column is
        added to a copy only.
        '''
        price = pd.read_csv(self.path_to_price, parse_dates=True, index_col='Unnamed: 0')
        prices = price[price.columns[0]].values
        price_times = [stamp.time() for stamp in price.index]
        last = len(price_times) - 1
        # key = last time of a constant-price run, value = list of the run's times
        times_corr_by_price = {}
        run = []
        for i, time_intv in enumerate(price_times):
            run.append(time_intv)
            # A run ends at the last price row or when the next price differs.
            if i == last or prices[i] != prices[i + 1]:
                times_corr_by_price[time_intv] = run
                run = []
        #
        # assign() returns a new frame, so the input frame gains no 'time' column.
        tagged = df.assign(time=[stamp.time() for stamp in df.index])
        value_col = df.columns[0]
        # key = run key time, value = dict {time of day: readings at that time}
        price_corr_data = {}
        for time_intv, times in times_corr_by_price.items():
            price_corr_data[time_intv] = {
                time: tagged[tagged['time'] == time][value_col].values
                for time in times}

        days = extract_days(df)

        def mean_frame(time_intv, data):
            # Mean across the run's times of day: one row per day.
            return pd.DataFrame(pd.DataFrame(data).mean(axis=1).values,
                                columns=[value_col],
                                index=[datetime.combine(day, time_intv) for day in days])

        if test_data:
            data_means_grouped_by_price = pd.DataFrame()
            for time_intv, data in price_corr_data.items():
                data_means_grouped_by_price = data_means_grouped_by_price.append(
                    mean_frame(time_intv, data))
            data_means_grouped_by_price.sort_index(inplace=True, kind='mergesort')
        else:
            data_means_grouped_by_price = {
                time_intv: mean_frame(time_intv, data)
                for time_intv, data in price_corr_data.items()}
        #
        return data_means_grouped_by_price
Exemple #9
0
def run_optimization(environment_params):
    """Evaluate bill savings for a range of battery capacities and plot the results.

    The household id, model name, part of week, number of training days and
    price file name are read from ``environment_params``. Predicted demand and
    test data are loaded from ``../predictions/`` and pricing data from
    ``../clean_data/``. Battery settings are read from
    ``../params/battery_params.txt``.

    For every capacity in the configured range a ``MinimizeDailyBill`` model
    is fitted on the predicted demand, and its bill is compared with the bill
    of the test data without a battery. The savings are printed and plotted.
    The user is then asked for one capacity whose battery is plotted in detail.

    Raises:
        KeyError: if the capacity entered by the user (rounded to two
            decimals, like the stored capacities) was not evaluated.
    """
    household_id = environment_params['household_id'].values[0]
    model_name = environment_params['model_name'].values[0]
    part_of_week = environment_params['part_of_week'].values[0]
    train_days = str(environment_params['train_days'].values[0])
    price_file_name = environment_params['price_file_name'].values[0]
    #
    path_to_pred = '../predictions/'+household_id+'_'+model_name+'_'+part_of_week+'_'+train_days+'.csv'
    path_to_test = '../predictions/'+household_id+'_test_'+part_of_week+'_'+train_days+'.csv'
    path_to_price = '../clean_data/'+price_file_name+'.csv'
    #
    print_process('Loading Predicted Demand, Test, and Pricing Data')
    demand = pd.read_csv(path_to_pred, parse_dates=True, index_col='Unnamed: 0')
    test = pd.read_csv(path_to_test, parse_dates=True, index_col='Unnamed: 0')
    # Test values are re-indexed on the demand index.
    test = pd.DataFrame(test.values, columns=[test.columns[0]], index=demand.index)
    price = pd.read_csv(path_to_price, parse_dates=True, index_col='Unnamed: 0')
    #
    # Extrapolation of the last day of pricing data to the number of days in
    # the demand, in case there are fewer pricing days than demand days.
    num_demand_days = len(extract_days(demand))
    price_days = extract_days(price)
    num_price_days = len(price_days)
    if num_demand_days > num_price_days:
        # The query string refers to this local variable by name.
        price_last_day = price_days[num_price_days-1]
        price_dummy = price.query('index >= @price_last_day')
        # One concat instead of appending the same frame in a loop.
        price = pd.concat([price] + [price_dummy]*(num_demand_days-num_price_days))
    price = pd.DataFrame(price.values, columns=[price.columns[0]], index=demand.index)
    #
    battery_params = pd.read_csv('../params/battery_params.txt', delim_whitespace=True)
    battery_capacity_interval = battery_params['battery_capacity_interval'].values[0]
    battery_capacity_max =\
    battery_capacity_interval*(1+battery_params['battery_capacity_multiple'].values[0])
    efficiency = battery_params['efficiency'].values[0]
    charging_rate = battery_params['charging_rate'].values[0]
    #
    battery_capacities =\
    np.arange(battery_capacity_interval, battery_capacity_max, battery_capacity_interval)
    #
    # The bill without a battery does not depend on the battery, so it is
    # computed once instead of on every loop iteration.
    daily_bill_no_battery = np.dot(np.array(test.values).T, np.array(price.values))[0][0]
    #
    savings = []
    batteries = {}  # key = capacity rounded to 2 decimals, value = fitted battery
    for battery_capacity in battery_capacities:
        mdb = MinimizeDailyBill(battery_capacity, efficiency, charging_rate)
        battery = mdb.fit(demand, price)
        batteries[round(battery_capacity, 2)] = battery
        daily_bill_w_battery = mdb.daily_bill
        saving = round(100.0*(daily_bill_no_battery-daily_bill_w_battery)/daily_bill_no_battery, 2)
        savings.append(saving)
        print('|==============================================================|')
        print('Daily Bill with no Battery: {}'.format(daily_bill_no_battery))
        print('')
        print('Daily Bill with Battery: {}'.format(daily_bill_w_battery))
        print('')
        print('Daily Savings: {}% for the battery capacity: {}'.format(saving, round(battery_capacity, 2)))
    #
    day_to_pred = demand.index[0].date()
    plot_battery_savings(battery_capacities, savings, day_to_pred, model_name, part_of_week, household_id, train_days)
    print('')
    # Round the answer the same way the dictionary keys were rounded, so that
    # the lookup does not fail on extra decimals.
    battery_capacity = round(
        float(raw_input('Enter battery capacity for wich to plot battery phases of operation: ')), 2)
    if battery_capacity not in batteries:
        raise KeyError('No battery was evaluated for capacity {}; available capacities: {}'.format(
            battery_capacity, sorted(batteries)))
    plot_results(demand, price, batteries[battery_capacity], day_to_pred, model_name, part_of_week, household_id, train_days, battery_capacity)
Exemple #10
0
 model_name = environment_params['model_name'].values[0]
 print_process('Making predictions from ' + model_name + ' Model')
 pdtia = PredDataTimeIntervARMA(environment_params)
 df_pred = pdtia.predict()
 #
 # Loading test data and extracting only days specified to compare predictions with
 train_days = str(environment_params['train_days'].values[0])
 part_of_week = environment_params['part_of_week'].values[0]
 num_days_pred = environment_params['num_days_pred'].values[0]
 #
 path_to_test_data = '../clean_data/' + household_id + '_test_' + part_of_week + '_' + train_days + '.csv'
 df_test = pd.read_csv(path_to_test_data,
                       parse_dates=True,
                       index_col='Unnamed: 0')
 #
 days = extract_days(df_test)
 first_day_to_pred = days[0]
 last_day_to_pred = days[num_days_pred]
 df_test_days = df_test.query(
     'index >= @first_day_to_pred and index < @last_day_to_pred')
 #
 if model_name == 'PriceCorrARMA':
     price_file_name = environment_params['price_file_name'].values[
         0]
     mcp = ModelsCorrPrice(price_file_name)
     df_pred = mcp.transform_inverse(df_pred)
     #
     df_test_days = mcp.transform(df_test_days, test_data=True)
     df_test_days = mcp.transform_inverse(df_test_days)
 #
 df_pred = pd.DataFrame(df_pred.values,
def run_optimization(demand, price, solar=None, solar_price=None):
    """Evaluate bill savings for a range of battery capacities and plot the results.

    Args:
        demand: frame of demand values; its index is used for all other frames.
        price: frame of prices; only its first column is used. When it covers
            fewer days than ``demand`` its last day is repeated.
        solar: optional solar generation data. Defaults to ``None``.
        solar_price: optional solar price data. Defaults to ``None``.

    Solar data is passed to the model only when both ``solar`` and
    ``solar_price`` are given and non-empty. Battery settings are read from
    ``../params/battery_params.txt``. For every capacity in the configured
    range a ``MinimizeBill`` model is fitted and its bill is compared with the
    bill of ``demand`` without a battery. The savings are printed and plotted.
    The user is then asked for one capacity whose battery is plotted in detail.

    Raises:
        KeyError: if the capacity entered by the user (rounded to two
            decimals, like the stored capacities) was not evaluated.
    """
    # len() is used rather than truthiness because the truth value of a
    # DataFrame is ambiguous.
    use_solar = (solar is not None and solar_price is not None
                 and len(solar) != 0 and len(solar_price) != 0)
    #
    # Extrapolation of the last day of pricing data to the number of days in
    # the demand, in case there are fewer pricing days than demand days.
    num_demand_days = len(extract_days(demand))
    price_days = extract_days(price)
    num_price_days = len(price_days)
    if num_demand_days > num_price_days:
        missing_days = num_demand_days - num_price_days
        # The query strings refer to this local variable by name.
        price_last_day = price_days[num_price_days - 1]
        price_dummy = price.query('index >= @price_last_day')
        # One concat instead of appending the same frame in a loop.
        price = pd.concat([price] + [price_dummy] * missing_days)
        if use_solar:
            solar_price_dummy = solar_price.query('index >= @price_last_day')
            solar_price = pd.concat([solar_price] + [solar_price_dummy] * missing_days)
    price = pd.DataFrame(price.values,
                         columns=[price.columns[0]],
                         index=demand.index)
    if use_solar:
        solar_price = pd.DataFrame(solar_price.values,
                                   columns=[solar_price.columns[0]],
                                   index=demand.index)
    #
    battery_params = pd.read_csv('../params/battery_params.txt',
                                 delim_whitespace=True)
    battery_capacity_interval = battery_params[
        'battery_capacity_interval'].values[0]
    battery_capacity_max =\
    battery_capacity_interval*(1+battery_params['battery_capacity_multiple'].values[0])
    efficiency = battery_params['efficiency'].values[0]
    charging_rate = battery_params['charging_rate'].values[0]
    #
    battery_capacities =\
    np.arange(battery_capacity_interval, battery_capacity_max, battery_capacity_interval)
    #
    # The bill without a battery does not depend on the battery, so it is
    # computed once instead of on every loop iteration.
    daily_bill_no_battery = np.dot(np.array(demand.values).T,
                                   np.array(price.values))[0][0]
    #
    savings = []
    batteries = {}  # key = capacity rounded to 2 decimals, value = fitted battery
    for battery_capacity in battery_capacities:
        mb = MinimizeBill(battery_capacity, efficiency, charging_rate)
        if use_solar:
            battery = mb.fit(demand, price, solar, solar_price)
        else:
            battery = mb.fit(demand, price)
        batteries[round(battery_capacity, 2)] = battery
        daily_bill_w_battery = mb.bill
        saving = round(
            100.0 * (daily_bill_no_battery - daily_bill_w_battery) /
            daily_bill_no_battery, 2)
        savings.append(saving)
        print('|==============================================================|')
        print('Daily Bill with no Battery: {}'.format(daily_bill_no_battery))
        print('')
        print('Daily Bill with Battery: {}'.format(daily_bill_w_battery))
        print('')
        print('Daily Savings: {}% for the battery capacity: {}'.format(
            saving, round(battery_capacity, 2)))
    #
    # Number of days in the demand, already counted above.
    day_to_pred = num_demand_days
    plot_battery_savings(battery_capacities, savings, day_to_pred)
    print('')
    # Round the answer the same way the dictionary keys were rounded, so that
    # the lookup does not fail on extra decimals.
    battery_capacity = round(
        float(
            raw_input(
                'Enter battery capacity for wich to plot battery phases of operation: '
            )), 2)
    if battery_capacity not in batteries:
        raise KeyError('No battery was evaluated for capacity {}; available capacities: {}'.format(
            battery_capacity, sorted(batteries)))
    plot_results(demand, price, batteries[battery_capacity], day_to_pred,
                 battery_capacity)