Beispiel #1
0
 def calculate_d_payback(self):
     """Build the demand-side payback output and store it on ``self.demand.outputs.d_payback``.

     Annual demand-side costs (``self.demand.d_annual_costs_payback``) are
     divided by sales (``self.demand.outputs.d_sales``), tagged with each
     subsector's stock unit, indexed by ``lifetime_year``
     (``year - vintage + 1``) and cumulatively summed within each group of
     the remaining index levels. Rows equal to zero are dropped before the
     result is passed through ``return_cleaned_output('d_payback')``.

     Side effects: the column of ``self.demand.d_annual_costs_payback`` is
     renamed in place to ``'value'`` and ``self.demand.outputs.d_payback`` is
     overwritten. Nothing is returned.
     """
     # Column label for the final output: "<currency_year> <currency_name>".
     cost_unit = cfg.getParam('currency_year') + " " + cfg.getParam('currency_name')
     # Only vintages/years from the first supply year onward are kept.
     initial_vintage = min(cfg.supply_years)
     # NOTE: this is the same object as self.demand.d_annual_costs_payback, so the
     # column rename on the next line also changes that attribute.
     demand_side_df = self.demand.d_annual_costs_payback
     demand_side_df.columns = ['value']
     demand_side_df = demand_side_df[demand_side_df.index.get_level_values('vintage')>=initial_vintage]
     demand_side_df = demand_side_df[demand_side_df.index.get_level_values('year')>=initial_vintage]
     # Work on a deep copy so the stored sales output is not modified.
     sales_df = copy.deepcopy(self.demand.outputs.d_sales)
     # Presumably renames the 'year' level to 'vintage' (the next line filters on
     # 'vintage') -- confirm against util.replace_index_name.
     util.replace_index_name(sales_df,'vintage','year')
     sales_df = sales_df[sales_df.index.get_level_values('vintage')>=initial_vintage]     
     # Presumably adds a 'year' index level spanning cfg.supply_years -- confirm
     # against util.add_and_set_index.
     sales_df = util.add_and_set_index(sales_df,'year',cfg.supply_years)
     # Align the sales index (level order and rows) with the cost frame before dividing.
     sales_df.index = sales_df.index.reorder_levels(demand_side_df.index.names)
     sales_df = sales_df.reindex(demand_side_df.index).sort_index()
     self.demand.outputs.d_payback = util.DfOper.divi([demand_side_df, sales_df])
     # Keep only rows with finite values ...
     self.demand.outputs.d_payback = self.demand.outputs.d_payback[np.isfinite(self.demand.outputs.d_payback.values)]        
     # ... then replace any inf/NaN with 0.
     # NOTE(review): this looks redundant after the finite filter above -- confirm.
     self.demand.outputs.d_payback = self.demand.outputs.d_payback.replace([np.inf,np.nan],0)
     # Tag rows of every non-'link' subsector that has a stock with that stock's unit.
     for sector in self.demand.sectors.values():
       for subsector in sector.subsectors.values():
             if hasattr(subsector,'stock') and subsector.sub_type!='link':
                 indexer = util.level_specific_indexer(self.demand.outputs.d_payback,'subsector',subsector.id)
                 self.demand.outputs.d_payback.loc[indexer,'unit'] = subsector.stock.unit.upper()
     # Move 'unit' into the index; the single remaining column is then relabelled.
     self.demand.outputs.d_payback = self.demand.outputs.d_payback.set_index('unit', append=True)
     self.demand.outputs.d_payback.columns = [cost_unit.upper()]
     # lifetime_year counts years since the vintage, starting at 1.
     self.demand.outputs.d_payback['lifetime_year'] = self.demand.outputs.d_payback.index.get_level_values('year')-self.demand.outputs.d_payback.index.get_level_values('vintage')+1    
     self.demand.outputs.d_payback = self.demand.outputs.d_payback.set_index('lifetime_year',append=True)
     self.demand.outputs.d_payback = util.remove_df_levels(self.demand.outputs.d_payback,'year')
     # Cumulative sum within each group formed by every index level except lifetime_year.
     self.demand.outputs.d_payback = self.demand.outputs.d_payback.groupby(level = [x for x in self.demand.outputs.d_payback.index.names if x !='lifetime_year']).transform(lambda x: x.cumsum())
     # Drop rows whose cumulative value is exactly zero.
     self.demand.outputs.d_payback = self.demand.outputs.d_payback[self.demand.outputs.d_payback[cost_unit.upper()]!=0]
     self.demand.outputs.d_payback = self.demand.outputs.return_cleaned_output('d_payback')
Beispiel #2
0
 def calc_and_format_embodied_costs(self):
     """Return the embodied supply cost frames, cleaned and labelled for output.

     Every frame in ``self.demand.outputs.demand_embodied_energy_costs`` is
     passed through ``Output.clean_df``, its columns are replaced by a single
     upper-cased "<currency_year> <currency_name>" label, and it is handed to
     ``util.add_to_df_index`` with the names 'EXPORT/DOMESTIC' and
     'SUPPLY/DEMAND' and the keys 'DOMESTIC' and 'SUPPLY'.

     Returns:
         list: the formatted frames, in the original order.
     """
     cleaned_frames = []
     for raw_frame in self.demand.outputs.demand_embodied_energy_costs:
         cleaned_frames.append(Output.clean_df(raw_frame))

     currency_label = cfg.getParam('currency_year') + " " + cfg.getParam('currency_name')
     for frame in cleaned_frames:
         frame.columns = [currency_label.upper()]

     return [
         util.add_to_df_index(frame,
                              names=['EXPORT/DOMESTIC', "SUPPLY/DEMAND"],
                              keys=["DOMESTIC", "SUPPLY"])
         for frame in cleaned_frames
     ]
Beispiel #3
0
 def calculate_tco(self):
     """Build the total-cost-of-ownership output and store it on ``self.outputs.c_tco``.

     Supply-side embodied energy costs and demand-side levelized costs are each
     divided by service demand, stacked under a 'COST TYPE' index level
     ('SUPPLY-SIDE' / 'DEMAND-SIDE'), tagged with each subsector's service
     demand unit, relabelled with the currency column name, stripped of zero
     rows and passed through ``return_cleaned_output('c_tco')``.

     Side effects: the column of ``self.demand.d_levelized_costs_tco`` is
     renamed in place to ``'value'`` and ``self.outputs.c_tco`` is overwritten.
     Nothing is returned.
     """
     # Column label for the final output: "<currency_year> <currency_name>".
     cost_unit = cfg.getParam('currency_year') + " " + cfg.getParam('currency_name')
     # Only vintages from the first supply year onward are kept in every input.
     initial_vintage = min(cfg.supply_years)
     supply_side_df = self.demand.outputs.demand_embodied_energy_costs_tco
     supply_side_df = supply_side_df[supply_side_df.index.get_level_values('vintage')>=initial_vintage]
     # NOTE: same object as self.demand.d_levelized_costs_tco, so the column rename
     # on the next line also changes that attribute.
     demand_side_df = self.demand.d_levelized_costs_tco
     demand_side_df.columns = ['value']
     demand_side_df = demand_side_df[demand_side_df.index.get_level_values('vintage')>=initial_vintage]
     service_demand_df = self.demand.d_service_demand_tco
     service_demand_df = service_demand_df[service_demand_df.index.get_level_values('vintage')>=initial_vintage]
     keys = ['SUPPLY-SIDE', 'DEMAND-SIDE']
     names = ['COST TYPE']
     # Cost per unit of service demand for each side; the 'unit' level is removed from
     # service demand before dividing and re-attached per subsector further down.
     self.outputs.c_tco = pd.concat([util.DfOper.divi([supply_side_df,util.remove_df_levels(service_demand_df,'unit')]),
                                     util.DfOper.divi([demand_side_df,util.remove_df_levels(service_demand_df,'unit')])],
                                     keys=keys,names=names) 
     # Division artefacts (inf/NaN) and negative values are zeroed out.
     self.outputs.c_tco = self.outputs.c_tco.replace([np.inf,np.nan],0)
     self.outputs.c_tco[self.outputs.c_tco<0]=0        
     # Tag rows of every subsector that has both service demand and a stock with the
     # service demand unit.
     for sector in self.demand.sectors.values():
       for subsector in sector.subsectors.values():
             if hasattr(subsector,'service_demand') and hasattr(subsector,'stock'):
                 indexer = util.level_specific_indexer(self.outputs.c_tco,'subsector',subsector.id)
                 self.outputs.c_tco.loc[indexer,'unit'] = subsector.service_demand.unit.upper()
     # Move 'unit' into the index; the single remaining column is then relabelled.
     self.outputs.c_tco = self.outputs.c_tco.set_index('unit',append=True)
     self.outputs.c_tco.columns = [cost_unit.upper()]
     # Drop rows whose value is exactly zero.
     self.outputs.c_tco= self.outputs.c_tco[self.outputs.c_tco[cost_unit.upper()]!=0]
     self.outputs.c_tco = self.outputs.return_cleaned_output('c_tco')
Beispiel #4
0
 def calc_and_format_export_costs(self):
     """Return the export cost frame formatted for the combined outputs.

     Returns ``None`` when ``self.supply.export_costs`` is ``None``. Otherwise
     a copy of the export costs is geo-mapped from the supply primary geography
     to the combined outputs geography, cleaned with ``Output.clean_df``,
     handed to ``util.add_to_df_index`` with the keys 'EXPORT' and 'SUPPLY',
     and given a single upper-cased "<currency_year> <currency_name>" column.
     """
     raw_costs = self.supply.export_costs
     if raw_costs is None:
         return None

     mapped = GeoMapper.geo_map(raw_costs.copy(),
                                GeoMapper.supply_primary_geography,
                                GeoMapper.combined_outputs_geography,
                                'total')
     formatted = Output.clean_df(mapped)
     # Called for its effect on `formatted`; the return value is not used.
     util.replace_index_name(formatted, 'FINAL_ENERGY', 'SUPPLY_NODE_EXPORT')
     formatted = util.add_to_df_index(formatted,
                                      names=['EXPORT/DOMESTIC', "SUPPLY/DEMAND"],
                                      keys=["EXPORT", "SUPPLY"])
     currency_label = cfg.getParam('currency_year') + " " + cfg.getParam('currency_name')
     formatted.columns = [currency_label.upper()]
     return formatted
Beispiel #5
0
 def calculate_combined_emissions_results(self):
     """Assemble exported, supply-side and demand-side emissions into ``self.outputs.c_emissions``.

     The three sources are each tagged with an 'EMISSIONS_TYPE' index level
     ('EXPORTED', 'SUPPLY_SIDE', 'DEMAND_SIDE'). Export and direct frames get
     any index level they lack relative to the first embodied frame, filled
     with "N/A". The resulting list of frames has its geography index levels
     renamed, zero rows dropped, its column relabelled with the upper-cased
     mass unit, and its index levels ordered like the first embodied frame.

     Side effects: overwrites ``self.outputs.c_emissions``. Nothing is returned.
     """
     export_emissions = self.calc_and_format_export_emissions()
     embodied_emissions_list = self.calc_and_format_embodied_supply_emissions()
     direct_emissions_list = self.calc_and_format_direct_demand_emissions()
     # NOTE(review): these calls run before the `is not None` checks below, so a None
     # input has to be tolerated by util.add_and_set_index (and, for the direct list,
     # the comprehension would already fail on None) -- confirm.
     export_emissions = util.add_and_set_index(export_emissions,['EMISSIONS_TYPE'],['EXPORTED'])
     embodied_emissions_list = [util.add_and_set_index(x, ['EMISSIONS_TYPE'], ['SUPPLY_SIDE']) for x in embodied_emissions_list]
     direct_emissions_list = [util.add_and_set_index(x,['EMISSIONS_TYPE'],['DEMAND_SIDE']) for x in direct_emissions_list]
     if export_emissions is not None:
         # Add every index level present on the first embodied frame but missing here,
         # filled with "N/A", then aggregate over the embodied frame's levels.
         for name in [x for x in embodied_emissions_list[0].index.names if x not in export_emissions.index.names]:
             export_emissions[name] = "N/A"
             export_emissions.set_index(name,append=True,inplace=True)
         export_emissions = export_emissions.groupby(level=embodied_emissions_list[0].index.names).sum()
     if direct_emissions_list is not None:
         # Same padding for each direct frame, done in place; no groupby is applied here.
         for df in direct_emissions_list:
             for name in [x for x in embodied_emissions_list[0].index.names if x not in df.index.names]:
                 df[name] = "N/A"
                 df.set_index(name,append=True,inplace=True)
     self.outputs.c_emissions = [export_emissions] + embodied_emissions_list + direct_emissions_list
     # Rename the '<GEOGRAPHY>_SUPPLY' / '<GEOGRAPHY>' index levels to '-EMITTED' / '-CONSUMED'.
     # NOTE(review): argument order (new name, old name) is presumed from usage -- confirm
     # against util.replace_index_name, which must also return the frame when inplace=True.
     self.outputs.c_emissions = [util.replace_index_name(x, GeoMapper.combined_outputs_geography.upper() +'-EMITTED', GeoMapper.combined_outputs_geography.upper() +'_SUPPLY',inplace=True) for x in self.outputs.c_emissions]
     self.outputs.c_emissions = [util.replace_index_name(x, GeoMapper.combined_outputs_geography.upper() +'-CONSUMED', GeoMapper.combined_outputs_geography.upper(),inplace=True) for x in self.outputs.c_emissions]
     # Drop rows whose 'VALUE' column is exactly zero.
     self.outputs.c_emissions = [x[x['VALUE']!=0] for x in  self.outputs.c_emissions]
     emissions_unit = cfg.getParam('mass_unit')
     # Relabel the single column with the upper-cased mass unit.
     for x in self.outputs.c_emissions:
         x.columns = [emissions_unit.upper()]
     # Order each frame's index levels like the first embodied frame, skipping levels it lacks.
     for x in self.outputs.c_emissions: x.index = x.index.reorder_levels([l for l in embodied_emissions_list[0].index.names if l in x.index.names])
Beispiel #6
0
    def __init__(self, database_path=None):
        """Load shape data from the database and build the active ``Shape`` objects.

        Args:
            database_path: passed to ``CsvDatabase.get_database``.

        Raises:
            ValueError: if the database holds no shape slices.
        """
        db = CsvDatabase.get_database(database_path)
        db.shapes.load_all()

        weather_years_param = cfg.getParam('weather_years')
        self.cfg_weather_years = [int(year) for year in weather_years_param.split(',')]
        self.active_dates_index = self.get_active_dates(self.cfg_weather_years)
        self.active_dates_index_unique = self.active_dates_index.unique()
        self.time_slice_elements = create_time_slice_elements(self.active_dates_index)
        self.num_active_years = num_active_years(self.active_dates_index)
        self.cfg_hash_tuple = self.get_hash_tuple()
        self.cfg_hash = hash(self.cfg_hash_tuple)

        if len(db.shapes.slices.keys()) == 0:
            raise ValueError(
                "No shapes data found, check path to the database. The folder ShapeData must be located in the database folder specified"
            )

        shape_meta = db.get_table("Shapes").data
        self.data = {}
        for _, meta in shape_meta.iterrows():
            shape_name = meta['name']
            if shape_name not in db.shapes.slices.keys():
                # Metadata row without matching shape data: report it and move on.
                logging.error('Skipping shape {}: cannot find shape data'.format(shape_name))
            elif meta['is_active']:
                self.data[shape_name] = Shape(meta,
                                              db.shapes.get_slice(shape_name),
                                              self.active_dates_index,
                                              self.active_dates_index_unique,
                                              self.time_slice_elements,
                                              self.num_active_years)

        self.process_active_shapes()
Beispiel #7
0
    def __init__(self, name, scenario=None):
        """Load the allocation from the database and remap it when raw values exist.

        When ``self.raw_values`` is set, its sums per ('year', geography) group
        are asserted to equal 1, the raw values are remapped into ``values`` on
        the 'demand_primary_geography' parameter, and ``values`` is sorted by
        index in place.
        """
        super(DispatchFeederAllocation, self).__init__(name, scenario=scenario)
        self.init_from_db(name, scenario)

        if self.raw_values is None:
            return

        assert (
            self.raw_values.groupby(level=['year', self.geography]).sum() == 1
        ).all().all()
        target_geography = getParam('demand_primary_geography')
        self.remap(map_from='raw_values',
                   map_to='values',
                   converted_geography=target_geography)
        self.values.sort_index(inplace=True)
Beispiel #8
0
 def get_hash_tuple(cls):
     """Return a tuple built from the geography settings and the weather years.

     The tuple holds the demand and supply primary geographies, the sorted
     primary subset, the breakout geography, and then each year of the
     comma-separated 'weather_years' parameter as an int.
     """
     weather_years = tuple(
         int(year) for year in cfg.getParam('weather_years').split(',')
     )
     geography_part = (
         GeoMapper.demand_primary_geography,
         GeoMapper.supply_primary_geography,
         tuple(sorted(GeoMapper.primary_subset)),
         tuple(GeoMapper.breakout_geography),
     )
     return geography_part + weather_years
Beispiel #9
0
 def calculate_combined_cost_results(self):
     """Assemble embodied, direct and export costs into ``self.outputs.c_costs``.

     Each source is tagged with a 'COST_TYPE' index level ('SUPPLY-SIDE',
     'DEMAND-SIDE', 'EXPORTED'). Direct and export frames get any index level
     they lack relative to the first embodied frame, filled with "N/A", and
     are summed over that frame's index levels. Zero rows are dropped and each
     frame's index levels are ordered like the first embodied frame.

     Side effects: overwrites ``self.outputs.c_costs``. Nothing is returned.
     """
     # Looked up as in the original implementation; the value is not used below.
     currency_label = cfg.getParam('currency_year') + " " + cfg.getParam('currency_name')

     exported = self.calc_and_format_export_costs()
     embodied_frames = self.calc_and_format_embodied_costs()
     direct = self.calc_and_format_direct_demand_costs()

     exported = util.add_and_set_index(exported, ['COST_TYPE'], ['EXPORTED'])
     embodied_frames = [util.add_and_set_index(frame, ['COST_TYPE'], ['SUPPLY-SIDE'])
                        for frame in embodied_frames]
     direct = util.add_and_set_index(direct, ['COST_TYPE'], ['DEMAND-SIDE'])

     def pad_and_collapse(frame):
         # Give `frame` every index level of the first embodied frame (missing ones
         # filled with "N/A"), then sum over those levels. None passes through.
         if frame is None:
             return None
         missing_levels = [level for level in embodied_frames[0].index.names
                           if level not in frame.index.names]
         for level in missing_levels:
             frame[level] = "N/A"
             frame.set_index(level, append=True, inplace=True)
         return frame.groupby(level=embodied_frames[0].index.names).sum()

     exported = pad_and_collapse(exported)
     direct = pad_and_collapse(direct)

     self.outputs.c_costs = embodied_frames + [direct] + [exported]
     self.outputs.c_costs = [frame[frame.values != 0] for frame in self.outputs.c_costs]
     for frame in self.outputs.c_costs:
         frame.index = frame.index.reorder_levels(embodied_frames[0].index.names)
Beispiel #10
0
    def __init__(self, database_path):
        """Set up the pint registry, cache config values and load conversion tables.

        Args:
            database_path: passed to ``get_database``.
        """
        # pint registry used for unit conversions
        self.ureg = pint.UnitRegistry()
        self.cfg_energy_unit = cfg.getParam('calculation_energy_unit')
        self.cfg_currency = cfg.getParam('currency_name')
        self.cfg_currency_year = cfg.getParamAsInt('currency_year')

        db = get_database(database_path)
        # Both conversion tables are indexed by (currency, year) and sorted.
        self.currency_table = db.get_table("CurrenciesConversion").data
        self.currency_table = self.currency_table.set_index(['currency', 'year']).sort_index()
        self.inflation_table = db.get_table("InflationConversion").data
        self.inflation_table = self.inflation_table.set_index(['currency', 'year']).sort_index()

        for unit_def in UnitConverter._unit_defs:
            # The unit name is whatever precedes ' = ' in the definition string.
            unit_name = unit_def.split(' = ')[0]
            if not hasattr(self.ureg, unit_name):
                self.ureg.define(unit_def)
            else:
                logging.debug(
                    'pint already has unit {}, unit is not being redefined'.format(unit_name))
 def convert(self):
     """Convert ``self.values`` to model_energy_unit per model_time_step.

     With a ``time_unit`` the source unit is
     ``capacity_or_energy_unit / time_unit``. Without one it is
     ``(capacity_or_energy_unit + "_" + time_step) / time_step``.
     """
     energy_unit = cfg.calculation_energy_unit
     time_step = cfg.getParam('time_step')
     if self.time_unit is None:
         # sales is a capacity unit: build the unit name "<capacity unit>_<time step>"
         # so the conversion can be done in energy terms
         source_num = self.capacity_or_energy_unit + "_" + time_step
         source_den = time_step
     else:
         # sales has a time_unit, so the unit is energy and must be converted to capacity
         source_num = self.capacity_or_energy_unit
         source_den = self.time_unit
     self.values = UnitConverter.unit_convert(self.values,
                                              unit_from_num=source_num,
                                              unit_from_den=source_den,
                                              unit_to_num=energy_unit,
                                              unit_to_den=time_step)
Beispiel #12
0
    def standardize_time_across_timezones(self, df):
        """Reindex ``df`` on the active dates expressed in the dispatch outputs timezone.

        Args:
            df: frame with a 'weather_datetime' index level; must contain no nulls.

        Returns:
            A copy of ``df`` whose 'weather_datetime' level has been reindexed on
            ``self.active_dates_index_unique`` (as a fixed-offset DatetimeIndex),
            with gaps back-filled and then forward-filled, and any rows that
            still contain nulls removed.

        Raises:
            AssertionError: if ``df`` contains any null value.
        """
        tz = pytz.timezone(cfg.getParam('dispatch_outputs_timezone'))
        # Offset in minutes, computed from the zone's UTC offset plus DST as of 2015-01-01.
        offset = (tz.utcoffset(DT.datetime(2015, 1, 1)) +
                  tz.dst(DT.datetime(2015, 1, 1))).total_seconds() / 60.
        new_index = pd.DatetimeIndex(self.active_dates_index_unique,
                                     tz=pytz.FixedOffset(offset))
        # if we have hydro year, when this does a reindex, it can introduce NaNs, so we want to remove them after
        assert not df.isnull().any().any()
        standardize_df = util.reindex_df_level_with_new_elements(
            df.copy(), 'weather_datetime', new_index)

        # Fill gaps within each group of the non-datetime index levels: backward first,
        # then forward for whatever remains.
        levels = [n for n in df.index.names if n != 'weather_datetime']
        standardize_df = standardize_df.groupby(level=levels).fillna(
            method='bfill').fillna(method='ffill')
        # Drop anything that is still null after filling.
        standardize_df = standardize_df[~standardize_df.isnull().values]

        return standardize_df
 def convert(self):
     """
     Convert values to model capacity units (energy_unit/time_step).

     Does nothing when ``self.values`` is None. Otherwise the values are
     divided by ``self.input_timestep`` and unit-converted. With a
     ``time_unit`` the source unit is ``capacity_or_energy_unit / time_unit``;
     without one it is ``(capacity_or_energy_unit + "_" + time_step) / time_step``.
     """
     if self.values is None:
         return

     energy_unit = cfg.calculation_energy_unit
     time_step = cfg.getParam('time_step')
     if self.time_unit is None:
         source_num = self.capacity_or_energy_unit + "_" + time_step
         source_den = time_step
     else:
         source_num = self.capacity_or_energy_unit
         source_den = self.time_unit
     self.values = UnitConverter.unit_convert(self.values / self.input_timestep,
                                              unit_from_num=source_num,
                                              unit_from_den=source_den,
                                              unit_to_num=energy_unit,
                                              unit_to_den=time_step)
 def convert(self):
     """
     Convert raw_values into ``self.values`` in model units and set ``self.absolute``.

     The unit conversion targets model_time_step per model_energy_unit. When
     ``self.definition`` is 'absolute' the converted values are also passed
     through ``UnitConverter.currency_convert`` and ``self.absolute`` is set
     to True; otherwise ``self.absolute`` is set to False.
     """
     energy_unit = cfg.calculation_energy_unit
     time_step = cfg.getParam('time_step')

     has_time_unit = hasattr(self, 'time_unit') and self.time_unit is not None
     if has_time_unit:
         # a cost with a time_unit is per unit of energy and must be converted to capacity
         source_den = self.capacity_or_energy_unit
         source_num = self.time_unit
     else:
         # a cost per capacity unit: build the unit name "<capacity unit>_<time step>"
         # so the conversion can be done in energy terms
         source_den = self.capacity_or_energy_unit + "_" + time_step
         source_num = time_step
     self.values = UnitConverter.unit_convert(self.raw_values,
                                              unit_from_den=source_den,
                                              unit_from_num=source_num,
                                              unit_to_den=energy_unit,
                                              unit_to_num=time_step)

     is_absolute = self.definition == 'absolute'
     if is_absolute:
         self.values = UnitConverter.currency_convert(self.values, self.currency, self.currency_year)
     self.absolute = is_absolute
    def min_year(self):
        """Calculate the minimum or start year of data in the technology specification.

        Used to determine start year of subsector for analysis.

        Starts from the 'current_year' config parameter and lowers it to the
        smallest 'vintage' index value found on any attribute of ``self`` that
        has both a ``__dict__`` and a ``raw_values`` attribute.

        NOTE: the result is stored in ``self.min_year``, which replaces this
        method on the instance with the computed value; nothing is returned.
        """
        self.min_year = cfg.getParam('current_year')
        for att in vars(self):
            obj = getattr(self, att)
            # Only attributes that are objects carrying raw_values are inspected.
            if not (hasattr(obj, '__dict__') and hasattr(obj, 'raw_values')):
                continue
            try:
                att_min_year = min(
                    obj.raw_values.index.levels[util.position_in_index(
                        obj.raw_values, 'vintage')])
            except Exception:
                # Best effort: raw_values may be None or lack a 'vintage' level, in
                # which case this attribute does not change the result.
                att_min_year = self.min_year
            if att_min_year < self.min_year:
                self.min_year = att_min_year
Beispiel #16
0
    def localize_shapes(self, df):
        """Localize each time-zone group of ``df`` and convert all of them to the dispatch outputs timezone.

        Args:
            df: frame with 'time zone' and 'weather_datetime' index levels.

        Returns:
            The concatenated groups, with 'weather_datetime' converted to a
            fixed offset derived from the 'dispatch_outputs_timezone'
            parameter, sorted by index.
        """
        local_df = []
        for tz, group in df.groupby(level='time zone'):
            # get the time zone name and figure out the offset from UTC
            # (self.time_zone, when set, takes precedence over the group's own zone)
            tz = pytz.timezone(self.time_zone or format_timezone_str(tz))
            _dt = DT.datetime(2015, 1, 1)
            # Offset in minutes, computed from the zone's UTC offset plus DST as of 2015-01-01.
            offset = (tz.utcoffset(_dt) + tz.dst(_dt)).total_seconds() / 60.
            # localize and then convert to dispatch_outputs_timezone
            df2 = group.tz_localize(pytz.FixedOffset(offset),
                                    level='weather_datetime')
            local_df.append(df2)

        # Same fixed-offset calculation for the target (dispatch outputs) timezone.
        tz = pytz.timezone(cfg.getParam('dispatch_outputs_timezone'))
        offset = (tz.utcoffset(DT.datetime(2015, 1, 1)) +
                  tz.dst(DT.datetime(2015, 1, 1))).total_seconds() / 60.
        local_df = pd.concat(local_df).tz_convert(pytz.FixedOffset(offset),
                                                  level='weather_datetime')
        return local_df.sort_index()
 def convert(self):
     """
     Convert ``self.values`` to model currency and capacity (energy_unit/time_step).

     The values are first passed through ``UnitConverter.currency_convert``
     with ``self.currency`` and ``self.currency_year``, then unit-converted so
     that the denominator is the model energy unit.
     """
     self.values = UnitConverter.currency_convert(self.values,
                                                  self.currency,
                                                  self.currency_year)
     model_energy_unit = cfg.calculation_energy_unit
     model_time_step = cfg.getParam('time_step')
     if self.time_unit is not None:
         # if a cost has a time_unit, then the unit is energy and must be converted to capacity
         # (fixed: this previously read `self.self.capacity_or_energy_unit`, which raises
         # AttributeError)
         self.values = UnitConverter.unit_convert(
             self.values,
             unit_from_den=self.capacity_or_energy_unit,
             unit_from_num=self.time_unit,
             unit_to_den=model_energy_unit,
             unit_to_num=model_time_step)
     else:
         # capacity-based cost: only the denominator is converted, using the unit name
         # "<capacity unit>_<time step>"
         self.values = UnitConverter.unit_convert(
             self.values,
             unit_from_den=self.capacity_or_energy_unit + "_" +
             model_time_step,
             unit_to_den=model_energy_unit)
Beispiel #18
0
def main():
    """Train an XGBoost binary classifier and write test-set predictions to data/result.csv.

    Command-line arguments:
        sys.argv[1]: number of boosting rounds.
        sys.argv[2]: suffix used in the feature-map, dump and model file names.

    Files written: 'xgb<suffix>.fmap', 'xgb<suffix>.dump', 'test<suffix>.model'
    and 'data/result.csv'.
    """
    # Get param
    config = getParam()
    ID = config['ID']
    label = config['label']
    num_round = int(sys.argv[1])
    suffix = str(sys.argv[2])
    # One path for the feature map so that writing it, dumping the model and reading
    # feature scores all refer to the same file.
    fmap_path = 'xgb' + suffix + '.fmap'

    # Load data
    train = pd.read_csv('data/' + str(config['train_file']), encoding='u8')
    test = pd.read_csv('data/' + str(config['test_file']), encoding='u8')

    # Drop features
    drop_list = config['drop_list']
    df_train = train.drop(drop_list, axis=1)
    df_test = test.drop(drop_list, axis=1)

    # Assign feature and label: every column except the ID and the label is a feature
    selected_cols = df_train.columns[(df_train.columns != ID)
                                     & (df_train.columns != label)]
    X_train = df_train.loc[:, selected_cols]
    X_test = df_test.loc[:, selected_cols]
    Y_train, Y_test = df_train[label], df_test[label]

    # Fillna
    X_train = naTransformer(X_train)
    X_test = naTransformer(X_test)

    # Deal with Date feature
    date_cols = config['date_cols']
    X_train = dateTransformer(X_train, date_cols)
    X_test = dateTransformer(X_test, date_cols)

    # Deal with Category Feature: the vectorizer is fitted on train only and reused on test
    vectorizer = DictVectorizer()
    vec_train = vectorizer.fit_transform(X_train.to_dict(orient='records'))
    vec_test = vectorizer.transform(X_test.to_dict(orient='records'))

    # Store the feature names
    features = vectorizer.feature_names_
    feature_map(fmap_path, features)

    # Kept separate from `config` so the run configuration is not shadowed.
    xgb_params = {
        'max_depth': 4,
        'eta': 0.1,
        'silent': 1,
        'objective': 'binary:logistic',
        'nthread': 29,
        'eval_metric': 'auc'
    }

    # Cross validation: only the first of the five folds is trained, as a validation
    # run that reports train/val AUC.
    kf = KFold(n_splits=5, shuffle=True)
    for train_index, val_index in kf.split(Y_train):
        feature_train = vec_train[train_index, :]
        feature_val = vec_train[val_index, :]
        label_train, label_val = Y_train[train_index], Y_train[val_index]

        dtrain = xgb.DMatrix(feature_train, label=label_train)
        dval = xgb.DMatrix(feature_val, label=label_val)
        evallist = [(dval, 'val'), (dtrain, 'train')]
        bst = xgb.train(xgb_params.items(), dtrain, num_round, evallist)
        break

    # Final model trained on the full training set
    dtrain = xgb.DMatrix(vec_train, label=Y_train)
    bst = xgb.train(xgb_params.items(), dtrain, num_round)

    # Dump model
    bst.dump_model('xgb' + suffix + '.dump',
                   fmap=fmap_path,
                   with_stats=True)
    bst.save_model('test' + suffix + '.model')

    # Gain-based feature importance; computed but not currently reported.
    # (fixed: this previously read the hard-coded 'xgb.fmap' rather than the
    # feature map written above)
    feature_score = bst.get_score(fmap=fmap_path, importance_type='gain')

    # Predict on the test set and write ID, score and true label
    dtest = xgb.DMatrix(vec_test, label=Y_test)
    output_test = pd.DataFrame()
    output_test['XGB'] = bst.predict(dtest)
    output_test['ID'] = test[ID].astype(str)
    output_test['Label'] = test[label]
    output_test[['ID', 'XGB', 'Label']].to_csv('data/result.csv',
                                               index=False,
                                               encoding='u8')