def load_timeseries_from_dataframe(timeseries_dataframes, tskey):
    """
    Return the timeseries DataFrame stored under ``tskey``.

    Parameters
    ----------
    timeseries_dataframes : dict of pandas.DataFrame or None
        Timeseries passed by the user to ``calliope.Model(...)``.
    tskey : str
        Key under which the required dataframe is stored.

    Returns
    -------
    pandas.DataFrame

    Raises
    ------
    exceptions.ModelError
        If no dataframes were passed at all, if ``tskey`` is missing, or if
        the stored object is not a DataFrame.
    """
    # If `df=` is called, timeseries_dataframes must be entered
    if timeseries_dataframes is None:
        raise exceptions.ModelError(
            "Error in loading timeseries. Model config specifies df={} but "
            "no timeseries passed as arguments in calliope.Model(...). "
            "Note that, if running from a command line, it is not possible "
            "to read dataframes via `df=...` and you should specify "
            "`file=...` with a CSV file.".format(tskey)
        )
    try:
        df = timeseries_dataframes[tskey]
    except KeyError as err:
        # Chain the KeyError so the root cause remains visible in tracebacks
        raise exceptions.ModelError(
            "Error in loading data from dataframe. "
            "Model attempted to load dataframe with key `{}`, "
            "but time series passed as arguments are {}".format(
                tskey, set(timeseries_dataframes.keys())
            )
        ) from err
    if not isinstance(df, pd.DataFrame):
        raise exceptions.ModelError(
            "Error in loading data. Object passed in time series "
            "dictionary under key {} is a {}, not a DataFrame.".format(tskey, type(df))
        )
    return df
def update_pyomo_param(backend_model, param, update_dict):
    """
    Update the value(s) of a Pyomo Param in-place, so the user never has to
    touch the backend model directly.

    Parameters
    ----------
    param : str
        Name of the parameter to update
    update_dict : dict
        keys are parameter indeces (either strings or tuples of strings,
        depending on whether there is one or more than one dimension). Values
        are the new values being assigned to the parameter at the given indeces.

    Returns
    -------
    None. Value(s) are updated in-place, requiring the user to run the model
    again to see the effect on results.
    """
    # Guard clauses: the named attribute must exist and be an indexed Param
    if not hasattr(backend_model, param):
        raise exceptions.ModelError(
            "Parameter `{}` not in the Pyomo Backend. Check that the string "
            "matches the corresponding constraint/cost in the model.inputs "
            "xarray Dataset".format(param))
    pyomo_object = getattr(backend_model, param)
    if not isinstance(pyomo_object, po.base.param.IndexedParam):
        raise exceptions.ModelError(
            "`{}` not a Parameter in the Pyomo Backend. Sets and decision variables "
            "cannot be updated by the user".format(param))
    if not isinstance(update_dict, dict):
        raise TypeError("`update_dict` must be a dictionary")
    pyomo_object.store_values(update_dict)
def activate_pyomo_constraint(backend_model, constraint, active=True):
    """
    Set the status of a backend constraint or objective to active or inactive.

    Parameters
    ----------
    constraint : str
        Name of the constraint/objective to activate/deactivate
        Built-in constraints include '_constraint'
    active: bool, default=True
        status to set the constraint/objective
    """
    # Guard clauses: the named attribute must exist and be a Constraint
    if not hasattr(backend_model, constraint):
        raise exceptions.ModelError(
            "constraint/objective `{}` not in the Pyomo Backend.".format(
                constraint))
    pyomo_constraint = getattr(backend_model, constraint)
    if not isinstance(pyomo_constraint, po.base.Constraint):
        raise exceptions.ModelError(
            "`{}` not a constraint in the Pyomo Backend.".format(constraint))
    # `active` must be exactly True or False (identity check, as in callers)
    if active is True:
        pyomo_constraint.activate()
    elif active is False:
        pyomo_constraint.deactivate()
    else:
        raise ValueError("Argument `active` must be True or False")
def _error_on_malformed_input():
    # Guard 1: SPORES cannot start from a backend model that already holds
    # an objective value (i.e. has been solved)
    if backend_rerun:
        has_solution = True
        try:
            backend_rerun.obj()
        except ValueError:  # model has not yet been run
            has_solution = False
        if has_solution:
            raise exceptions.ModelError(
                "Cannot run SPORES if the backend model already has a solution. "
                "Consider using the `build_only` optional `run()` argument to avoid this."
            )
    # Guard 2: inputs must not already carry a `spores` dimension
    input_dims = model_data.filter_by_attrs(is_result=0).squeeze().dims
    if "spores" in input_dims:
        raise exceptions.ModelError(
            "Cannot run SPORES with a SPORES dimension in any input (e.g. `cost_energy_cap`)."
        )
def run(self, force_rerun=False, **kwargs):
    """
    Run the model. If ``force_rerun`` is True, any existing results
    will be overwritten.

    Additional kwargs are passed to the backend.
    """
    # Check that results exist and are non-empty
    if hasattr(self, 'results') and self.results.data_vars and not force_rerun:
        # Fix: 'to force' and 'the results' previously concatenated to
        # "forcethe" — missing trailing space in the literal
        raise exceptions.ModelError(
            'This model object already has results. '
            'Use model.run(force_rerun=True) to force '
            'the results to be overwritten with a new run.')

    # Operational mode requires uniform timesteps
    if (self._model_data.attrs['run.mode'] == 'operate' and
            not self._model_data.attrs['allow_operate_mode']):
        raise exceptions.ModelError(
            'Unable to run this model in operational mode, probably because '
            'there exist non-uniform timesteps (e.g. from time masking)')

    results, self._backend_model, interface = run_backend(
        self._model_data, self._timings, **kwargs)

    # Add additional post-processed result variables to results
    if results.attrs.get('termination_condition', None) == 'optimal':
        results = postprocess.postprocess_model_results(
            results, self._model_data, self._timings)

    # Tag every result variable so it can be filtered out of the inputs
    for var in results.data_vars:
        results[var].attrs['is_result'] = 1

    self._model_data.update(results)
    self._model_data.attrs.update(results.attrs)

    # Record solver timing metadata when the solver actually returned
    if 'run_solution_returned' in self._timings.keys():
        self._model_data.attrs['solution_time'] = (
            self._timings['run_solution_returned'] -
            self._timings['run_start']).total_seconds()
        self._model_data.attrs['time_finished'] = (
            self._timings['run_solution_returned'].strftime('%Y-%m-%d %H:%M:%S'))

    self.results = self._model_data.filter_by_attrs(is_result=1)

    self.backend = interface(self)
def drop(data, timesteps):
    """
    Drop timesteps from data, adjusting the timestep weight of remaining
    timesteps accordingly. Returns updated dataset.

    Parameters
    ----------
    data : xarray.Dataset
        Calliope model data.
    timesteps : str or list or other iterable
        Pandas-compatible timestep strings.

    Raises
    ------
    exceptions.ModelError
        If ``timesteps`` cannot be parsed by ``pd.to_datetime``.
    """
    # Turn timesteps into a pandas datetime index for subsetting, which also
    # checks whether they are actually valid
    try:
        timesteps_pd = pd.to_datetime(timesteps)
    except Exception as e:
        # Fix: chain the original parsing error (previously `e` was unused
        # and the root cause was lost)
        raise exceptions.ModelError(
            'Invalid timesteps: {}'.format(timesteps)) from e

    # 'Distribute weight' of the dropped timesteps onto the remaining ones
    dropped_weight = data.timestep_weights.loc[{
        'timesteps': timesteps_pd}].sum()

    data = data.drop(timesteps_pd, dim='timesteps')

    data['timestep_weights'] = data['timestep_weights'] + (
        dropped_weight / len(data['timestep_weights']))

    return data
def load_overrides_from_scenario(config_model, scenario):
    """
    Resolve a scenario name (or comma-separated override string) into a flat
    list of override names, expanding nested scenarios recursively.
    """

    def _get_overrides(scenario_name):
        # A scenario may map to a single override or a list of them;
        # normalise to a list
        found = config_model.get_key(f"scenarios.{scenario_name}", [scenario_name])
        return found if isinstance(found, list) else [found]

    known_scenarios = config_model.get("scenarios", {}).keys()
    if scenario in known_scenarios:
        if "," in scenario:
            exceptions.warn(
                f"Scenario name `{scenario}` includes commas that won't be parsed as a list of overrides."
            )
        logger.info("Loading overrides from scenario: {} ".format(scenario))
        scenario_list = _get_overrides(scenario)
    else:
        # Not a named scenario: treat as comma-separated override names
        scenario_list = scenario.split(",")

    scenario_overrides = set()
    for entry in scenario_list:
        if isinstance(entry, dict):
            raise exceptions.ModelError(
                "Scenario definition must be a list of override or other scenario names."
            )
        if entry in known_scenarios:
            # Nested scenario: recurse and merge its overrides
            scenario_overrides.update(
                load_overrides_from_scenario(config_model, entry))
        else:
            scenario_overrides.add(entry)

    return list(scenario_overrides)
def lookup_loc_techs_non_conversion(model_run):
    """
    loc_techs be linked to their loc_tech_carriers, based on their carrier_in
    or carrier_out attribute. E.g. `X1::ccgt` will be linked to
    `X1::ccgt::power` as carrier_out for the ccgt is `power`.
    """
    # All candidate loc_tech_carriers, produced and consumed
    all_carriers = (model_run.sets['loc_tech_carriers_prod']
                    + model_run.sets['loc_tech_carriers_con'])

    entries = []
    for loc_tech in model_run.sets['loc_techs_non_conversion']:
        # For any non-conversion technology, there is only one carrier (either
        # produced or consumed)
        matches = list({
            c for c in all_carriers if c.rsplit("::", 1)[0] == loc_tech
        })
        if len(matches) > 1:
            raise exceptions.ModelError(
                'More than one carrier associated with '
                'non-conversion location:technology `{}`'.format(loc_tech))
        entries.append(matches[0])

    return dict(dims=['loc_techs_non_conversion'], data=entries)
def combine_overrides(config_model, overrides):
    """
    Merge the named overrides from the model config into a single AttrDict.

    Parameters
    ----------
    config_model : AttrDict
        Model configuration containing an ``overrides`` section.
    overrides : iterable of str
        Names of overrides to combine, in order.

    Raises
    ------
    exceptions.ModelError
        If an override is undefined, or two overrides define the same key.
    """
    override_dict = AttrDict()
    for override in overrides:
        try:
            yaml_string = config_model.overrides[override].to_yaml()
            override_with_imports = AttrDict.from_yaml_string(yaml_string)
        except KeyError as err:
            # Fix: chain the KeyError so the root cause stays in the traceback
            raise exceptions.ModelError(
                "Override `{}` is not defined.".format(override)) from err
        try:
            # allow_override=False makes duplicate keys raise KeyError
            override_dict.union(override_with_imports, allow_override=False)
        except KeyError as e:
            raise exceptions.ModelError(
                str(e)[1:-1] + ". Already specified but defined again in "
                "override `{}`.".format(override)) from e

    return override_dict
def update_pyomo_param(backend_model, param, index, value):
    """
    A Pyomo Param value can be updated without the user directly accessing the
    backend model.

    Parameters
    ----------
    param : str
        Name of the parameter to update
    index : tuple of strings
        Tuple of dimension indeces, in the order given in model.inputs for the
        reciprocal parameter
    value : int, float, bool, or str
        Value to assign to the Pyomo Param at the given index

    Returns
    -------
    Value will be updated in-place, requiring the user to run the model again to
    see the effect on results.
    """
    if not hasattr(backend_model, param):
        raise exceptions.ModelError(
            'Parameter {} not in the Pyomo Backend. Check that the string '
            'matches the corresponding constraint/cost in the model.inputs '
            'xarray Dataset'.format(param)
        )
    elif not isinstance(getattr(backend_model, param), po.base.param.IndexedParam):
        raise exceptions.ModelError(
            '{} not a Parameter in the Pyomo Backend. Sets and decision variables '
            'cannot be updated by the user'.format(param)
        )
    elif index not in getattr(backend_model, param):
        raise exceptions.ModelError(
            'index {} not in the Pyomo Parameter {}. call '
            'model.access_backend_model_inputs to see the indeces of the Pyomo '
            'Parameters'.format(index, param)
        )
    else:
        # Fix: emit a proper warning (as done elsewhere in this module via
        # exceptions.warn) instead of a bare print to stdout
        exceptions.warn(
            'we currently do not check that the updated value is the '
            'correct data type for this Pyomo Parameter, this is your '
            'responsibility to check!'
        )
        getattr(backend_model, param)[index] = value
def _check_data(self):
    """
    Verify all input data has been extracted into the model dataset, then run
    the model-data checks and surface their warnings/errors.
    """
    if self.node_dict or self.tech_dict:
        # Fix: the condition checks both node_dict and tech_dict, but the
        # message previously only reported node_dict
        raise exceptions.ModelError(
            "Some data not extracted from inputs into model dataset:\n"
            f"{self.node_dict}\n{self.tech_dict}")
    self.model_data, final_check_comments, warns, errors = checks.check_model_data(
        self.model_data)
    exceptions.print_warnings_and_raise_errors(warnings=warns, errors=errors)
def check_timeseries_dataframes(timeseries_dataframes):
    """
    Timeseries dataframes should be dict of pandas DataFrames.
    """
    is_dict = isinstance(timeseries_dataframes, dict)
    # Only iterate values if it is actually a dict (mirrors short-circuiting)
    all_are_dataframes = is_dict and all(
        isinstance(value, pd.DataFrame)
        for value in timeseries_dataframes.values()
    )
    if not all_are_dataframes:
        raise exceptions.ModelError(
            "Error in loading timeseries data from dataframes. "
            "`timeseries_dataframes` must be dict of pandas DataFrames.")
def run(self, force_rerun=False, **kwargs):
    """
    Run the model. If ``force_rerun`` is True, any existing results
    will be overwritten.

    Additional kwargs are passed to the backend.
    """
    # Check that results exist and are non-empty
    if hasattr(self, "results") and self.results.data_vars and not force_rerun:
        # Fix: "to force" and "the results" previously concatenated to
        # "forcethe" — missing trailing space in the literal
        raise exceptions.ModelError(
            "This model object already has results. "
            "Use model.run(force_rerun=True) to force "
            "the results to be overwritten with a new run."
        )

    # Operational mode requires uniform timesteps
    if (
        self.run_config["mode"] == "operate"
        and not self._model_data.attrs["allow_operate_mode"]
    ):
        raise exceptions.ModelError(
            "Unable to run this model in operational mode, probably because "
            "there exist non-uniform timesteps (e.g. from time masking)"
        )

    results, self._backend_model, self._backend_model_opt, interface = run_backend(
        self._model_data, self._timings, **kwargs
    )

    # Add additional post-processed result variables to results
    if results.attrs.get("termination_condition", None) in ["optimal", "feasible"]:
        results = postprocess_results.postprocess_model_results(
            results, self._model_data, self._timings
        )

    # Results take precedence over existing inputs on merge (compat="override")
    self._model_data.attrs.update(results.attrs)
    self._model_data = xr.merge(
        [results, self._model_data], compat="override", combine_attrs="no_conflicts"
    )
    self._add_model_data_methods()

    self.backend = interface(self)
def energy_capacity_systemwide_constraint_rule(backend_model, tech):
    """
    Set constraints to limit the capacity of a single technology type across all
    locations in the model.

    The first valid case is applied:

    .. container:: scrolling-wrapper

        .. math::

            \\sum_{loc}\\boldsymbol{energy_{cap}}(loc::tech)
            \\begin{cases}
                = energy_{cap, equals, systemwide}(loc::tech),&
                    \\text{if } energy_{cap, equals, systemwide}(loc::tech)\\\\
                \\leq energy_{cap, max, systemwide}(loc::tech),&
                    \\text{if } energy_{cap, max, systemwide}(loc::tech)\\\\
                \\text{unconstrained},& \\text{otherwise}
            \\end{cases}
            \\forall tech \\in techs

    """
    # Transmission techs are named `tech:remote`, so strip the remote part
    # when matching loc_techs of the form `loc::tech:remote`
    if tech in backend_model.techs_transmission_names:
        all_loc_techs = [
            i for i in backend_model.loc_techs_transmission
            if i.split('::')[1].split(':')[0] == tech
        ]
        multiplier = 2  # there are always two technologies associated with one link
    else:
        all_loc_techs = [
            i for i in backend_model.loc_techs
            if i.split('::')[1] == tech
        ]
        multiplier = 1

    # NOTE(review): get_param presumably returns the parameter value or a
    # falsy default when unset — verify against its definition
    max_systemwide = get_param(backend_model, 'energy_cap_max_systemwide', tech)
    equals_systemwide = get_param(backend_model, 'energy_cap_equals_systemwide', tech)

    # No constraint at all if max is inf and no `equals` value is given
    if np.isinf(po.value(max_systemwide)) and not equals_systemwide:
        return po.Constraint.NoConstraint
    elif equals_systemwide and np.isinf(po.value(equals_systemwide)):
        # An equality constraint pinned to infinity is unsatisfiable
        raise exceptions.ModelError(
            'Cannot use inf for energy_cap_equals_systemwide for tech `{}`'.
            format(tech))

    # Total capacity of this tech across all matched locations
    sum_expr = sum(backend_model.energy_cap[loc_tech] for loc_tech in all_loc_techs)

    # `equals` takes precedence over `max`; multiplier doubles the bound for
    # transmission links (each link counted once per direction)
    if equals_systemwide:
        return sum_expr == equals_systemwide * multiplier
    else:
        return sum_expr <= max_systemwide * multiplier
def get_daily_timesteps(data, check_uniformity=False):
    """
    Return the per-timestep resolutions of the first day in ``data``,
    optionally checking that all days share the same resolution profile.
    """
    # One array of timestep resolutions per unique calendar date in the index
    daily_timesteps = [
        data.timestep_resolution.loc[i].values
        for i in np.unique(data.timesteps.to_index().strftime('%Y-%m-%d'))
    ]

    if check_uniformity:
        # NOTE(review): comparing a list of arrays against one array relies on
        # numpy broadcasting and assumes every day has the same number of
        # timesteps — if day lengths differ this comparison may misbehave;
        # verify upstream guarantees uniform day lengths
        if not np.all(daily_timesteps == daily_timesteps[0]):
            raise exceptions.ModelError(
                'For clustering, timestep resolution must be uniform.')

    # Only the first day's resolutions are returned; callers presumably rely
    # on uniformity even when check_uniformity is False — TODO confirm
    return daily_timesteps[0]
def load_timeseries_from_dataframe(timeseries_dataframes, tskey):
    """
    Fetch the timeseries dataframe stored under ``tskey`` and label its
    columns with the key via a ("source", "column") MultiIndex.
    """
    # If `df=` is called, timeseries_dataframes must be entered
    if timeseries_dataframes is None:
        raise exceptions.ModelError(
            "Error in loading timeseries. Model config specifies df={} but "
            "no timeseries passed as arguments in calliope.Model(...). "
            "Note that, if running from a command line, it is not possible "
            "to read dataframes via `df=...` and you should specify "
            "`file=...` with a CSV file.".format(tskey))

    try:
        df = timeseries_dataframes[tskey]
    except KeyError:
        raise exceptions.ModelError(
            "Error in loading data from dataframe. "
            "Model attempted to load dataframe with key `{}`, "
            "but available dataframes are {}".format(
                tskey, set(timeseries_dataframes.keys())))

    # Prefix every column with the dataframe key so different sources stay
    # distinguishable after concatenation
    labelled_columns = pd.MultiIndex.from_product(
        [[tskey], df.columns], names=["source", "column"]
    )
    df.columns = labelled_columns
    return df
def lookup_loc_techs_conversion(dataset, model_run):
    """
    Conversion technologies are separated from other non-conversion technologies
    as there is more than one carrier associated with a single loc_tech. Here,
    the link is made per carrier tier (`out` and `in` are the two primary
    carrier tiers)
    """
    # Get the string name for a loc_tech which includes the carriers in and out
    # associated with that technology (for conversion technologies)
    carrier_tiers = model_run.sets["carrier_tiers"]

    loc_techs_conversion_array = xr.DataArray(
        data=np.empty(
            (len(model_run.sets["loc_techs_conversion"]), len(carrier_tiers)),
            # Fix: np.object was deprecated in NumPy 1.20 and removed in 1.24;
            # the builtin `object` is the documented replacement
            dtype=object,
        ),
        dims=["loc_techs_conversion", "carrier_tiers"],
        coords={
            "loc_techs_conversion": list(model_run.sets["loc_techs_conversion"]),
            "carrier_tiers": list(carrier_tiers),
        },
    )
    for loc_tech in model_run.sets["loc_techs_conversion"]:
        # For any non-conversion technology, there are only two carriers
        # (one produced and one consumed)
        loc_tech_carrier_in = [
            i
            for i in model_run.sets["loc_tech_carriers_con"]
            if loc_tech == i.rsplit("::", 1)[0]
        ]

        loc_tech_carrier_out = [
            i
            for i in model_run.sets["loc_tech_carriers_prod"]
            if loc_tech == i.rsplit("::", 1)[0]
        ]
        if len(loc_tech_carrier_in) > 1 or len(loc_tech_carrier_out) > 1:
            raise exceptions.ModelError(
                "More than one carrier in or out associated with "
                "conversion location:technology `{}`".format(loc_tech)
            )
        else:
            loc_techs_conversion_array.loc[
                dict(loc_techs_conversion=loc_tech, carrier_tiers=["in", "out"])
            ] = [loc_tech_carrier_in[0], loc_tech_carrier_out[0]]

    dataset = dataset.merge(
        loc_techs_conversion_array.to_dataset(name="lookup_loc_techs_conversion")
    )

    return dataset
def combine_overrides(override_file_path, override_groups):
    """
    Load the named override group(s) from a YAML file and merge them into a
    single AttrDict, raising if a group is missing or keys collide.
    """
    # A comma-separated string names several groups; otherwise just one
    if ',' in override_groups:
        groups = override_groups.split(',')
    else:
        groups = [override_groups]

    combined = AttrDict()
    for group in groups:
        try:
            group_config = AttrDict.from_yaml(override_file_path)[group]
        except KeyError:
            raise exceptions.ModelError(
                'Override group `{}` does not exist in file `{}`.'.format(
                    group, override_file_path))
        try:
            # allow_override=False makes duplicate keys raise KeyError
            combined.union(group_config, allow_override=False)
        except KeyError as e:
            raise exceptions.ModelError(
                str(e)[1:-1] + '. Already specified but defined again in '
                'override group `{}`.'.format(group))

    return combined
def rerun_pyomo_model(model_data, backend_model):
    """
    Rerun the Pyomo backend, perhaps after updating a parameter value,
    (de)activating a constraint/objective or updating run options in the model
    model_data object (e.g. `run.solver`).

    Returns
    -------
    run_data : xarray.Dataset
        Raw data from this rerun, including both inputs and results.
        to filter inputs/results, use `run_data.filter_by_attrs(is_result=...)`
        with 0 for inputs and 1 for results.
    """
    backend_model.__calliope_run_config = AttrDict.from_yaml_string(
        model_data.attrs['run_config'])

    # Only `plan` mode can be rerun directly through the backend
    if backend_model.__calliope_run_config['mode'] != 'plan':
        raise exceptions.ModelError(
            'Cannot rerun the backend in {} run mode. Only `plan` mode is '
            'possible.'.format(backend_model.__calliope_run_config['mode'])
        )

    timings = {}
    log_time(logger, timings, 'model_creation')

    results, backend_model = backend_run.run_plan(
        model_data, timings, run_pyomo,
        build_only=False, backend_rerun=backend_model
    )

    for k, v in timings.items():
        results.attrs['timings.' + k] = v

    # Fix: previously an exceptions.ModelWarning instance was created and
    # immediately discarded, so the user never saw this warning; emit it
    # properly via exceptions.warn (as done elsewhere in this module)
    exceptions.warn(
        'model.results will only be updated on running the model from '
        '`model.run()`. We provide results of this rerun as a standalone xarray '
        'Dataset'
    )

    results.attrs.update(model_data.attrs)
    for key, var in results.data_vars.items():
        var.attrs['is_result'] = 1

    # Merge the backend's input parameters back in, flagged as inputs
    inputs = access_pyomo_model_inputs(backend_model)
    for key, var in inputs.data_vars.items():
        var.attrs['is_result'] = 0
    results.update(inputs)
    run_data = results

    return run_data
def _get_array(data, var, tech, **kwargs):
    """
    Return a pandas object of ``var`` subset to one technology (and any extra
    dimension selections given as kwargs), averaged per timestep.

    Raises
    ------
    exceptions.ModelError
        If a kwarg names a dimension that does not exist for ``var``.
    """
    # Fix: `kwargs` is always a dict (possibly empty), never None, so the
    # previous `if kwargs is not None` guard was dead code
    subset = {"techs": tech, **kwargs}

    unusable_dims = (set(subset.keys()).difference(
        ["techs", "nodes"]).difference(data[var].dims))
    if unusable_dims:
        # NOTE(review): `var.name` assumes var is a DataArray-like; if var is
        # a plain string key this line raises AttributeError — verify callers
        raise exceptions.ModelError(
            "Attempting to mask time based on technology {}, "
            "but dimension(s) {} do not exist for parameter {}".format(
                tech, unusable_dims, var.name))

    arr = data[var].loc[subset].groupby("timesteps").mean(...).to_pandas()
    return arr
def _stack_data(data, dates, times):
    """
    Stack all non-time dimensions of an xarray DataArray
    """
    # Split the timesteps dimension into separate (dates, times) dimensions
    unstacked = data.assign_coords(
        timesteps=pd.MultiIndex.from_product([dates, times], names=["dates", "times"])
    ).unstack("timesteps")

    # Everything except `dates` is folded into one `stacked` dimension,
    # with `times` always included last
    stack_dims = list(
        set(unstacked.dims).difference(["dates", "times"])) + ["times"]

    # Guard: with only `times` left there is nothing non-temporal to stack
    if len(stack_dims) < 2:
        raise exceptions.ModelError(
            "Cannot conduct time clustering with variable {} as it has no "
            "non-time dimensions.".format(data.name))

    return unstacked.stack(stacked=stack_dims)
def _get_array(data, var, tech, **kwargs):
    """
    Return a pandas object of ``var`` subset to one technology (and any extra
    dimension selections given as kwargs), averaged over all non-timestep
    dimensions.

    Raises
    ------
    exceptions.ModelError
        If a kwarg names a dimension that does not exist for ``var``.
    """
    # Fix: `kwargs` is always a dict (possibly empty), never None, so the
    # previous `if kwargs is not None` guard was dead code
    subset = {'techs': tech, **kwargs}

    unusable_dims = (set(subset.keys())
                     .difference(["techs", "locs"])
                     .difference(data[var].dims)
                     )
    if unusable_dims:
        # NOTE(review): `var.name` assumes var is a DataArray-like; if var is
        # a plain string key this line raises AttributeError — verify callers
        raise exceptions.ModelError("attempting to mask time based on "
                                    "technology {}, but dimension(s) "
                                    "{} don't exist for parameter {}".format(
                                        tech, unusable_dims, var.name))

    arr = split_loc_techs(data[var].copy()).loc[subset]
    # Fix: `i is not 'timesteps'` compared identity with a string literal,
    # which is a SyntaxWarning on CPython >= 3.8 and not guaranteed to be
    # correct; value comparison `!=` is the intended check
    arr = arr.mean(dim=[i for i in arr.dims if i != 'timesteps']).to_pandas()
    return arr
def _get_carrier_group_constraint_loc_techs(
    loc_techs, locs, config, constraint_name, sets, constraint_sets
):
    """
    Build the loc_tech_carrier subsets needed by a carrier-based group
    constraint, keyed for later insertion into the constraint sets.
    """
    # `_con_` in the constraint name selects consumption flow, else production
    flow = "con" if "_con_" in constraint_name else "prod"
    # Exactly one carrier per carrier-based group constraint is supported
    if len(config.keys()) > 1:
        raise exceptions.ModelError(
            "Can only handle one carrier per group constraint that is carrier-based"
        )
    carrier = list(config.keys())[0]
    if "net_import" in constraint_name:
        # Net import considers both consumed and produced carriers
        _loc_tech_carriers = _get_net_import_loc_tech_carrier_subset(
            loc_techs,
            sets["loc_tech_carriers_con"] + sets["loc_tech_carriers_prod"],
            carrier,
            locs,
        )
    else:
        _loc_tech_carriers = _get_loc_tech_carrier_subset(
            loc_techs, sets[f"loc_tech_carriers_{flow}"], carrier, locs
        )
    if "share" in constraint_name:
        # Share constraints compare a left-hand subset against a right-hand
        # reference subset (demand, demand+conversion, or supply+conversion)
        lhs_loc_tech_carriers = _loc_tech_carriers
        if "demand" in constraint_name or "import" in constraint_name:
            rhs_loc_tech_carriers = _get_loc_tech_carrier_subset(
                sets["loc_techs_demand"], sets["loc_tech_carriers_con"], carrier, locs
            )
        elif flow == "con":
            rhs_loc_tech_carriers = _get_loc_tech_carrier_subset(
                sets["loc_techs_demand"] + sets["loc_techs_conversion_all"],
                sets["loc_tech_carriers_con"],
                carrier,
                locs,
            )
        elif flow == "prod":
            rhs_loc_tech_carriers = _get_loc_tech_carrier_subset(
                sets["loc_techs_supply_conversion_all"],
                sets["loc_tech_carriers_prod"],
                carrier,
                locs,
            )
        # NOTE(review): the "{}" in these keys is left unformatted here —
        # presumably the caller substitutes the group constraint name via
        # str.format; verify, since an unformatted key would otherwise be a bug
        return {
            "group_constraint_loc_tech_carriers_{}_lhs": list(lhs_loc_tech_carriers),
            "group_constraint_loc_tech_carriers_{}_rhs": list(rhs_loc_tech_carriers),
        }
    else:
        return {"group_constraint_loc_tech_carriers_{}": list(_loc_tech_carriers)}
def _get_relevant_vars(model, dataset, array): carriers = list(dataset.carriers.values) allowed_input_vars = [ k for k, v in model.inputs.data_vars.items() if 'timesteps' in v.dims and len(v.dims) > 1 ] allowed_result_vars = ([ 'results', 'inputs', 'all', 'storage', 'resource_con', 'cost_var' ]) if ((isinstance(array, list) and not set(array).intersection(allowed_input_vars + allowed_result_vars + carriers)) or (isinstance(array, str) and array not in allowed_input_vars + allowed_result_vars + carriers)): raise exceptions.ModelError( 'Cannot plot array={}. If you want carrier flow (_prod, _con, _export) ' 'then specify the name of the energy carrier as array'.format( array)) # relevant_vars are all variables relevant to this plotting instance relevant_vars = [] # Ensure carriers are at the top of the list if array == 'results': relevant_vars += sorted(carriers) + sorted(allowed_result_vars) elif array == 'inputs': relevant_vars += sorted(allowed_input_vars) elif array == 'all': relevant_vars += sorted(carriers) + sorted(allowed_result_vars + allowed_input_vars) elif isinstance(array, list): relevant_vars = array elif isinstance(array, str): relevant_vars = [array] relevant_vars = [i for i in relevant_vars if i in dataset or i in carriers] return relevant_vars
def _get_relevant_vars(dataset, array): allowed_input_vars = [ i + j for i, j in product([ 'resource_area', 'energy_cap', 'resource_cap', 'storage_cap', 'units' ], ['_max', '_min', '_equals']) ] allowed_result_vars = [ 'results', 'inputs', 'all', 'resource_area', 'energy_cap', 'resource_cap', 'storage_cap', 'units', 'systemwide_levelised_cost', 'systemwide_capacity_factor' ] if ((isinstance(array, list) and not set(array) != set(allowed_input_vars + allowed_result_vars)) or (isinstance(array, str) and array not in allowed_input_vars + allowed_result_vars)): raise exceptions.ModelError( 'Cannot plot array={}. as one or more of the elements is not considered ' 'to be a capacity'.format(array)) # relevant_vars are all variables relevant to this plotting instance if array == 'results': relevant_vars = sorted(allowed_result_vars) elif array == 'inputs': relevant_vars = sorted(allowed_input_vars) elif array == 'all': relevant_vars = sorted(allowed_result_vars + allowed_input_vars) elif isinstance(array, list): relevant_vars = array elif isinstance(array, str): relevant_vars = [array] relevant_vars = [i for i in relevant_vars if i in dataset] # Remove all vars that don't actually turn up in the dataset, which is relevant # ony really for results vars return sorted( list(set(relevant_vars).intersection(dataset.data_vars.keys())))
def run_operate(model_data, timings, backend, build_only):
    """
    For use when mode is 'operate', to allow the model to be built, edited, and
    iteratively run within Pyomo.

    Parameters
    ----------
    model_data : xarray.Dataset
        Pre-processed model inputs (and possibly plan-mode results).
    timings : dict
        Mutable dict of timing checkpoints, updated via log_time.
    backend : module
        Backend interface providing generate_model / solve_model / etc.
    build_only : bool
        If True, only the first window's model is built; no solving occurs.

    Returns
    -------
    (results, backend_model) : (xarray.Dataset, backend model object)
    """
    log_time(
        logger,
        timings,
        "run_start",
        comment="Backend: starting model run in operational mode",
    )

    defaults = UpdateObserverDict(
        initial_yaml_string=model_data.attrs["defaults"],
        name="defaults",
        observer=model_data,
    )
    run_config = UpdateObserverDict(
        initial_yaml_string=model_data.attrs["run_config"],
        name="run_config",
        observer=model_data,
    )

    # New param defaults = old maximum param defaults (e.g. energy_cap gets default from energy_cap_max)
    operate_params = {
        k.replace("_max", ""): v for k, v in defaults.items() if k.endswith("_max")
    }
    operate_params["purchased"] = 0  # no _max to work from here, so we hardcode a default

    defaults.update(operate_params)

    # Capacity results (from plan mode) can be used as the input to operate mode
    if (any(model_data.filter_by_attrs(is_result=1).data_vars)
            and run_config.get("operation.use_cap_results", False)):
        # Anything with is_result = 1 will be ignored in the Pyomo model
        for varname, varvals in model_data.data_vars.items():
            if varname in operate_params.keys():
                varvals.attrs["is_result"] = 1
                varvals.attrs["operate_param"] = 1
    else:
        # Otherwise, derive fixed capacities from the _max/_equals inputs
        cap_max = xr.merge([
            v.rename(k.replace("_max", ""))
            for k, v in model_data.data_vars.items() if "_max" in k
        ])
        cap_equals = xr.merge([
            v.rename(k.replace("_equals", ""))
            for k, v in model_data.data_vars.items() if "_equals" in k
        ])
        caps = cap_max.update(cap_equals)
        for cap in caps.data_vars.values():
            cap.attrs["is_result"] = 1
            cap.attrs["operate_param"] = 1
        model_data.update(caps)

    comments, warnings, errors = checks.check_operate_params(model_data)
    exceptions.print_warnings_and_raise_errors(warnings=warnings, errors=errors)

    # Initialize our variables
    solver = run_config["solver"]
    solver_io = run_config.get("solver_io", None)
    solver_options = run_config.get("solver_options", None)
    save_logs = run_config.get("save_logs", None)
    window = run_config["operation"]["window"]
    horizon = run_config["operation"]["horizon"]
    window_to_horizon = horizon - window

    # get the cumulative sum of timestep resolution, to find where we hit our window and horizon
    timestep_cumsum = model_data.timestep_resolution.cumsum(
        "timesteps").to_pandas()
    # get the timesteps at which we start and end our windows
    window_ends = timestep_cumsum.where(
        (timestep_cumsum % window == 0)
        | (timestep_cumsum == timestep_cumsum[-1]))
    window_starts = timestep_cumsum.where(
        (~np.isnan(window_ends.shift(1)))
        | (timestep_cumsum == timestep_cumsum[0])).dropna()

    window_ends = window_ends.dropna()
    horizon_ends = timestep_cumsum[timestep_cumsum.isin(
        window_ends.values + window_to_horizon)]

    if not any(window_starts):
        raise exceptions.ModelError(
            "Not enough timesteps or incorrect timestep resolution to run in "
            "operational mode with an optimisation window of {}".format(
                window))

    # We will only update timseries parameters
    timeseries_data_vars = [
        k for k, v in model_data.data_vars.items()
        if "timesteps" in v.dims and v.attrs["is_result"] == 0
    ]

    # Loop through each window, solve over the horizon length, and add result to
    # result_array we only go as far as the end of the last horizon, which may
    # clip the last bit of data
    result_array = []
    # track whether each iteration finds an optimal solution or not
    terminations = []

    if build_only:
        iterations = [0]
    else:
        iterations = range(len(window_starts))

    for i in iterations:
        start_timestep = window_starts.index[i]

        # Build full model in first instance
        if i == 0:
            warmstart = False
            end_timestep = horizon_ends.index[i]
            timesteps = slice(start_timestep, end_timestep)
            window_model_data = model_data.loc[dict(timesteps=timesteps)]

            log_time(
                logger,
                timings,
                "model_gen_1",
                comment="Backend: generating initial model",
            )

            backend_model = backend.generate_model(window_model_data)

        # Build the full model in the last instance(s),
        # where number of timesteps is less than the horizon length
        elif i > len(horizon_ends) - 1:
            warmstart = False
            end_timestep = window_ends.index[i]
            timesteps = slice(start_timestep, end_timestep)
            window_model_data = model_data.loc[dict(timesteps=timesteps)]

            log_time(
                logger,
                timings,
                "model_gen_{}".format(i + 1),
                comment=(
                    "Backend: iteration {}: generating new model for "
                    "end of timeseries, with horizon = {} timesteps".format(
                        i + 1, window_ends[i] - window_starts[i])),
            )

            backend_model = backend.generate_model(window_model_data)

        # Update relevent Pyomo Params in intermediate instances
        else:
            warmstart = True
            end_timestep = horizon_ends.index[i]
            timesteps = slice(start_timestep, end_timestep)
            window_model_data = model_data.loc[dict(timesteps=timesteps)]

            log_time(
                logger,
                timings,
                "model_gen_{}".format(i + 1),
                comment="Backend: iteration {}: updating model parameters".format(i + 1),
            )
            # Pyomo model sees the same timestamps each time, we just change the
            # values associated with those timestamps
            for var in timeseries_data_vars:
                # New values
                var_series = (
                    window_model_data[var].to_series().dropna().replace(
                        "inf", np.inf))
                # Same timestamps
                var_series.index = backend_model.__calliope_model_data["data"][
                    var].keys()
                var_dict = var_series.to_dict()
                # Update pyomo Param with new dictionary
                getattr(backend_model, var).store_values(var_dict)

        if not build_only:
            log_time(
                logger,
                timings,
                "model_run_{}".format(i + 1),
                time_since_run_start=True,
                comment="Backend: iteration {}: sending model to solver".format(i + 1),
            )
            # After iteration 1, warmstart = True, which should speed up the process
            # Note: Warmstart isn't possible with GLPK (dealt with later on)
            _results = backend.solve_model(
                backend_model,
                solver=solver,
                solver_io=solver_io,
                solver_options=solver_options,
                save_logs=save_logs,
                warmstart=warmstart,
            )

            log_time(
                logger,
                timings,
                "run_solver_exit_{}".format(i + 1),
                time_since_run_start=True,
                comment="Backend: iteration {}: solver finished running".format(i + 1),
            )
            # xarray dataset is built for each iteration
            _termination = backend.load_results(backend_model, _results)
            terminations.append(_termination)

            _results = backend.get_result_array(backend_model, model_data)

            # We give back the actual timesteps for this iteration and take a slice
            # equal to the window length
            _results["timesteps"] = window_model_data.timesteps.copy()

            # We always save the window data. Until the last window(s) this will crop
            # the window_to_horizon timesteps. In the last window(s), optimistion will
            # only be occurring over a window length anyway
            _results = _results.loc[dict(
                timesteps=slice(None, window_ends.index[i]))]
            result_array.append(_results)

            # Set up initial storage for the next iteration
            if "loc_techs_store" in model_data.dims.keys():
                storage_initial = _results.storage.loc[{
                    "timesteps": window_ends.index[i]
                }].drop("timesteps")
                model_data["storage_initial"].loc[
                    storage_initial.coords] = storage_initial.values
                backend_model.storage_initial.store_values(
                    storage_initial.to_series().dropna().to_dict())

            # Set up total operated units for the next iteration
            if "loc_techs_milp" in model_data.dims.keys():
                # Fix: np.int was deprecated in NumPy 1.20 and removed in
                # 1.24; the builtin `int` is the documented replacement
                operated_units = _results.operating_units.sum(
                    "timesteps").astype(int)
                model_data["operated_units"].loc[{}] += operated_units.values
                backend_model.operated_units.store_values(
                    operated_units.to_series().dropna().to_dict())

            log_time(
                logger,
                timings,
                "run_solver_exit_{}".format(i + 1),
                time_since_run_start=True,
                comment="Backend: iteration {}: generated solution array".format(i + 1),
            )

    if build_only:
        results = xr.Dataset()
    else:
        # Concatenate results over the timestep dimension to get a single
        # xarray Dataset of interest
        results = xr.concat(result_array, dim="timesteps")
        # Summarise per-iteration termination conditions into one attribute
        if all(i == "optimal" for i in terminations):
            results.attrs["termination_condition"] = "optimal"
        elif all(i in ["optimal", "feasible"] for i in terminations):
            results.attrs["termination_condition"] = "feasible"
        else:
            results.attrs["termination_condition"] = ",".join(terminations)

        log_time(
            logger,
            timings,
            "run_solution_returned",
            time_since_run_start=True,
            comment="Backend: generated full solution array",
        )

    return results, backend_model
def run_operate(model_data, timings, backend, build_only):
    """
    For use when mode is 'operate', to allow the model to be built, edited, and
    iteratively run within Pyomo.

    The time domain is split into windows of length ``operation.window``, each
    solved over a longer horizon of length ``operation.horizon``; only the
    window-length slice of each solution is kept, and storage/unit state is
    carried over between iterations.

    Parameters
    ----------
    model_data : xarray.Dataset
        Calliope model data; must carry `defaults` and `run_config` attrs as
        YAML strings.
    timings : dict
        Timing log, updated in-place via `log_time`.
    backend : module
        Pyomo backend interface (generate_model / solve_model / load_results /
        get_result_array).
    build_only : bool
        If True, only build the first-window model; no solving takes place.

    Returns
    -------
    results : xarray.Dataset
        Concatenated window results (empty Dataset if build_only).
    backend_model : Pyomo model
        The (last-built) backend model instance.
    """
    log_time(logger, timings, 'run_start',
             comment='Backend: starting model run in operational mode')

    defaults = AttrDict.from_yaml_string(model_data.attrs['defaults'])
    run_config = AttrDict.from_yaml_string(model_data.attrs['run_config'])

    # All `*_max` constraint defaults (plus `purchased`) correspond to
    # decision variables that operate mode treats as fixed parameters
    operate_params = ['purchased'] + [
        i.replace('_max', '') for i in defaults if i[-4:] == '_max'
    ]

    # Capacity results (from plan mode) can be used as the input to operate mode
    if (any(model_data.filter_by_attrs(is_result=1).data_vars) and
            run_config.get('operation.use_cap_results', False)):
        # Anything with is_result = 1 will be ignored in the Pyomo model
        for varname, varvals in model_data.data_vars.items():
            if varname in operate_params:
                varvals.attrs['is_result'] = 1
                varvals.attrs['operate_param'] = 1

    else:
        # No plan-mode results available: fix capacities from the user-defined
        # `*_max` / `*_equals` constraints instead
        cap_max = xr.merge([
            v.rename(k.replace('_max', ''))
            for k, v in model_data.data_vars.items() if '_max' in k
        ])
        cap_equals = xr.merge([
            v.rename(k.replace('_equals', ''))
            for k, v in model_data.data_vars.items() if '_equals' in k
        ])
        caps = cap_max.update(cap_equals)
        for cap in caps.data_vars.values():
            cap.attrs['is_result'] = 1
            cap.attrs['operate_param'] = 1
        model_data.update(caps)

    # Storage initial is carried over between iterations, so must be defined along with storage
    if ('loc_techs_store' in model_data.dims.keys() and
            'storage_initial' not in model_data.data_vars.keys()):
        model_data['storage_initial'] = (xr.DataArray(
            [0 for loc_tech in model_data.loc_techs_store.values],
            dims='loc_techs_store'))
        model_data['storage_initial'].attrs['is_result'] = 0
        exceptions.warn(
            'Initial stored energy not defined, set to zero for all '
            'loc::techs in loc_techs_store, for use in iterative optimisation')

    # Operated units is carried over between iterations, so must be defined in a milp model
    if ('loc_techs_milp' in model_data.dims.keys() and
            'operated_units' not in model_data.data_vars.keys()):
        model_data['operated_units'] = (xr.DataArray(
            [0 for loc_tech in model_data.loc_techs_milp.values],
            dims='loc_techs_milp'))
        model_data['operated_units'].attrs['is_result'] = 1
        model_data['operated_units'].attrs['operate_param'] = 1
        exceptions.warn(
            'daily operated units not defined, set to zero for all '
            'loc::techs in loc_techs_milp, for use in iterative optimisation')

    comments, warnings, errors = checks.check_operate_params(model_data)
    exceptions.print_warnings_and_raise_errors(warnings=warnings, errors=errors)

    # Initialize our variables
    solver = run_config['solver']
    solver_io = run_config.get('solver_io', None)
    solver_options = run_config.get('solver_options', None)
    save_logs = run_config.get('save_logs', None)
    window = run_config['operation']['window']
    horizon = run_config['operation']['horizon']
    window_to_horizon = horizon - window

    # get the cumulative sum of timestep resolution, to find where we hit our window and horizon
    timestep_cumsum = model_data.timestep_resolution.cumsum(
        'timesteps').to_pandas()
    # get the timesteps at which we start and end our windows
    window_ends = timestep_cumsum.where((timestep_cumsum % window == 0) | (
        timestep_cumsum == timestep_cumsum[-1]))
    window_starts = timestep_cumsum.where((~np.isnan(window_ends.shift(1))) | (
        timestep_cumsum == timestep_cumsum[0])).dropna()

    window_ends = window_ends.dropna()
    horizon_ends = timestep_cumsum[timestep_cumsum.isin(window_ends.values +
                                                        window_to_horizon)]

    if not any(window_starts):
        raise exceptions.ModelError(
            'Not enough timesteps or incorrect timestep resolution to run in '
            'operational mode with an optimisation window of {}'.format(
                window))

    # We will only update timseries parameters
    timeseries_data_vars = [
        k for k, v in model_data.data_vars.items()
        if 'timesteps' in v.dims and v.attrs['is_result'] == 0
    ]

    # Loop through each window, solve over the horizon length, and add result to
    # result_array we only go as far as the end of the last horizon, which may
    # clip the last bit of data
    result_array = []
    # track whether each iteration finds an optimal solution or not
    terminations = []

    if build_only:
        iterations = [0]
    else:
        iterations = range(len(window_starts))

    for i in iterations:
        start_timestep = window_starts.index[i]

        # Build full model in first instance
        if i == 0:
            warmstart = False
            end_timestep = horizon_ends.index[i]
            timesteps = slice(start_timestep, end_timestep)
            window_model_data = model_data.loc[dict(timesteps=timesteps)]

            log_time(logger, timings, 'model_gen_1',
                     comment='Backend: generating initial model')

            backend_model = backend.generate_model(window_model_data)

        # Build the full model in the last instance(s),
        # where number of timesteps is less than the horizon length
        elif i > len(horizon_ends) - 1:
            warmstart = False
            end_timestep = window_ends.index[i]
            timesteps = slice(start_timestep, end_timestep)
            window_model_data = model_data.loc[dict(timesteps=timesteps)]

            log_time(
                logger, timings, 'model_gen_{}'.format(i + 1),
                comment=(
                    'Backend: iteration {}: generating new model for '
                    'end of timeseries, with horizon = {} timesteps'.format(
                        i + 1, window_ends[i] - window_starts[i])))

            backend_model = backend.generate_model(window_model_data)

        # Update relevent Pyomo Params in intermediate instances
        else:
            warmstart = True
            end_timestep = horizon_ends.index[i]
            timesteps = slice(start_timestep, end_timestep)
            window_model_data = model_data.loc[dict(timesteps=timesteps)]

            log_time(
                logger, timings, 'model_gen_{}'.format(i + 1),
                comment='Backend: iteration {}: updating model parameters'.
                format(i + 1))
            # Pyomo model sees the same timestamps each time, we just change the
            # values associated with those timestamps
            for var in timeseries_data_vars:
                # New values
                var_series = window_model_data[var].to_series().dropna(
                ).replace('inf', np.inf)
                # Same timestamps
                var_series.index = backend_model.__calliope_model_data['data'][
                    var].keys()
                var_dict = var_series.to_dict()
                # Update pyomo Param with new dictionary
                getattr(backend_model, var).store_values(var_dict)

        if not build_only:
            log_time(logger, timings, 'model_run_{}'.format(i + 1),
                     time_since_run_start=True,
                     comment='Backend: iteration {}: sending model to solver'.
                     format(i + 1))
            # After iteration 1, warmstart = True, which should speed up the process
            # Note: Warmstart isn't possible with GLPK (dealt with later on)
            _results = backend.solve_model(
                backend_model,
                solver=solver,
                solver_io=solver_io,
                solver_options=solver_options,
                save_logs=save_logs,
                warmstart=warmstart,
            )

            log_time(logger, timings, 'run_solver_exit_{}'.format(i + 1),
                     time_since_run_start=True,
                     comment='Backend: iteration {}: solver finished running'.
                     format(i + 1))

            # xarray dataset is built for each iteration
            _termination = backend.load_results(backend_model, _results)
            terminations.append(_termination)

            _results = backend.get_result_array(backend_model, model_data)

            # We give back the actual timesteps for this iteration and take a slice
            # equal to the window length
            _results['timesteps'] = window_model_data.timesteps.copy()

            # We always save the window data. Until the last window(s) this will crop
            # the window_to_horizon timesteps. In the last window(s), optimistion will
            # only be occurring over a window length anyway
            _results = _results.loc[dict(
                timesteps=slice(None, window_ends.index[i]))]
            result_array.append(_results)

            # Set up initial storage for the next iteration
            if 'loc_techs_store' in model_data.dims.keys():
                storage_initial = _results.storage.loc[{
                    'timesteps': window_ends.index[i]
                }].drop('timesteps')
                model_data['storage_initial'].loc[
                    storage_initial.coords] = storage_initial.values
                backend_model.storage_initial.store_values(
                    storage_initial.to_series().dropna().to_dict())

            # Set up total operated units for the next iteration
            if 'loc_techs_milp' in model_data.dims.keys():
                # NOTE: np.int was deprecated in NumPy 1.20 and removed in
                # 1.24; the builtin int is the documented replacement
                operated_units = _results.operating_units.sum(
                    'timesteps').astype(int)
                model_data['operated_units'].loc[{}] += operated_units.values
                backend_model.operated_units.store_values(
                    operated_units.to_series().dropna().to_dict())

            log_time(logger, timings, 'run_solver_exit_{}'.format(i + 1),
                     time_since_run_start=True,
                     comment='Backend: iteration {}: generated solution array'.
                     format(i + 1))

    if build_only:
        results = xr.Dataset()
    else:
        # Concatenate results over the timestep dimension to get a single
        # xarray Dataset of interest
        results = xr.concat(result_array, dim='timesteps')
        if all(i == 'optimal' for i in terminations):
            results.attrs['termination_condition'] = 'optimal'
        elif all(i in ['optimal', 'feasible'] for i in terminations):
            results.attrs['termination_condition'] = 'feasible'
        else:
            results.attrs['termination_condition'] = ','.join(terminations)

        log_time(logger, timings, 'run_solution_returned',
                 time_since_run_start=True,
                 comment='Backend: generated full solution array')

    return results, backend_model
def process_timeseries_data(config_model, model_run):
    """
    Load, date-parse, and (optionally) time-subset all timeseries referenced
    by the model configuration.

    Parameters
    ----------
    config_model : AttrDict
        Processed model configuration; `model.timeseries_data_path`,
        `model.timeseries_dateformat` and `model.subset_time` are consulted.
    model_run : AttrDict
        Model run dictionary; location constraints are scanned for
        `file=...` references.

    Returns
    -------
    timeseries_data : AttrDict
        Mapping of filename -> DataFrame with a parsed DatetimeIndex.
    first_index : pandas.Index
        The shared timestep index of all (non-cluster) timeseries.

    Raises
    ------
    exceptions.ModelError
        On non-numeric data, unparseable dates, an out-of-range or empty time
        subset, or mismatched timeseries indices.
    """
    if config_model.model.timeseries_data is None:
        timeseries_data = AttrDict()
    else:
        timeseries_data = config_model.model.timeseries_data

    def _parser(x, dtformat):
        return pd.to_datetime(x, format=dtformat, exact=False)

    # Scan a flattened config dict for `file=<name>[:column]` references
    # (named def instead of an assigned lambda, per PEP 8)
    def _get_filenames(config):
        return set([
            v.split('=')[1].rsplit(':', 1)[0]
            for v in config.values() if 'file=' in str(v)
        ])

    # Initialised here so the index-consistency check below does not raise
    # NameError when no `timeseries_data_path` is configured
    cluster_filenames = set()

    if 'timeseries_data_path' in config_model.model:
        dtformat = config_model.model['timeseries_dateformat']

        # Generate the set of all files we want to read from file
        location_config = model_run.locations.as_dict_flat()
        model_config = config_model.model.as_dict_flat()
        constraint_filenames = _get_filenames(location_config)
        cluster_filenames = _get_filenames(model_config)

        datetime_min = []
        datetime_max = []

        for file in constraint_filenames | cluster_filenames:
            file_path = os.path.join(config_model.model.timeseries_data_path,
                                     file)
            # load the data, without parsing the dates, to catch errors in the data
            df = pd.read_csv(file_path, index_col=0)
            try:
                df.apply(pd.to_numeric)
            except ValueError as e:
                raise exceptions.ModelError(
                    'Error in loading data from {}. Ensure all entries are '
                    'numeric. Full error: {}'.format(file, e)
                )
            # Now parse the dates, checking for errors specific to this
            try:
                df.index = _parser(df.index, dtformat)
            except ValueError as e:
                raise exceptions.ModelError(
                    'Error in parsing dates in timeseries data from {}, '
                    'using datetime format `{}`: {}'.format(file, dtformat, e)
                )
            timeseries_data[file] = df

            datetime_min.append(df.index[0].date())
            datetime_max.append(df.index[-1].date())

    # Apply time subsetting, if supplied in model_run
    subset_time_config = config_model.model.subset_time
    if subset_time_config is not None:
        # Test parsing dates first, to make sure they fit our required subset format
        try:
            subset_time = _parser(subset_time_config, '%Y-%m-%d %H:%M:%S')
        except ValueError as e:
            raise exceptions.ModelError(
                'Timeseries subset must be in ISO format (anything up to the '
                'detail of `%Y-%m-%d %H:%M:%S`.\n User time subset: {}\n '
                'Error caused: {}'.format(subset_time_config, e)
            )
        if isinstance(subset_time_config, list) and len(subset_time_config) == 2:
            time_slice = slice(subset_time_config[0], subset_time_config[1])

            # Don't allow slicing outside the range of input data
            if (subset_time[0].date() < max(datetime_min) or
                    subset_time[1].date() > min(datetime_max)):

                raise exceptions.ModelError(
                    'subset time range {} is outside the input data time range '
                    '[{}, {}]'.format(subset_time_config,
                                      max(datetime_min).strftime('%Y-%m-%d'),
                                      min(datetime_max).strftime('%Y-%m-%d'))
                )
        elif isinstance(subset_time_config, list):
            raise exceptions.ModelError(
                'Invalid subset_time value: {}'.format(subset_time_config)
            )
        else:
            time_slice = str(subset_time_config)

        for k in timeseries_data.keys():
            timeseries_data[k] = timeseries_data[k].loc[time_slice, :]
            if timeseries_data[k].empty:
                raise exceptions.ModelError(
                    'The time slice {} creates an empty timeseries array for {}'
                    .format(time_slice, k)
                )

    # Ensure all timeseries have the same index
    indices = [
        (file, df.index) for file, df in timeseries_data.items()
        if file not in cluster_filenames
    ]
    first_file, first_index = indices[0]
    for file, idx in indices[1:]:
        if not first_index.equals(idx):
            raise exceptions.ModelError(
                'Time series indices do not match '
                'between {} and {}'.format(first_file, file)
            )

    return timeseries_data, first_index
def apply_overrides(config, scenario=None, override_dict=None):
    """
    Generate processed Model configuration, applying any scenarios overrides.

    Parameters
    ----------
    config : AttrDict
        a model configuration AttrDict
    scenario : str, optional
        Name of a scenario defined in the model config, or a comma-separated
        string of override names to apply directly.
    override_dict : str or dict or AttrDict, optional
        If a YAML string, converted to AttrDict

    Returns
    -------
    config_model : AttrDict
        Fully merged model configuration.
    debug_comments : AttrDict
        Per-key notes recording where each applied override came from.
    overrides : list of str
        Names of the overrides that were applied ([] if no scenario given).
    scenario : str or None
        The scenario argument, passed through unchanged.
    """
    debug_comments = AttrDict()

    # Start from the packaged base defaults (calliope/config/model.yaml)
    base_model_config_file = os.path.join(
        os.path.dirname(calliope.__file__),
        'config', 'model.yaml'
    )
    config_model = AttrDict.from_yaml(base_model_config_file)

    # Interpret timeseries_data_path as relative
    config.model.timeseries_data_path = relative_path(
        config.config_path, config.model.timeseries_data_path
    )

    # The input files are allowed to override other model defaults
    config_model.union(config, allow_override=True)

    # First pass of applying override dict before applying scenarios,
    # so that can override scenario definitions by override_dict
    if override_dict:
        # Normalise to AttrDict: accept YAML string, plain dict, or AttrDict
        if isinstance(override_dict, str):
            override_dict = AttrDict.from_yaml_string(override_dict)
        elif not isinstance(override_dict, AttrDict):
            override_dict = AttrDict(override_dict)

        warnings = checks.check_overrides(config_model, override_dict)
        exceptions.print_warnings_and_raise_errors(warnings=warnings)

        config_model.union(
            override_dict,
            allow_override=True,
            allow_replacement=True
        )

    if scenario:
        scenarios = config_model.get('scenarios', {})

        if scenario in scenarios:
            # Manually defined scenario names cannot be the same as single
            # overrides or any combination of comma-delimited overrides
            if all([i in config_model.get('overrides', {})
                    for i in scenario.split(',')]):
                raise exceptions.ModelError(
                    'Manually defined scenario cannot be a combination of override names.'
                )
            if not isinstance(scenarios[scenario], str):
                raise exceptions.ModelError(
                    'Scenario definition must be string of comma-separated overrides.'
                )
            overrides = scenarios[scenario].split(',')
            logger.info(
                'Using scenario `{}` leading to the application of '
                'overrides `{}`.'.format(scenario, scenarios[scenario])
            )
        else:
            # Not a named scenario: treat the string itself as a
            # comma-separated list of override names
            overrides = str(scenario).split(',')
            logger.info(
                'Applying overrides `{}` without a '
                'specific scenario name.'.format(scenario)
            )

        overrides_from_scenario = combine_overrides(config_model, overrides)

        warnings = checks.check_overrides(config_model, overrides_from_scenario)
        exceptions.print_warnings_and_raise_errors(warnings=warnings)

        config_model.union(
            overrides_from_scenario, allow_override=True, allow_replacement=True
        )
        for k, v in overrides_from_scenario.as_dict_flat().items():
            debug_comments.set_key(
                '{}'.format(k),
                'Applied from override')
    else:
        overrides = []

    # Second pass of applying override dict after applying scenarios,
    # so that scenario-based overrides are overridden by override_dict!
    if override_dict:
        config_model.union(
            override_dict, allow_override=True, allow_replacement=True
        )
        for k, v in override_dict.as_dict_flat().items():
            debug_comments.set_key(
                '{}'.format(k),
                'Overridden via override dictionary.')

    return config_model, debug_comments, overrides, scenario
def get_clusters(data, func, timesteps_per_day, tech=None, timesteps=None,
                 k=None, variables=None, **kwargs):
    """
    Run a clustering algorithm on the timeseries data supplied. All timeseries
    data is reshaped into one row per day before clustering into similar days.

    Parameters
    ----------
    data : xarray.Dataset
        Should be normalized
    func : str
        'kmeans' or 'hierarchical' for KMeans or Agglomerative clustering,
        respectively
    timesteps_per_day : int
        Total number of timesteps in a day
    tech : list, optional
        list of strings referring to technologies by which clustering is
        undertaken. If none (default), all technologies within timeseries
        variables will be used.
    timesteps : list or str, optional
        Subset of the time domain within which to apply clustering.
    k : int, optional
        Number of clusters to create. If none (default), will use Hartigan's
        rule to infer a reasonable number of clusters.
    variables : list, optional
        data variables (e.g. `resource`, `energy_eff`) by whose values the
        data will be clustered. If none (default), all timeseries variables
        will be used.
    kwargs : dict
        Additional keyword arguments available depend on the `func`.
        For available KMeans kwargs see:
        http://scikit-learn.org/stable/modules/generated/sklearn.cluster.KMeans.html
        For available hierarchical kwargs see:
        http://scikit-learn.org/stable/modules/generated/sklearn.cluster.AgglomerativeClustering.html

    Returns
    -------
    clusters : dataframe
        Indexed by timesteps and with locations as columns, giving cluster
        membership for first timestep of each day.
    clustered_data : sklearn.cluster object
        Result of clustering using sklearn.KMeans(k).fit(X) or
        sklearn.KMeans(k).AgglomerativeClustering(X). Allows user to access
        specific attributes, for detailed statistical analysis.

    Raises
    ------
    exceptions.ModelError
        If `func` is not a recognised clustering algorithm, or if
        hierarchical clustering is requested without `k`.
    """
    if timesteps is not None:
        data = data.loc[{"timesteps": timesteps}]
    else:
        timesteps = data.timesteps.values

    # One row per day, one column per (tech, variable, timestep-of-day)
    X = reshape_for_clustering(data, tech, variables)

    if func == "kmeans":
        if not k:
            k = hartigan_n_clusters(X)
            # BUGFIX: the two string fragments previously concatenated
            # without a space ("...thata good number...")
            exceptions.warn("Used Hartigan's rule to determine that "
                            "a good number of clusters is {}.".format(k))
        # Forward user-supplied kwargs, as documented above (previously
        # accepted but silently ignored)
        clustered_data = sk_cluster.KMeans(k, **kwargs).fit(X)

    elif func == "hierarchical":
        if not k:
            raise exceptions.ModelError(
                "Cannot undertake hierarchical clustering without a predefined "
                "number of clusters (k)")
        clustered_data = sk_cluster.AgglomerativeClustering(k, **kwargs).fit(X)

    else:
        # Previously an unknown `func` fell through to a NameError;
        # fail explicitly instead
        raise exceptions.ModelError(
            "Unknown clustering function `{}`; "
            "choose from 'kmeans' or 'hierarchical'.".format(func))

    # Determine the cluster membership of each day
    day_clusters = clustered_data.labels_

    # Create mapping of timesteps to clusters: cluster label is attached to
    # the first timestep of each day
    clusters = pd.Series(day_clusters, index=timesteps[::timesteps_per_day])

    return clusters, clustered_data